%% Cell type:markdown id: tags:
# AGIPD Characterize Dark Images #
Author: European XFEL Detector Group, Version: 2.0
The following code analyzes a set of dark images taken with the AGIPD detector to deduce detector offsets, noise, bad-pixel maps and gain-switching thresholds. All four types of constants are evaluated per pixel and per memory cell. Data for the detector's three gain stages must be present, recorded in separate runs.
The evaluated calibration constants are stored locally and injected into the calibration database.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/d/raw/CALLAB/202031/p900113" # path to input data, required
out_folder = "" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
modules = [-1] # list of modules to evaluate, RANGE ALLOWED
run_high = 9985 # run number in which high gain data was recorded, required
run_med = 9984 # run number in which medium gain data was recorded, required
run_low = 9983 # run number in which low gain data was recorded, required
operation_mode = "ADAPTIVE_GAIN" # Detector operation mode, optional (defaults to "ADAPTIVE_GAIN")
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_template = "{}CH0" # inset for receiver devices
instrument_source_template = '{}/DET/{}:xtdf' # path in the HDF5 file to images
ctrl_source_template = '{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device '
use_dir_creation_date = True # use dir creation date as data production reference date
cal_db_interface = "tcp://max-exfl016:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 0 # bias voltage, set to 0 to use stored value in slow data.
gain_setting = -1 # the gain setting, use -1 to use value stored in slow data.
gain_mode = -1 # gain mode, use -1 to use value stored in slow data.
integration_time = -1 # integration time, negative values for auto-detection.
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
interlaced = False # assume interlaced data format, for data prior to Dec. 2017
thresholds_offset_sigma = 3. # offset sigma thresholds for offset deduced bad pixels
thresholds_offset_hard = [0, 0] # For setting the same offset thresholds for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_offset_hard_hg = [3000, 7000] # High-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_mg = [6000, 10000] # Medium-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_lg = [6000, 10000] # Low-gain thresholds in absolute ADU terms for offset deduced bad pixels
thresholds_offset_hard_hg_fixed = [3500, 6500] # Same as thresholds_offset_hard_hg, but for fixed gain operation
thresholds_offset_hard_mg_fixed = [3500, 6500] # Same as thresholds_offset_hard_mg, but for fixed gain operation
thresholds_offset_hard_lg_fixed = [3500, 6500] # Same as thresholds_offset_hard_lg, but for fixed gain operation
thresholds_noise_sigma = 5. # noise sigma thresholds for noise-deduced bad pixels
thresholds_noise_hard = [0, 0] # For setting the same noise thresholds for the 3 gains. Left for backwards compatibility. Default [0, 0] to take the following parameters.
thresholds_noise_hard_hg = [4, 20] # High-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_noise_hard_mg = [4, 20] # Medium-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_noise_hard_lg = [4, 20] # Low-gain thresholds in absolute ADU terms for noise-deduced bad pixels
thresholds_gain_sigma = 5. # Gain separation sigma threshold
max_trains = 550 # Maximum number of trains to use for processing darks. Set to 0 to process all available trains. 550 chosen for ~500 GB nodes to temporarily avoid memory issues.
min_trains = 1 # Minimum number of trains required for processing darks. If the run folder has fewer trains, processing is stopped.
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. ~7mins extra time for 64 memory cells
# This is used if modules is not specified:
def find_modules(in_folder, run_high, modules):
if (modules is not None) and modules != [-1]:
return modules
from pathlib import Path
import re
modules = set()
for file in Path(in_folder, f'r{run_high:04d}').iterdir():
m = re.search(r'-AGIPD(\d{2})-', file.name)
if m:
modules.add(int(m.group(1)))
return sorted(modules)
```
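%% Cell type:markdown id: tags:
When `modules` is left at `[-1]`, the `find_modules` helper above infers the module indices from the RAW file names of the high-gain run. A minimal illustration with made-up file names (the real files follow the same `-AGIPD<NN>-` pattern):
%% Cell type:code id: tags:
``` python
import re

# Hypothetical file names, for illustration only.
example_names = [
    "RAW-R9985-AGIPD00-S00000.h5",
    "RAW-R9985-AGIPD00-S00001.h5",
    "RAW-R9985-AGIPD07-S00000.h5",
]
found = set()
for name in example_names:
    m = re.search(r'-AGIPD(\d{2})-', name)
    if m:
        found.add(int(m.group(1)))
print(sorted(found))  # [0, 7]
```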
%% Cell type:code id: tags:
``` python
import itertools
import multiprocessing
import os
from collections import OrderedDict
from datetime import timedelta
from pathlib import Path
from typing import List, Tuple
import dateutil.parser
import matplotlib
import numpy as np
import pasha as psh
import tabulate
import yaml
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory
matplotlib.use('agg')
import iCalibrationDB
import matplotlib.pyplot as plt
from cal_tools.agipdlib import AgipdCtrl
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
module_index_to_qm,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# insert control device if format string (does nothing otherwise)
ctrl_src = ctrl_source_template.format(karabo_id_control)
runs_dict = OrderedDict()
run_numbers = [run_high, run_med, run_low]
for gain_idx, (run_name, run_number) in enumerate(zip(["high", "med", "low"], run_numbers)):
runs_dict[run_name] = {
"number": run_number,
"gain": gain_idx,
"dc": RunDirectory(f'{in_folder}/r{run_number:04d}/')
}
creation_time=None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run_high)
print(f"Using {creation_time} as creation time of constant.")
run, prop, seq = run_prop_seq_from_path(in_folder)
# Read report path and create file location tuple to add with the injection
file_loc = f"proposal:{prop} runs:{run_low} {run_med} {run_high}"
report = get_report(metadata_folder)
cal_db_interface = get_random_db_interface(cal_db_interface)
print(f'Calibration database interface: {cal_db_interface}')
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
def create_karabo_da_list(modules):
return(["AGIPD{:02d}".format(i) for i in modules])
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = create_karabo_da_list(modules)
else:
modules = [int(x[-2:]) for x in karabo_da]
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Create out_folder if it doesn't exist.
Path(out_folder).mkdir(parents=True, exist_ok=True)
mod_image_size = []
for run_dict in runs_dict.values():
missing_modules = [] # modules with no images within a run.
n_trains_list = [] # list of the number of trains for each module within a run.
    # This is important in case no slurm parallelization over modules is done
    # (e.g. when running the notebook interactively).
for m in modules:
# validate that there are trains for the selected modules and run.
dc = run_dict["dc"].select(
instrument_src.format(m), "*", require_all=True)
n_trains = len(dc.train_ids)
if n_trains == 0:
print(f"WARNING: No images for module AGIPD{m:02d}, run {run_dict['number']}.")
missing_modules.append(m)
# Raise a warning if the module has less trains than expected.
        elif n_trains < min_trains:
            print(f"WARNING: AGIPD{m:02d}, run {run_dict['number']} "
                  f"has fewer trains than the minimum required: {min_trains}.")
        else:
            print(f"Processing {max_trains if max_trains < n_trains else n_trains} "
                  f"trains for AGIPD{m:02d}, run {run_dict['number']}.")
n_trains_list.append(n_trains)
mod_image_size.append(np.product(dc[instrument_src.format(m), "image.data"].shape) * 2 / 1e9)
if max(n_trains_list) == 0:
raise ValueError(f"No images to process for run: {run_dict['number']}")
    elif max(n_trains_list) < min_trains:
        raise ValueError(f"Run {run_dict['number']} has fewer than the minimum number of trains: {min_trains}.")
    # Update the modules and karabo_da lists based on the available modules to process.
modules = [m for m in modules if m not in missing_modules]
karabo_da = create_karabo_da_list(modules)
print(f"Will process data of ({sum(mod_image_size):.02f} GB).")
```
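%% Cell type:markdown id: tags:
The size printed above assumes 2 bytes per raw pixel value. A standalone sketch of the same arithmetic, using a made-up data shape:
%% Cell type:code id: tags:
``` python
import numpy as np

# Hypothetical image.data shape: (frames, data/gain dimension, slow scan, fast scan).
example_shape = (176000, 2, 512, 128)
size_gb = np.prod(example_shape) * 2 / 1e9  # 2 bytes per raw (uint16) value
print(f"{size_gb:.02f} GB")  # ~46.14 GB
```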
%% Cell type:markdown id: tags:
## Read and validate the runs' control data.
%% Cell type:code id: tags:
``` python
def read_run_conditions(runs_dict: dict):
agipd_cond = AgipdCtrl(
run_dc=runs_dict["dc"],
image_src=instrument_src_mod,
ctrl_src=ctrl_src,
)
cond_dict["runs"].append(runs_dict["number"])
if acq_rate == 0:
cond_dict["acq_rate"].append(agipd_cond.get_acq_rate())
if mem_cells == 0:
cond_dict["mem_cells"].append(agipd_cond.get_num_cells())
if gain_setting == -1:
cond_dict["gain_setting"].append(
agipd_cond.get_gain_setting(creation_time))
if bias_voltage == 0.:
cond_dict["bias_voltage"].append(
agipd_cond.get_bias_voltage(karabo_id_control))
if integration_time == -1:
cond_dict["integration_time"].append(
agipd_cond.get_integration_time())
if gain_mode == -1:
cond_dict["gain_mode"].append(agipd_cond.get_gain_mode())
else:
cond_dict["gain_mode"].append(AgipdGainMode(gain_mode))
```
%% Cell type:code id: tags:
``` python
def validate_gain_modes(gain_modes: List[AgipdGainMode]):
# Validate that gain modes are not a mix of adaptive and fixed gain.
if all(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
fixed_gain_mode = False
# Some runs are adaptive by mistake.
elif any(
gm == AgipdGainMode.ADAPTIVE_GAIN for gm in gain_modes
):
raise ValueError(
f"ERROR: Given runs {run_numbers}"
" have a mix of ADAPTIVE and FIXED gain modes: "
f"{gain_modes}."
)
elif list(gain_modes) == [
AgipdGainMode.FIXED_HIGH_GAIN,
AgipdGainMode.FIXED_MEDIUM_GAIN,
AgipdGainMode.FIXED_LOW_GAIN
]:
fixed_gain_mode = True
else:
raise ValueError(
"ERROR: Wrong arrangment of given dark runs. "
f"Given runs' gain_modes are {gain_modes} for runs: {run_numbers}."
)
return fixed_gain_mode
```
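%% Cell type:markdown id: tags:
A short usage sketch of `validate_gain_modes`, covering the three cases it distinguishes:
%% Cell type:code id: tags:
``` python
from cal_tools.enums import AgipdGainMode

# All three dark runs taken in adaptive gain -> adaptive-gain constants.
assert validate_gain_modes([AgipdGainMode.ADAPTIVE_GAIN] * 3) is False

# Fixed high/medium/low gain runs, in exactly that order -> fixed-gain constants.
assert validate_gain_modes([
    AgipdGainMode.FIXED_HIGH_GAIN,
    AgipdGainMode.FIXED_MEDIUM_GAIN,
    AgipdGainMode.FIXED_LOW_GAIN,
]) is True

# Any other combination (e.g. a mix of adaptive and fixed runs) raises a ValueError.
```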
%% Cell type:code id: tags:
``` python
# Read slow data from 1st channel only.
# Read all modules in one notebook and validate the conditions across detectors?
# Currently slurm jobs run per one module.
# TODO: what if first module is not available. Maybe only channel 2 available
instrument_src_mod = instrument_src.format(modules[0])
cond_dict = dict()
fixed_gain_mode = None
with multiprocessing.Manager() as manager:
cond_dict["runs"] = manager.list()
cond_dict["acq_rate"] = manager.list()
cond_dict["mem_cells"] = manager.list()
cond_dict["gain_setting"] = manager.list()
cond_dict["gain_mode"] = manager.list()
cond_dict["bias_voltage"] = manager.list()
cond_dict["integration_time"] = manager.list()
with multiprocessing.Pool(processes=len(modules)) as pool:
pool.starmap(read_run_conditions, zip(runs_dict.values()))
for cond, vlist in cond_dict.items():
if cond == "runs":
continue
elif cond == "gain_mode":
fixed_gain_mode = validate_gain_modes(cond_dict["gain_mode"])
elif not all(x == vlist[0] for x in vlist):
# TODO: raise ERROR??
print(
f"WARNING: {cond} is not the same for the runs "
f"{cond_dict['runs']} with values"
f" of {cond_dict[cond]}, respectively."
)
if cond_dict["acq_rate"]: acq_rate = cond_dict["acq_rate"][0]
if cond_dict["mem_cells"]: mem_cells = cond_dict["mem_cells"][0]
if cond_dict["gain_setting"]: gain_setting = cond_dict["gain_setting"][0]
if cond_dict["gain_mode"]: gain_mode = list(cond_dict["gain_mode"])
if cond_dict["bias_voltage"]: bias_voltage = cond_dict["bias_voltage"][0]
if cond_dict["integration_time"]: integration_time = cond_dict["integration_time"][0]
```
%% Cell type:code id: tags:
``` python
# Determine the gain operation mode based on the gain_mode stored in control h5file.
if operation_mode not in ("ADAPTIVE_GAIN", "FIXED_GAIN"):
print(f"WARNING: unknown operation_mode \"{operation_mode}\" parameter set")
if fixed_gain_mode and operation_mode == "ADAPTIVE_GAIN":
print(
"WARNING: Operation_mode parameter is ADAPTIVE_GAIN, but"
"slow data indicates FIXED_GAIN. Processing fixed gain constants.")
elif not fixed_gain_mode and operation_mode == "FIXED_GAIN":
print(
"WARNING: Operation_mode parameter is FIXED_GAIN, "
"slow data indicates ADAPTIVE_GAIN. Processing adaptive gain constants.")
```
%% Cell type:code id: tags:
``` python
print("Parameters are:")
print(f"Proposal: {prop}")
print(f"Acquisition rate: {acq_rate}")
print(f"Memory cells: {mem_cells}")
print(f"Runs: {run_numbers}")
print(f"Interlaced mode: {interlaced}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage}V")
print(f"Gain setting: {gain_setting}")
print(f"Integration time: {integration_time}")
print(f"Operation mode is {'fixed' if fixed_gain_mode else 'adaptive'} gain mode")
```
%% Cell type:code id: tags:
``` python
if thresholds_offset_hard != [0, 0]:
# if set, this will override the individual parameters
thresholds_offset_hard = [thresholds_offset_hard] * 3
elif fixed_gain_mode:
thresholds_offset_hard = [
thresholds_offset_hard_hg_fixed,
thresholds_offset_hard_mg_fixed,
thresholds_offset_hard_lg_fixed,
]
else:
thresholds_offset_hard = [
thresholds_offset_hard_hg,
thresholds_offset_hard_mg,
thresholds_offset_hard_lg,
]
print("Will use the following hard offset thresholds")
for name, value in zip(("High", "Medium", "Low"), thresholds_offset_hard):
print(f"- {name} gain: {value}")
if thresholds_noise_hard != [0, 0]:
thresholds_noise_hard = [thresholds_noise_hard] * 3
else:
thresholds_noise_hard = [
thresholds_noise_hard_hg,
thresholds_noise_hard_mg,
thresholds_noise_hard_lg,
]
```
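%% Cell type:markdown id: tags:
These [low, high] pairs are later applied per gain stage: any pixel whose offset (or noise) falls outside the corresponding window is flagged as bad. A tiny sketch of that test:
%% Cell type:code id: tags:
``` python
import numpy as np

offset = np.array([2500., 5000., 9000.])  # hypothetical per-pixel offsets (ADU)
low, high = 3000, 7000                     # e.g. thresholds_offset_hard_hg
flagged = (offset < low) | (offset > high)
print(flagged)  # [ True False  True]
```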
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per pixel and per memory cell. Offsets are the median value of the dark data taken at a given gain stage, the noise is the corresponding standard deviation, and the gain-bit values are the medians of the gain array.
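A self-contained sketch of this per-cell reduction, with random numbers standing in for the dark frames of a single memory cell:
%% Cell type:code id: tags:
``` python
import numpy as np

rng = np.random.default_rng(42)
# Toy dark data for one memory cell: (slow scan, fast scan, frames).
dark_frames = rng.normal(loc=5000, scale=10, size=(128, 512, 100))

offset = np.median(dark_frames, axis=2)  # per-pixel offset of this cell
noise = np.std(dark_frames, axis=2)      # per-pixel noise of this cell
print(offset.shape, noise.shape)         # (128, 512) (128, 512)
```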
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
print(f"Will use {parallel_num_procs} processes with {parallel_num_threads} threads each")
def characterize_module(
channel: int, runs_dict: dict,
) -> Tuple[int, int, np.array, np.array, np.array, np.array, np.array]:
# Select the corresponding module channel.
instrument_src_mod = instrument_src.format(channel)
run_dc = runs_dict["dc"].select(instrument_src_mod, require_all=True)
if max_trains != 0:
run_dc = run_dc.select_trains(np.s_[:max_trains])
gain_index = runs_dict["gain"]
# Read module's image and cellId data.
im = run_dc[instrument_src_mod, "image.data"].ndarray()
cell_ids = np.squeeze(run_dc[instrument_src_mod, "image.cellId"].ndarray())
local_thresholds_offset_hard = thresholds_offset_hard[gain_index]
local_thresholds_noise_hard = thresholds_noise_hard[gain_index]
if interlaced:
if not fixed_gain_mode:
ga = im[1::2, 0, ...]
im = im[0::2, 0, ...].astype(np.float32)
cell_ids = cell_ids[::2]
else:
if not fixed_gain_mode:
ga = im[:, 1, ...]
im = im[:, 0, ...].astype(np.float32)
im = np.transpose(im)
if not fixed_gain_mode:
ga = np.transpose(ga)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
offset = context.alloc(shape=(im.shape[0], im.shape[1], mem_cells), dtype=np.float64)
noise = context.alloc(like=offset)
if fixed_gain_mode:
gains = None
gains_std = None
else:
gains = context.alloc(like=offset)
gains_std = context.alloc(like=offset)
def process_cell(worker_id, array_index, cell_number):
cell_slice_index = (cell_ids == cell_number)
im_slice = im[..., cell_slice_index]
offset[..., cell_number] = np.median(im_slice, axis=2)
noise[..., cell_number] = np.std(im_slice, axis=2)
if not fixed_gain_mode:
ga_slice = ga[..., cell_slice_index]
gains[..., cell_number] = np.median(ga_slice, axis=2)
gains_std[..., cell_number] = np.std(ga_slice, axis=2)
    unique_cell_ids = np.unique(cell_ids)
    # Cell IDs are assumed to start at 0, so valid values are 0..mem_cells-1.
    if np.any(unique_cell_ids >= mem_cells):
        raise ValueError(
            f"Invalid cells found {unique_cell_ids} "
            f"for run: {run_dc.run_metadata()['runNumber']}.")
    context.map(process_cell, unique_cell_ids)
# bad pixels
bp = np.zeros_like(offset, dtype=np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(0,1))
offset_std = np.nanstd(offset, axis=(0,1))
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[(offset < local_thresholds_offset_hard[0]) |
(offset > local_thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(0,1))
noise_std = np.nanstd(noise, axis=(0,1))
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[(noise < local_thresholds_noise_hard[0]) | (noise > local_thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR
return channel, gain_index, offset, noise, gains, gains_std, bp
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(
characterize_module, itertools.product(modules, list(runs_dict.values())))
# mapped values for processing 2 modules example:
# [
# 0, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 0, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 0, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# 1, {"gain": 0, "run_number": <run-high>, "dc": <high-dc>},
# 1, {"gain": 1, "run_number": <run-med>, "dc": <med-dc>},
# 1, {"gain": 2, "run_number": <run-low>, "dc": <low-dc>},
# ]
```
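%% Cell type:markdown id: tags:
The argument ordering sketched in the comment above follows directly from `itertools.product`, which varies the runs fastest:
%% Cell type:code id: tags:
``` python
import itertools

# Two hypothetical module indices combined with the three gain runs.
for module, run_name in itertools.product([0, 1], ["high", "med", "low"]):
    print(module, run_name)
# 0 high, 0 med, 0 low, 1 high, 1 med, 1 low
```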
%% Cell type:code id: tags:
``` python
offset_g = OrderedDict()
noise_g = OrderedDict()
badpix_g = OrderedDict()
if not fixed_gain_mode:
gain_g = OrderedDict()
gainstd_g = OrderedDict()
for module_index, gain_index, offset, noise, gains, gains_std, bp in results:
qm = module_index_to_qm(module_index)
if qm not in offset_g:
offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2], 3))
noise_g[qm] = np.zeros_like(offset_g[qm])
badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
if not fixed_gain_mode:
gain_g[qm] = np.zeros_like(offset_g[qm])
gainstd_g[qm] = np.zeros_like(offset_g[qm])
offset_g[qm][..., gain_index] = offset
noise_g[qm][..., gain_index] = noise
badpix_g[qm][..., gain_index] = bp
if not fixed_gain_mode:
gain_g[qm][..., gain_index] = gains
gainstd_g[qm][..., gain_index] = gains_std
```
%% Cell type:code id: tags:
``` python
# Add bad pixels due to bad gain separation
if not fixed_gain_mode:
for qm in gain_g.keys():
for g in range(2):
# Bad pixels during bad gain separation.
# Fraction of pixels in the module with separation lower than "thresholds_gain_sigma".
bad_sep = (gain_g[qm][..., g+1] - gain_g[qm][..., g]) / \
np.sqrt(gainstd_g[qm][..., g+1]**2 + gainstd_g[qm][..., g]**2)
badpix_g[qm][...,g+1][bad_sep<thresholds_gain_sigma] |= \
BadPixels.GAIN_THRESHOLDING_ERROR
```
%% Cell type:markdown id: tags:
The thresholds for gain switching are then defined as the mean value between the individual gain-bit levels. Note that these thresholds need to be refined with charge-induced thresholds, as the two are not the same.
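Concretely, the first two threshold maps are the midpoints between adjacent gain-bit medians, computed per pixel and memory cell; a sketch with made-up values for one pixel and cell:
%% Cell type:code id: tags:
``` python
# Hypothetical gain-bit medians of one pixel/cell for high, medium and low gain.
gain_hg, gain_mg, gain_lg = 5000.0, 20000.0, 35000.0

threshold_hg_mg = (gain_hg + gain_mg) / 2  # switching point high -> medium
threshold_mg_lg = (gain_mg + gain_lg) / 2  # switching point medium -> low
print(threshold_hg_mg, threshold_mg_lg)    # 12500.0 27500.0
```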
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
thresholds_g = {}
for qm in gain_g.keys():
thresholds_g[qm] = np.zeros((gain_g[qm].shape[0], gain_g[qm].shape[1], gain_g[qm].shape[2], 5))
thresholds_g[qm][...,0] = (gain_g[qm][...,1]+gain_g[qm][...,0])/2
thresholds_g[qm][...,1] = (gain_g[qm][...,2]+gain_g[qm][...,1])/2
for i in range(3):
thresholds_g[qm][...,2+i] = gain_g[qm][...,i]
```
%% Cell type:code id: tags:
``` python
res = OrderedDict()
for i in modules:
qm = module_index_to_qm(i)
res[qm] = {
'Offset': offset_g[qm],
'Noise': noise_g[qm],
'BadPixelsDark': badpix_g[qm]
}
if not fixed_gain_mode:
res[qm]['ThresholdsDark'] = thresholds_g[qm]
```
%% Cell type:code id: tags:
``` python
# set the operating condition
# note: iCalibrationDB only adds gain_mode if it is truthy, so we don't need to handle None
condition = iCalibrationDB.Conditions.Dark.AGIPD(
memory_cells=mem_cells,
bias_voltage=bias_voltage,
acquisition_rate=acq_rate,
gain_setting=gain_setting,
gain_mode=fixed_gain_mode,
integration_time=integration_time
)
```
%% Cell type:code id: tags:
``` python
# Create mapping from module(s) (qm) to karabo_da(s) and PDU(s)
qm_dict = OrderedDict()
all_pdus = get_pdu_from_db(
karabo_id,
karabo_da,
constant=iCalibrationDB.CalibrationConstant(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time.isoformat() if creation_time else None,
timeout=cal_db_timeout
)
for module_index, module_da, module_pdu in zip(modules, karabo_da, all_pdus):
qm = module_index_to_qm(module_index)
qm_dict[qm] = {
"karabo_da": module_da,
"db_module": module_pdu
}
```
%% Cell type:markdown id: tags:
## Sending calibration constants to the database.
%% Cell type:code id: tags:
``` python
md = None
for qm in res:
db_module = qm_dict[qm]["db_module"]
for const in res[qm]:
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
dconst.data = res[qm][const]
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition, file_loc,
report, cal_db_interface, creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition, dconst.data,
file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const} for {qm} is stored locally in {file_loc}.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {mem_cells}\n• bias_voltage: {bias_voltage}\n"
f"• acquisition_rate: {acq_rate}\n• gain_setting: {gain_setting}\n"
f"• gain_mode: {fixed_gain_mode}\n• integration_time: {integration_time}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Retrieving previous calibration constants for comparison.
%% Cell type:code id: tags:
``` python
# Start retrieving existing constants for comparison
qm_x_const = [(qm, const) for qm in res for const in res[qm]]
def retrieve_old_constant(qm, const):
dconst = getattr(iCalibrationDB.Constants.AGIPD, const)()
data, mdata = get_from_db(
karabo_id=karabo_id,
karabo_da=qm_dict[qm]["karabo_da"],
constant=dconst,
condition=condition,
empty_constant=None,
cal_db_interface=cal_db_interface,
creation_time=creation_time-timedelta(seconds=1) if creation_time else None,
strategy="pdu_prior_in_time",
verbosity=1,
timeout=cal_db_timeout
)
if mdata is None or data is None:
timestamp = "Not found"
filepath = None
h5path = None
else:
timestamp = mdata.calibration_constant_version.begin_at.isoformat()
filepath = os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
)
h5path = mdata.calibration_constant_version.h5path
return data, timestamp, filepath, h5path
old_retrieval_pool = multiprocessing.Pool()
old_retrieval_res = old_retrieval_pool.starmap_async(
retrieve_old_constant, qm_x_const
)
old_retrieval_pool.close()
```
%% Cell type:code id: tags:
``` python
mnames=[]
for i in modules:
qm = module_index_to_qm(i)
mnames.append(qm)
display(Markdown(f'## Position of the module {qm} and its ASICs'))
show_processed_modules(dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews allow identifying potential effects common to all memory cells, e.g. at the sensor level. Additionally, they serve as a first sanity check of expected behaviour, e.g. whether structuring on the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:markdown id: tags:
### High Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 0
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Medium Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 1
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:markdown id: tags:
### Low Gain ###
%% Cell type:code id: tags:
``` python
cell = 3
gain = 2
show_overview(res, cell, gain, infix="{}-{}-{}".format(*run_numbers))
```
%% Cell type:code id: tags:
``` python
if high_res_badpix_3d:
cols = {
BadPixels.NOISE_OUT_OF_THRESHOLD: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.GAIN_THRESHOLDING_ERROR: (BadPixels.GAIN_THRESHOLDING_ERROR.name, '#FF40FF40'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD: ('OFFSET_OUT_OF_THRESHOLD + NOISE_OUT_OF_THRESHOLD', '#DD00DD80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD | BadPixels.NOISE_OUT_OF_THRESHOLD |
BadPixels.GAIN_THRESHOLDING_ERROR: ('MIXED', '#BFDF009F')
}
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixel values in x/y are rebinned with a factor of 2.
This suppresses isolated single bad pixels.
Hence, only bad pixels spanning at least 4 pixels in the x/y-plane, or at least two memory cells, are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
gnames = ['High Gain', 'Medium Gain', 'Low Gain']
for gain in range(3):
display(Markdown(f'### {gnames[gain]} ###'))
for mod, data in badpix_g.items():
plot_badpix_3d(data[...,gain], cols, title=mod, rebin_fac=1)
plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
create_constant_overview(offset_g, "Offset (ADU)", mem_cells, 4000, 8000,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
create_constant_overview(noise_g, "Noise (ADU)", mem_cells, 0, 100,
badpixels=[badpix_g, np.nan])
```
%% Cell type:code id: tags:
``` python
if not fixed_gain_mode:
# Plot only three gain threshold maps.
bp_thresh = OrderedDict()
for mod, con in badpix_g.items():
bp_thresh[mod] = np.zeros((con.shape[0], con.shape[1], con.shape[2], 5), dtype=con.dtype)
bp_thresh[mod][...,:2] = con[...,:2]
bp_thresh[mod][...,2:] = con
create_constant_overview(thresholds_g, "Threshold (ADU)", mem_cells, 4000, 10000, 5,
badpixels=[bp_thresh, np.nan],
gmap=['HG-MG Threshold', 'MG-LG Threshold', 'High gain', 'Medium gain', 'low gain'],
marker=['d','d','','','']
)
```
%% Cell type:code id: tags:
``` python
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", mem_cells, 0, 0.10, 3)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# now we need the old constants
old_const = {}
old_mdata = {}
old_retrieval_res.wait()
for (qm, const), (data, timestamp, filepath, h5path) in zip(qm_x_const, old_retrieval_res.get()):
old_const.setdefault(qm, {})[const] = data
old_mdata.setdefault(qm, {})[const] = {
"timestamp": timestamp,
"filepath": filepath,
"h5path": h5path
}
```
%% Cell type:code id: tags:
``` python
display(Markdown("The following pre-existing constants are used for comparison:"))
for qm, consts in old_mdata.items():
display(Markdown(f"- {qm}"))
for const in consts:
display(Markdown(f" - {const} at {consts[const]['timestamp']}"))
# saving locations of old constants for summary notebook
with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
yaml.safe_dump(
{
"module": qm,
"pdu": qm_dict[qm]["db_module"],
"old-constants": old_mdata[qm]
},
fd,
)
```
%% Cell type:code id: tags:
``` python
table = []
gain_names = ['High', 'Medium', 'Low']
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR, BadPixels.GAIN_THRESHOLDING_ERROR]
for qm in badpix_g.keys():
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_g[qm][:,:,:,gain])
datau32 = data.astype(np.uint32)
l_data.append(len(datau32[datau32>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_g[qm][:,:,:,gain] & bit))
if old_const[qm]['BadPixelsDark'] is not None:
dataold = np.copy(old_const[qm]['BadPixelsDark'][:, :, :, gain])
datau32old = dataold.astype(np.uint32)
l_data_old.append(len(datau32old[datau32old>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[qm]['BadPixelsDark'][:, :, :, gain] & bit))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR', 'GAIN_THRESHOLDING_ERROR']
        l_threshold = ['', f'{thresholds_noise_sigma} {thresholds_noise_hard[gain]}',
                       f'{thresholds_offset_sigma} {thresholds_offset_hard[gain]}',
                       '', f'{thresholds_gain_sigma}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, {gain_names[gain]} gain', l_threshold[i], l_data[i]]
if old_const[qm]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels
A pixel can be flagged as bad for several reasons; therefore the sum over all bad-pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
if fixed_gain_mode:
constants = ['Offset', 'Noise']
else:
constants = ['Offset', 'Noise', 'ThresholdsDark']
constants_x_qms = list(itertools.product(constants, res.keys()))
def compute_table(const, qm):
if const == 'ThresholdsDark':
table = [['','HG-MG threshold', 'HG-MG threshold', 'MG-LG threshold', 'MG-LG threshold']]
else:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
compare_with_old_constant = old_const[qm][const] is not None and \
old_const[qm]['BadPixelsDark'] is not None
data = np.copy(res[qm][const])
if const == 'ThresholdsDark':
data[...,0][res[qm]['BadPixelsDark'][...,0]>0] = np.nan
data[...,1][res[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data[res[qm]['BadPixelsDark']>0] = np.nan
if compare_with_old_constant:
data_old = np.copy(old_const[qm][const])
if const == 'ThresholdsDark':
data_old[...,0][old_const[qm]['BadPixelsDark'][...,0]>0] = np.nan
data_old[...,1][old_const[qm]['BadPixelsDark'][...,1]>0] = np.nan
else:
data_old[old_const[qm]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
def compute_row(i):
line = [n_list[i]]
for gain in range(3):
# Compare only 3 threshold gain-maps
if gain == 2 and const == 'ThresholdsDark':
continue
stat_measure = f_list[i](data[...,gain])
line.append(f"{stat_measure:6.1f}")
if compare_with_old_constant:
old_stat_measure = f_list[i](data_old[...,gain])
line.append(f"{old_stat_measure:6.1f}")
else:
line.append("-")
return line
with multiprocessing.pool.ThreadPool(processes=multiprocessing.cpu_count() // len(constants_x_qms)) as pool:
rows = pool.map(compute_row, range(len(f_list)))
table.extend(rows)
return table
with multiprocessing.Pool(processes=len(constants_x_qms)) as pool:
tables = pool.starmap(compute_table, constants_x_qms)
for (const, qm), table in zip(constants_x_qms, tables):
display(Markdown(f"### {qm}: {const} [ADU], good pixels only"))
display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id:bed7bd15-21d9-4735-82c1-c27c1a5e3346 tags:
# Gotthard2 Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the Gotthard2 Detector
%% Cell type:code id:570322ed-f611-4fd1-b2ec-c12c13d55843 tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202221/p003225/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/gotthard2" # the folder to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 50 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per node if notebook executed through xfel-calibrate, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "FXE_XAD_G2XES" # karabo prefix of Gotthard-II devices
karabo_da = ["GH201"] # data aggregators
receiver_template = "RECEIVER" # receiver template used to read INSTRUMENT keys.
control_template = "CONTROL" # control template used to read CONTROL keys.
instrument_source_template = "{}/DET/{}:daqOutput" # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = "{}/DET/{}" # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # Control karabo ID. Set to empty string to use the karabo-id
# Parameters for calibration database.
cal_db_interface = "tcp://max-exfl016:8016#8025" # the database interface to use.
cal_db_timeout = 180000 # timeout on caldb requests.
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameters affecting corrected data.
constants_file = "" # Use constants in given constant file path. /gpfs/exfel/data/scratch/ahmedk/dont_remove/gotthard2/constants/calibration_constants_GH2.h5
offset_correction = True # apply offset correction. This can be disabled to only apply LUT or apply LUT and gain correction for non-linear differential results.
gain_correction = True # apply gain correction.
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
# Parameter conditions.
bias_voltage = -1 # Detector bias voltage, set to -1 to use value in raw file.
exposure_time = -1. # Detector exposure time, set to -1 to use value in raw file.
exposure_period = -1. # Detector exposure period, set to -1 to use value in raw file.
acquisition_rate = -1. # Detector acquisition rate (1.1/4.5), set to -1 to use value in raw file.
single_photon = -1 # Detector single photon mode (High/Low CDS), set to -1 to use value in raw file.
# Parameters for plotting
skip_plots = False # exit after writing corrected files
pulse_idx_preview = 3 # pulse index to preview. The following even/odd pulse index is used for preview. # TODO: update to pulseId preview.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id:6e9730d8-3908-41d7-abe2-d78e046d5de2 tags:
``` python
import warnings
from logging import warning
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Markdown, display
from extra_data import RunDirectory, H5File
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, GOTTHARD2_CalibrationData
from cal_tools.files import DataFile
from cal_tools.gotthard2 import gotthard2algs, gotthard2lib
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
calcat_creation_time,
CalibrationMetadata,
)
from XFELDetAna.plotting.heatmap import heatmapPlot
warnings.filterwarnings('ignore')
%matplotlib inline
```
%% Cell type:code id:d7c02c48-4429-42ea-a42e-de45366d7fa3 tags:
``` python
in_folder = Path(in_folder)
run_folder = in_folder / f"r{run:04d}"
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
if not karabo_id_control:
karabo_id_control = karabo_id
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
ctrl_src = ctrl_source_template.format(karabo_id_control, control_template)
print(f"Process modules: {karabo_da} for run {run}")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id:b5eb816e-b5f2-44ce-9907-0273d82341b6 tags:
``` python
# Select only sequence files to process for the selected detector.
if sequences == [-1]:
possible_patterns = list(f"*{mod}*.h5" for mod in karabo_da)
else:
possible_patterns = list(
f"*{mod}-S{s:05d}.h5" for mod in karabo_da for s in sequences
)
run_folder = Path(in_folder / f"r{run:04d}")
seq_files = [
f for f in run_folder.glob("*.h5") if any(f.match(p) for p in possible_patterns)
]
seq_files = sorted(seq_files)
if not seq_files:
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
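%% Cell type:markdown id: tags:
For reference, the glob patterns built above are intended to match the usual RAW sequence file naming; a small check with a hypothetical file name:
%% Cell type:code id: tags:
``` python
from pathlib import PurePath

# Hypothetical sequence file for data aggregator GH201, sequence 0.
example = PurePath("RAW-R0050-GH201-S00000.h5")
print(example.match("*GH201*.h5"))        # True  (sequences == [-1])
print(example.match("*GH201-S00000.h5"))  # True  (sequence 0 selected)
print(example.match("*GH201-S00001.h5"))  # False
```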
%% Cell type:code id:f9a8d1eb-ce6a-4ed0-abf4-4a6029734672 tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id:892172d8 tags:
``` python
# Read slow data
run_dc = RunDirectory(run_folder)
g2ctrl = gotthard2lib.Gotthard2Ctrl(run_dc=run_dc, ctrl_src=ctrl_src)
if bias_voltage == -1:
bias_voltage = g2ctrl.get_bias_voltage()
if exposure_time == -1:
exposure_time = g2ctrl.get_exposure_time()
if exposure_period == -1:
exposure_period = g2ctrl.get_exposure_period()
if acquisition_rate == -1:
acquisition_rate = g2ctrl.get_acquisition_rate()
if single_photon == -1:
single_photon = g2ctrl.get_single_photon()
print("Bias Voltage:", bias_voltage)
print("Exposure Time:", exposure_time)
print("Exposure Period:", exposure_period)
print("Acquisition Rate:", acquisition_rate)
print("Single Photon:", single_photon)
```
%% Cell type:markdown id:8c852392-bb19-4c40-b2ce-3b787538a92d tags:
### Retrieving calibration constants
%% Cell type:code id:5717d722 tags:
``` python
da_to_pdu = {}
# Used for old FXE (p003225) runs before adding Gotthard2 to CALCAT
const_data = dict()
g2_cal = GOTTHARD2_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
exposure_time=exposure_time,
exposure_period=exposure_period,
acquisition_rate=acquisition_rate,
single_photon=single_photon,
event_at=creation_time,
client=rest_cfg.calibration_client(),
)
# Keep as long as it is essential to correct
# RAW data (FXE p003225) before the data mapping was added to CALCAT.
try: # in case local constants are used with old RAW data. This can be removed in the future.
for mod_info in g2_cal.physical_detector_units.values():
da_to_pdu[mod_info["karabo_da"]] = mod_info["physical_name"]
db_modules = [da_to_pdu[da] for da in karabo_da]
except CalCatError as e:
print(e)
db_modules = [None] * len(karabo_da)
if constants_file:
for mod in karabo_da:
const_data[mod] = dict()
# load constants temporarily using defined local paths.
with h5py.File(constants_file, "r") as cfile:
const_data[mod]["LUTGotthard2"] = cfile["LUT"][()]
const_data[mod]["OffsetGotthard2"] = cfile["offset_map"][()].astype(np.float32)
const_data[mod]["RelativeGainGotthard2"] = cfile["gain_map"][()].astype(np.float32)
const_data[mod]["Mask"] = cfile["bpix_ff"][()].astype(np.uint32)
else:
if const_yaml:
const_data = dict()
for mod in karabo_da:
const_data[mod] = dict()
for cname, mdata in const_yaml[mod]["constants"].items():
const_data[mod][cname] = dict()
if mdata["creation-time"]:
with h5py.File(mdata["path"], "r") as cf:
const_data[mod][cname] = np.copy(
cf[f"{mdata['dataset']}/data"])
else:
mdata_dict = {"constants": dict()}
constant_names = ["LUTGotthard2", "OffsetGotthard2", "BadPixelsDarkGotthard2"]
if gain_correction:
constant_names += ["RelativeGainGotthard2", "BadPixelsFFGotthard2"]
        # Retrieve metadata for all Gotthard2 constants.
const_data = g2_cal.ndarray_map(constant_names)
# Validate the constants availability and raise/warn correspondingly.
for mod, calibrations in const_data.items():
dark_constants = {"LUTGotthard2"}
if offset_correction:
dark_constants |= {"OffsetGotthard2", "BadPixelsDarkGotthard2"}
missing_dark_constants = dark_constants - set(calibrations)
if missing_dark_constants:
karabo_da.remove(mod)
warning(f"Dark constants {missing_dark_constants} are not available to correct {mod}.") # noqa
missing_gain_constants = {
"BadPixelsFFGotthard2", "RelativeGainGotthard2"} - set(calibrations)
    if gain_correction and missing_gain_constants:
        warning(f"Gain constants {missing_gain_constants} are not retrieved for mod {mod}. "
                "Gain correction is disabled for this module.")
# Create the mask array.
bpix = const_data[mod].get("BadPixelsDarkGotthard2")
if bpix is None:
bpix = np.zeros((1280, 2, 3), dtype=np.uint32)
if const_data[mod].get("BadPixelsFFGotthard2") is not None:
bpix |= const_data[mod]["BadPixelsFFGotthard2"]
const_data[mod]["Mask"] = bpix
# Prepare empty arrays for missing constants.
if const_data[mod].get("OffsetGotthard2") is None:
const_data[mod]["OffsetGotthard2"] = np.zeros(
(1280, 2, 3), dtype=np.float32)
if const_data[mod].get("RelativeGainGotthard2") is None:
const_data[mod]["RelativeGainGotthard2"] = np.ones(
(1280, 2, 3), dtype=np.float32)
if gain_correction:
gain_correction = False
warning("Gain correction is disabled for this module.")
const_data[mod]["RelativeGainGotthard2"] = const_data[mod]["RelativeGainGotthard2"].astype( # noqa
np.float32, copy=False) # Old gain constants are not float32.
if not karabo_da:
    raise ValueError("Dark constants are not available for any of the selected modules.")
```
%% Cell type:code id:23fcf7f4-351a-4df7-8829-d8497d94fecc tags:
``` python
context = psh.ProcessContext(num_workers=23)
```
%% Cell type:code id:daecd662-26d2-4cb8-aa70-383a579cf9f9 tags:
``` python
def correct_train(wid, index, d):
g = gain[index]
gotthard2algs.convert_to_10bit(d, const_data[mod]["LUTGotthard2"], data_corr[index, ...])
gotthard2algs.correct_train(
data_corr[index, ...],
mask[index, ...],
g,
const_data[mod]["OffsetGotthard2"],
const_data[mod]["RelativeGainGotthard2"],
const_data[mod]["Mask"],
apply_offset=offset_correction,
apply_gain=gain_correction,
)
```
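%% Cell type:markdown id: tags:
`correct_train` is executed once per entry along the first axis of the raw data through pasha's shared-memory map: output arrays are allocated via the context and every call writes into its own index. A minimal, self-contained sketch of that pattern, independent of the Gotthard2-specific algorithms (the toy arrays below are made up):
%% Cell type:code id: tags:
``` python
import numpy as np
import pasha as psh

ctx = psh.ProcessContext(num_workers=4)

raw = np.arange(12, dtype=np.float32).reshape(6, 2)   # toy "raw" data
corr = ctx.alloc(shape=raw.shape, dtype=np.float32)   # shared output array

def double(wid, index, entry):
    # One call per entry along the first axis; results land in the shared array.
    corr[index] = entry * 2

ctx.map(double, raw)
print(corr)
```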
%% Cell type:code id:f88c1aa6-a735-4b72-adce-b30162f5daea tags:
``` python
for mod in karabo_da:
    # This is used in case the receiver template contains the karabo
    # data aggregator index, e.g. for the detector at DETLAB.
instr_mod_src = instrument_src.format(mod[-2:])
data_path = "INSTRUMENT/" + instr_mod_src + "/data"
for raw_file in seq_files:
step_timer.start()
dc = H5File(raw_file)
out_file = out_folder / raw_file.name.replace("RAW", "CORR")
# Select module INSTRUMENT source and deselect empty trains.
dc = dc.select(instr_mod_src, require_all=True)
data = dc[instr_mod_src, "data.adc"].ndarray()
gain = dc[instr_mod_src, "data.gain"].ndarray()
step_timer.done_step("preparing raw data")
dshape = data.shape
step_timer.start()
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask = context.alloc(shape=dshape, dtype=np.uint32)
context.map(correct_train, data)
step_timer.done_step("Correcting one sequence file")
step_timer.start()
        # The provided PSI gain map contains zero values, which can lead to inf
        # in the corrected data; set inf values to NaN.
        # TODO: This can maybe be removed after creating XFEL gain maps.
data_corr[np.isinf(data_corr)] = np.nan
# Create CORR files and add corrected data sections.
image_counts = dc[instrument_src, "data.adc"].data_counts(labelled=False)
with DataFile(out_file, "w") as ofile:
# Create INDEX datasets.
ofile.create_index(dc.train_ids, from_file=dc.files[0])
            # Create METADATA datasets
ofile.create_metadata(
like=dc,
sequence=dc.run_metadata()["sequenceNumber"],
instrument_channels=(f"{instrument_src}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=dc.train_ids,
chunks=min(50, len(dc.train_ids))
)
# Create datasets with the available corrected data
for field_name, field_data in {
"adc": data_corr,
"gain": gain,
}.items():
outp_source.create_key(
f"data.{field_name}", data=field_data,
chunks=((chunks_data,) + data_corr.shape[1:])
)
for field in ["bunchId", "memoryCell", "frameNumber", "timestamp"]:
outp_source.create_key(
f"data.{field}", data=dc[instr_mod_src, f"data.{field}"].ndarray(),
chunks=(chunks_data, data_corr.shape[1])
)
outp_source.create_compressed_key(f"data.mask", data=mask)
step_timer.done_step("Storing data")
```
%% Cell type:code id:94b8e4d2-9f8c-4c23-a509-39238dd8435c tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id:0ccc7f7e-2a3f-4ac0-b854-7d505410d2fd tags:
``` python
if skip_plots:
print("Skipping plots")
import sys
sys.exit(0)
```
%% Cell type:code id:ff203f77-3811-46f3-bf7d-226d2dcab13f tags:
``` python
mod_dcs = {}
first_seq_raw = seq_files[0]
first_seq_corr = out_folder / first_seq_raw.name.replace("RAW", "CORR")
for mod in karabo_da:
mod_dcs[mod] = {}
with H5File(first_seq_corr) as out_dc:
tid, mod_dcs[mod]["train_corr_data"] = next(
out_dc[instr_mod_src, "data.adc"].trains()
)
with H5File(first_seq_raw) as in_dc:
train_dict = in_dc.train_from_id(tid)[1][instr_mod_src]
mod_dcs[mod]["train_raw_data"] = train_dict["data.adc"]
mod_dcs[mod]["train_raw_gain"] = train_dict["data.gain"]
```
%% Cell type:code id:1b379438-eb1d-42b2-ac83-eb8cf88c46db tags:
``` python
display(Markdown("### Mean RAW and CORRECTED across pulses for one train:"))
display(Markdown(f"Train: {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 10))
raw_data = mod_dcs[mod]["train_raw_data"]
im = ax.plot(np.mean(raw_data, axis=0))
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
fig, ax = plt.subplots(figsize=(20, 10))
corr_data = mod_dcs[mod]["train_corr_data"]
im = ax.plot(np.mean(corr_data, axis=0))
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV. output", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
pass
step_timer.done_step("Plotting mean data")
```
%% Cell type:code id:58a6a276 tags:
``` python
display(Markdown(f"### RAW and CORRECTED strips across pulses for train {tid}"))
step_timer.start()
for mod, pdu in zip(karabo_da, db_modules):
for plt_data, dname in zip(
["train_raw_data", "train_corr_data"], ["RAW", "CORRECTED"]
):
fig, ax = plt.subplots(figsize=(15, 20))
plt.rcParams.update({"font.size": 20})
heatmapPlot(
mod_dcs[mod][plt_data],
y_label="Pulses",
x_label="Strips",
title=f"{dname} module {mod} ({pdu})",
use_axis=ax,
)
pass
step_timer.done_step("Plotting RAW and CORRECTED data for one train")
```
%% Cell type:code id:cd8f5e08-fcee-4bff-ba63-6452b3d892a2 tags:
``` python
# Validate given "pulse_idx_preview"
if pulse_idx_preview + 1 > data.shape[1]:
print(
f"WARNING: selected pulse_idx_preview {pulse_idx_preview} is not available in data."
" Previewing 1st pulse."
)
pulse_idx_preview = 1
if data.shape[1] == 1:
odd_pulse = 1
even_pulse = None
else:
odd_pulse = pulse_idx_preview if pulse_idx_preview % 2 else pulse_idx_preview + 1
even_pulse = (
pulse_idx_preview if not (pulse_idx_preview % 2) else pulse_idx_preview + 1
)
```
%% Cell type:code id:e5f0d4d8-e32c-4f2c-8469-4ebbfd3f644c tags:
``` python
display(Markdown("### RAW and CORRECTED even/odd pulses for one train:"))
display(Markdown(f"Train: {tid}"))
for mod, pdu in zip(karabo_da, db_modules):
fig, ax = plt.subplots(figsize=(20, 20))
raw_data = mod_dcs[mod]["train_raw_data"]
corr_data = mod_dcs[mod]["train_corr_data"]
ax.plot(raw_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(raw_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"RAW module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("12-bit ADC RAW", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
fig, ax = plt.subplots(figsize=(20, 20))
ax.plot(corr_data[odd_pulse], label=f"Odd Pulse {odd_pulse}")
if even_pulse:
ax.plot(corr_data[even_pulse], label=f"Even Pulse {even_pulse}")
ax.set_title(f"CORRECTED module {mod} ({pdu})")
ax.set_xlabel("Strip #", size=20)
ax.set_ylabel("10-bit KeV CORRECTED", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
ax.legend()
pass
step_timer.done_step("Plotting RAW and CORRECTED odd/even pulses.")
```
......
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [-1] # Sequences to correct, use [-1] for all
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input.
xgm_source = 'SA1_XTD2_XGM/DOOCS/MAIN'
xgm_pulse_count_key = 'pulseEnergy.numberOfSa1BunchesActual'
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
cal_db_interface = '' # Not needed, compatibility with current webservice.
cal_db_timeout = 0 # Not needed, compatibility with current webservice.
cal_db_root = '/gpfs/exfel/d/cal/caldb_store' # The calibration database root path to access constant files. For example accessing constants from the test database.
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = 'auto' # Whether to use memory cell order as a detector condition; auto/always/never
# Correction parameters
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
# Output options
ignore_no_frames_no_pulses = False # Whether to run without SA1 pulses AND frames.
overwrite = True # set to True if existing data should be overwritten
chunks_data = 1 # HDF chunk size for pixel data in number of frames.
chunks_ids = 32 # HDF chunk size for cellId and pulseId datasets.
create_virtual_cxi_in = '' # Folder to create virtual CXI files in (for each sequence).
# Parallelization options
sequences_per_node = 1 # Sequence files to process per node
max_nodes = 8 # Maximum number of SLURM jobs to split correction work into
num_workers = 8 # Worker processes per node, 8 is safe on 768G nodes but won't work on 512G.
num_threads_per_worker = 32 # Number of threads per worker.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da, max_nodes=max_nodes)
```
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from logging import warning
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import CalCatError, LPD_CalibrationData
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.lpdlib import get_mem_cell_pattern, make_cell_order_condition
from cal_tools.tools import (
    CalibrationMetadata,
    calcat_creation_time,
    write_constants_fragment,
)
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
run_folder = Path(in_folder) / f'r{run:04d}'
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
cal_db_root = Path(cal_db_root)
metadata = CalibrationMetadata(metadata_folder or out_folder)
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
    if not modules or modules == [-1]:
        modules = list(range(16))
    karabo_da = [f'LPD{i:02d}' for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]
# Pick all sequences or those selected.
if not sequences or sequences == [-1]:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
if use_cell_order not in {'auto', 'always', 'never'}:
raise ValueError("use_cell_order must be auto/always/never")
```
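%% Cell type:markdown id: tags:
The `do_sequence` predicate defined above is either a constant-true lambda or the bound `__contains__` method of the selected sequence list; a quick sketch of its behaviour:
%% Cell type:code id: tags:
``` python
# Explicit selection: membership test on the given sequence numbers.
do_seq = [int(x) for x in [0, 2]].__contains__
print(do_seq(0), do_seq(1), do_seq(2))  # True False True

# sequences == [-1]: every sequence number is accepted.
do_all = lambda seq: True
print(do_all(17))  # True
```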
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
data_to_process = []
for inp_path in run_folder.glob('RAW-*.h5'):
match = file_re.match(inp_path.stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((match[2], inp_path, outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[0]}{x[1]}'):
print(f'{data_descr[0]}\t{data_descr[1]}')
# Collect the train ID contained in the input LPD files.
inp_lpd_dc = xd.DataCollection.from_paths([x[1] for x in data_to_process])
frame_count = sum([
int(inp_lpd_dc[source, 'image.data'].data_counts(labelled=False).sum())
for source in inp_lpd_dc.all_sources], 0)
if frame_count == 0:
inp_dc = xd.RunDirectory(run_folder) \
.select_trains(xd.by_id[inp_lpd_dc.train_ids])
try:
pulse_count = int(inp_dc[xgm_source, xgm_pulse_count_key].ndarray().sum())
except xd.SourceNameError:
warning(f'Missing XGM source `{xgm_source}`')
pulse_count = None
except xd.PropertyNameError:
warning(f'Missing XGM pulse count key `{xgm_pulse_count_key}`')
pulse_count = None
if pulse_count == 0 and not ignore_no_frames_no_pulses:
warning(f'Affected files contain neither LPD frames nor SA1 pulses '
f'according to {xgm_source}, processing is skipped. If this '
                f'is incorrect, please contact da-support@xfel.eu')
from sys import exit
exit(0)
elif pulse_count is None:
raise ValueError('Affected files contain no LPD frames and SA1 pulses '
'could not be inferred from XGM data')
else:
raise ValueError('Affected files contain no LPD frames but SA1 pulses')
else:
print(f'Total number of LPD pulses across all modules: {frame_count}')
```
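For reference, the `file_re` pattern used above splits a raw file stem into run number, aggregator and sequence index. A small sketch with a made-up file name:
``` python
import re

file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$')

# Hypothetical file RAW-R0010-LPD03-S00002.h5 -> match its stem (no extension):
match = file_re.match('RAW-R0010-LPD03-S00002')
print(int(match[1]), match[2], int(match[3]))  # 10 LPD03 2
```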
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
start = perf_counter()
cell_ids_pattern_s = None
if use_cell_order != 'never':
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = make_cell_order_condition(
use_cell_order, get_mem_cell_pattern(raw_data, det_inp_sources)
)
print("Memory cells order:", cell_ids_pattern_s)
lpd_cal = LPD_CalibrationData(
detector_name=karabo_id,
modules=karabo_da,
sensor_bias_voltage=bias_voltage,
memory_cells=mem_cells,
feedback_capacitor=capacitor,
source_energy=photon_energy,
memory_cell_order=cell_ids_pattern_s,
category=category,
event_at=creation_time,
client=rest_cfg.calibration_client(),
caldb_root=Path(cal_db_root),
)
lpd_metadata = lpd_cal.metadata(["Offset", "BadPixelsDark"])
try:
illum_metadata = lpd_cal.metadata(lpd_cal.illuminated_calibrations)
for key, value in illum_metadata.items():
lpd_metadata.setdefault(key, {}).update(value)
except CalCatError as e: # TODO: replace when API errors are improved.
warning(f"CalCatError: {e}")
total_time = perf_counter() - start
print(f'Looking up constants {total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
const_yaml = metadata.setdefault("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)
if const_yaml: # Read constants from YAML file.
start = perf_counter()
for da, ccvs in const_yaml.items():
# Validate the constants availability and raise/warn accordingly.
for mod, calibrations in lpd_metadata.items():
missing_offset = {"Offset"} - set(calibrations)
warn_missing_constants = {
"BadPixelsDark", "BadPixelsFF", "GainAmpMap",
"FFMap", "RelativeGain"} - set(calibrations)
if missing_offset:
warning(f"Offset constant is not available to correct {mod}.")
karabo_da.remove(mod)
if warn_missing_constants:
warning(f"Constants {warn_missing_constants} were not retrieved for {mod}.")
if not karabo_da: # Offsets are missing for all modules.
raise Exception("Could not find offset constants for any modules, will not correct data.")
for calibration_name, ccv in ccvs['constants'].items():
if ccv['file-path'] is None:
warnings.warn(f"Missing {calibration_name} for {da}")
continue
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['file-path']),
dataset=ccv['dataset-name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
else: # Retrieve constants from CALCAT.
dark_calibrations = {
1: 'Offset', # np.float32
14: 'BadPixelsDark' # should be np.uint32, but is np.float64
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.DataCollection.from_paths([e[1] for e in data_to_process])
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF', # np.uint32
42: 'GainAmpMap', # np.float32
43: 'FFMap', # np.float32
44: 'RelativeGain' # np.float32
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da='', event_at=creation_time.isoformat()
)
if not resp['success']:
raise RuntimeError(resp)
for ccv in resp['data']:
cc = ccv['calibration_constant']
da = ccv['physical_detector_unit']['karabo_da']
calibration_name = calibrations[cc['calibration_id']]
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['path_to_file']) / ccv['file_name'],
dataset=ccv['data_set_name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
print('.', end='', flush=True)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
# Remove skipped correction modules from data_to_process
data_to_process = [(mod, in_f, out_f) for mod, in_f, out_f in data_to_process if mod in karabo_da]
```
%% Cell type:code id: tags:
``` python
def load_constant_dataset(wid, index, const_descr):
ccv_entry = const_data[const_descr]
# write constants metadata to fragment YAML
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=lpd_metadata,
caldb_root=lpd_cal.caldb_root,
)
with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])
print('.', end='', flush=True)
print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data.keys()))
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
# Load constants data for all constants
const_data = lpd_cal.ndarray_map(metadata=lpd_metadata)
```
%% Cell type:code id: tags:
``` python
# These are intended in order cell, X, Y, gain
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}
ccv_shape = (mem_cells, 256, 256, 3)
constant_order = {
'Offset': (2, 1, 0, 3),
'BadPixelsDark': (2, 1, 0, 3),
    'RelativeGain': (2, 0, 1, 3),
'FFMap': (2, 0, 1, 3),
'BadPixelsFF': (2, 0, 1, 3),
'GainAmpMap': (2, 0, 1, 3),
}
def prepare_constants(wid, index, aggregator):
consts = {calibration_name: entry['data']
for (aggregator_, calibration_name), entry
in const_data.items()
if aggregator == aggregator_}
consts = const_data.get(aggregator, {})
def _prepare_data(calibration_name, dtype):
# Some old BadPixels constants have <f8 dtype.
# Convert nan to float 0 to avoid having 2147483648 after
# converting float64 to uint32.
if "BadPixels" in calibration_name and consts[calibration_name].dtype != np.uint32:
consts[calibration_name] = np.nan_to_num(
consts[calibration_name], nan=0.0)
return consts[calibration_name] \
.transpose(constant_order[calibration_name]) \
.astype(dtype, copy=True) # Make sure array is contiguous.
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = _prepare_data('Offset', np.float32)
else:
ccv_offsets[aggregator] = np.zeros(ccv_shape, dtype=np.float32)
ccv_gains[aggregator] = np.ones(ccv_shape, dtype=np.float32)
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = _prepare_data('BadPixelsDark', np.uint32)
else:
ccv_masks[aggregator] = np.zeros(ccv_shape, dtype=np.uint32)
if rel_gain and 'RelativeGain' in consts:
ccv_gains[aggregator] *= _prepare_data('RelativeGain', np.float32)
if ff_map and 'FFMap' in consts:
ccv_gains[aggregator] *= _prepare_data('FFMap', np.float32)
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], _prepare_data('BadPixelsFF', np.uint32),
out=ccv_masks[aggregator])
if gain_amp_map and 'GainAmpMap' in consts:
ccv_gains[aggregator] *= _prepare_data('GainAmpMap', np.float32)
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
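The `constant_order` tuples above only permute the axes of the stored constants into the `(cell, slow_scan, fast_scan, gain)` layout expected by `correct_lpd_frames`; the two variants differ in how the pixel axes are swapped. A shape-only sketch (512 memory cells assumed purely for illustration):
``` python
import numpy as np

# Constants are loaded with shape (256, 256, cells, gains); the transpose moves
# the memory-cell axis to the front so the result matches ccv_shape.
stored = np.zeros((256, 256, 512, 3), dtype=np.float32)
print(stored.transpose(2, 1, 0, 3).shape)  # (512, 256, 256, 3)
print(stored.transpose(2, 0, 1, 3).shape)  # (512, 256, 256, 3), pixel axes swapped
```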
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
aggregator, inp_path, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(inp_path, inc_suspect_trains=False).select('*', 'image.*', require_all=True)
inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
open_time = perf_counter() - start
# Load raw data for this file.
# Reshaping gets rid of the extra 1-len dimensions without
# mangling the frame axis for an actual frame count of 1.
start = perf_counter()
in_raw = inp_source['image.data'].ndarray().reshape(-1, 256, 256)
in_cell = inp_source['image.cellId'].ndarray().reshape(-1)
in_pulse = inp_source['image.pulseId'].ndarray().reshape(-1)
read_time = perf_counter() - start
# Allocate output arrays.
out_data = np.zeros((in_raw.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_raw.shape[0], 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_raw, in_cell,
out_data, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
with DataFile(outp_path, 'w') as outp_file:
outp_file.create_index(dc.train_ids, from_file=dc.files[0])
outp_file.create_metadata(like=dc, instrument_channels=(f'{outp_source_name}/image',))
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_key('image.cellId', data=in_cell,
chunks=(min(chunks_ids, in_cell.shape[0]),))
outp_source.create_key('image.pulseId', data=in_pulse,
chunks=(min(chunks_ids, in_pulse.shape[0]),))
outp_source.create_key('image.data', data=out_data,
chunks=(min(chunks_data, out_data.shape[0]), 256, 256))
outp_source.create_compressed_key('image.gain', data=out_gain)
outp_source.create_compressed_key('image.mask', data=out_mask)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_raw.shape[0] / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_raw.shape[0], frame_rate))
in_raw = None
in_cell = None
in_pulse = None
out_data = None
out_gain = None
out_mask = None
gc.collect()
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
if not output_paths:
warning('Data preview is skipped as there are no existing output paths')
from sys import exit
exit(0)
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=2000, range=(-1500, 2000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(vmin-(vmax-vmin)*0.1, vmax+(vmax-vmin)*0.1)
ax.set_ylim(0, max_value*1.1)
pass
```
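The cumulative-histogram lookup above effectively selects the 1st and 99th percentiles of the corrected intensities as the colormap interval. An approximate, shorter equivalent (assuming the `data`, `left_edge_ratio` and `right_edge_ratio` defined in the cell above) would be:
``` python
# Approximate equivalent of the vmin/vmax derivation above, using percentiles
# directly instead of a binned cumulative sum (ignores the histogram range clip).
vmin_approx, vmax_approx = np.nanpercentile(
    np.ravel(data.data), [left_edge_ratio * 100, right_edge_ratio * 100])
print(vmin_approx, vmax_approx)
```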
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
%% Cell type:markdown id: tags:
### Create virtual CXI file
%% Cell type:code id: tags:
``` python
if create_virtual_cxi_in:
vcxi_folder = Path(create_virtual_cxi_in.format(
run=run, proposal_folder=str(Path(in_folder).parent)))
vcxi_folder.mkdir(parents=True, exist_ok=True)
def sort_files_by_seq(by_seq, outp_path):
by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
return by_seq
from functools import reduce
reduce(sort_files_by_seq, output_paths, output_by_seq := {})
for seq_number, seq_output_paths in output_by_seq.items():
# Create data collection and detector components only for this sequence.
try:
det = LPD1M(xd.DataCollection.from_paths(seq_output_paths), detector_name=karabo_id, min_modules=4)
except ValueError: # Couldn't find enough data for min_modules
continue
det.write_virtual_cxi(vcxi_folder / f'VCXI-LPD-R{run:04d}-S{seq_number:05d}.cxi')
```
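The `reduce` call above groups the output files by the sequence number encoded in the last five characters of their stem. For readers less familiar with `functools.reduce`, an equivalent plain-loop sketch:
``` python
# Equivalent grouping without functools.reduce:
# map each sequence number to the list of corrected module files belonging to it.
output_by_seq = {}
for outp_path in output_paths:
    output_by_seq.setdefault(int(outp_path.stem[-5:]), []).append(outp_path)
```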
%% Cell type:markdown id: tags:
# LPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
The following notebook retrieves constants metadata and stores it in a YAML file, to be used while correcting LPD images.
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
modules = [-1] # Modules indices to correct, use [-1] for all, only used when karabo_da is empty
karabo_da = [''] # Data aggregators names to correct, use [''] for all
run = 10 # run to process, required
# Source parameters
karabo_id = 'FXE_DET_LPD1M-1' # Karabo domain for detector.
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
# CalCat parameters
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Operating conditions
mem_cells = 512 # Memory cells, LPD constants are always taken with 512 cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in keV.
category = 0 # Whom to blame.
use_cell_order = False # Whether to use memory cell order as a detector condition (not stored for older constants)
```
%% Cell type:code id: tags:
``` python
from pathlib import Path
from time import perf_counter
import numpy as np
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
from cal_tools.lpdlib import get_mem_cell_order
from cal_tools.tools import (
CalibrationMetadata,
calcat_creation_time,
save_constant_metadata,
)
from cal_tools.restful_config import restful_config
```
%% Cell type:code id: tags:
``` python
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths & timestamps are saved under retrieved-constants in calibration_metadata.yml
retrieved_constants = metadata.setdefault("retrieved-constants", {})
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da or karabo_da == ['']:
if not modules or modules == [-1]:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
dark_calibrations = {
1: 'Offset',
14: 'BadPixelsDark',
}
base_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
if use_cell_order:
# Read the order of memory cells used
raw_data = xd.RunDirectory(Path(in_folder, f'r{run:04d}'))
cell_ids_pattern_s = get_mem_cell_order(raw_data, det_inp_sources)
print("Memory cells order:", cell_ids_pattern_s)
dark_condition = base_condition + [
dict(parameter_id=30, value=cell_ids_pattern_s), # Memory cell order
]
else:
dark_condition = base_condition.copy()
illuminated_calibrations = {
20: 'BadPixelsFF',
42: 'GainAmpMap',
43: 'FFMap',
44: 'RelativeGain',
}
illuminated_condition = base_condition + [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
```
%% Cell type:code id: tags:
``` python
const_data = {}
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for k_da in karabo_da:
pdu = None
retrieved_constants[k_da] = dict()
const_mdata = retrieved_constants[k_da]["constants"] = dict()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, karabo_id, list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da=k_da, event_at=creation_time.isoformat())
if not resp["success"]:
print(f"ERROR: Constants {list(calibrations.values())} "
f"were not retrieved, {resp['app_info']}")
for cname in calibrations.values():
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = None
const_mdata[cname]["dataset-name"] = None
const_mdata[cname]["creation-time"] = None
continue
for ccv in resp["data"]:
cc = ccv['calibration_constant']
cname = calibrations[cc['calibration_id']]
const_mdata[cname] = dict()
const_mdata[cname]["file-path"] = str(Path(ccv['path_to_file']) / ccv['file_name'])
const_mdata[cname]["dataset-name"] = ccv['data_set_name']
const_mdata[cname]["creation-time"] = ccv['begin_at']
pdu = ccv['physical_detector_unit']['physical_name']
print('.', end='', flush=True)
retrieved_constants[k_da]["physical-detector-unit"] = pdu
metadata.save()
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
print(f"Stored retrieved constants in {metadata.filename}")
```
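After this cell, `calibration_metadata.yml` contains a `retrieved-constants` section keyed by aggregator, which the correction notebook later reads instead of querying the database again. A sketch of the resulting structure, expressed as the equivalent Python mapping (all paths, dataset names and timestamps below are invented placeholders):
``` python
# Sketch of the retrieved-constants structure saved to calibration_metadata.yml.
# Every value here is a made-up placeholder, not a real database entry.
retrieved_constants_example = {
    "LPD00": {
        "constants": {
            "Offset": {
                "file-path": "/path/to/caldb_store/some_constant_file.h5",
                "dataset-name": "/some/dataset/0/data",
                "creation-time": "2022-01-01T00:00:00.000+01:00",
            },
            # ... BadPixelsDark, BadPixelsFF, GainAmpMap, FFMap, RelativeGain ...
        },
        "physical-detector-unit": "LPD_EXAMPLE_PDU_NAME",
    },
}
```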
%% Cell type:markdown id: tags:
# LPD Mini Offset, Noise and Dead Pixels Characterization #
Author: M. Karnevskiy, S. Hauf
This notebook re-characterizes dark images to derive offset, noise and bad-pixel maps. All three types of constants are evaluated per-pixel and per-memory cell.
The notebook will correctly handle veto settings, but note that if you veto cells you will not be able to use these offsets for runs with different veto settings - vetoed cells will have zero offset.
The evaluated calibration constants are stored locally and injected in the calibration data base.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/FXE/202231/p900316/raw/" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/kluyvert/darks-lpdmini" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run_high = 10 # run number in which high gain data was recorded, required
run_med = 11 # run number in which medium gain data was recorded, required
run_low = 12 # run number in which low gain data was recorded, required
karabo_id = "FXE_DET_LPD_MINI" # karabo karabo_id
karabo_da = [''] # a list of data aggregators names with module number, e.g. 'LPDMINI00/2', Default [''] for selecting all data aggregators
source_name = "{}/DET/0CH0:xtdf" # Source name for raw detector data
control_source_name = "{}/FPGA/FEM" # Source name for control device
creation_time = "" # Override the timestamp taken from the run (format '2023-03-30 15:04:31')
cal_db_interface = "tcp://max-exfl016:8015#8025" # the database interface to use
cal_db_timeout = 300000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
capacitor_setting = 5 # capacitor_setting for which data was taken
bias_voltage_0 = -1 # bias voltage for minis 1, 3, 5, 7; Setting -1 will read the value from files
bias_voltage_1 = -1 # bias voltage for minis 2, 4, 6, 8; Setting -1 will read the value from files
thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset
thresholds_offset_hard = [400, 1500] # bad pixel hard threshold
thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise
thresholds_noise_hard = [1, 35] # bad pixel hard threshold
skip_first_ntrains = 10 # Number of first trains to skip
ntrains = 500 # number of trains to use
min_trains = 370 # minimum number of trains needed for each gain stage
high_res_badpix_3d = False # plot bad-pixel summary in high resolution
test_for_normality = False # perform normality test
inject_cell_order = False # Include memory cell order as part of the detector condition
operation_mode = "" # This in given as a default input argument through the webservice in production. Don't remove!
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import os
import warnings
warnings.filterwarnings('ignore')
import matplotlib
import pasha as psh
import scipy.stats
from IPython.display import Latex, Markdown, display
matplotlib.use("agg")
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from extra_data import RunDirectory
from iCalibrationDB import Conditions, Constants
from XFELDetAna.plotting.heatmap import heatmapPlot
from XFELDetAna.plotting.simpleplot import simplePlot
from cal_tools.calcat_interface import CalCatApi
from cal_tools.enums import BadPixels
from cal_tools.plotting import plot_badpix_3d
from cal_tools.restful_config import calibration_client
from cal_tools.tools import (
calcat_creation_time,
get_from_db,
get_report,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
```
%% Cell type:code id: tags:
``` python
mem_cells = 512
gain_names = ['High', 'Medium', 'Low']
const_shape = (mem_cells, 32, 256, 3) # cells, slow_scan, fast_scan, gain
gain_runs = {}
if capacitor_setting == 5:
gain_runs["high_5pf"] = run_high
gain_runs["med_5pf"] = run_med
gain_runs["low_5pf"] = run_low
elif capacitor_setting == 50:
gain_runs["high_50pf"] = run_high
gain_runs["med_50pf"] = run_med
gain_runs["low_50pf"] = run_low
capacitor_settings = [capacitor_setting]
capacitor_settings = ['{}pf'.format(c) for c in capacitor_settings]
creation_time = calcat_creation_time(in_folder, run_high, creation_time)
print(f"Using {creation_time} as creation time")
source_name = source_name.format(karabo_id)
control_source_name = control_source_name.format(karabo_id)
if -1 in {bias_voltage_0, bias_voltage_1}:
run_data = RunDirectory(os.path.join(in_folder, f"r{run_high:04d}"))
if bias_voltage_0 == -1:
bias_voltage_0 = run_data[control_source_name, 'sensorBiasVoltage0'].as_single_value(atol=5.)
if bias_voltage_1 == -1:
bias_voltage_1 = run_data[control_source_name, 'sensorBiasVoltage1'].as_single_value(atol=5.)
run, prop, seq = run_prop_seq_from_path(in_folder)
display(Markdown('## Evaluated parameters'))
print(f'CalDB Interface {cal_db_interface}')
print(f"Proposal: {prop}")
print(f"Memory cells: {mem_cells}")
print(f"Runs: {run_high}, {run_med}, {run_low}")
print(f"Using DB: {db_output}")
print(f"Input: {in_folder}")
print(f"Output: {out_folder}")
print(f"Bias voltage: {bias_voltage_0}V & {bias_voltage_1}V")
```
%% Cell type:code id: tags:
``` python
calcat = CalCatApi(client=calibration_client())
detector_id = calcat.detector(karabo_id)['id']
pdus_by_da = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
pdu_name_by_da = {da: p['physical_name'] for (da, p) in pdus_by_da.items()}
if karabo_da and karabo_da != ['']:
karabo_da = [da for da in karabo_da if da in pdu_name_by_da]
pdu_name_by_da = {da: pdu_name_by_da[da] for da in karabo_da}
else:
karabo_da = sorted(pdu_name_by_da.keys())
modules = [int(x.split('/')[-1]) for x in karabo_da]
```
%% Cell type:code id: tags:
``` python
print("Minis in use (1-8) and PDUs:")
for mod_num, karabo_da_m in zip(modules, karabo_da):
print(f"Mini {mod_num} -> {pdu_name_by_da[karabo_da_m]}")
```
%% Cell type:markdown id: tags:
## Data processing
%% Cell type:code id: tags:
``` python
parallel_num_procs = min(6, len(modules)*3)
parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs
# the actual characterization
def characterize_detector(run_path, gg):
run = RunDirectory(run_path, parallelize=False)
det_source = source_name.format(karabo_id)
data = run[det_source, 'image.data'].drop_empty_trains()
data = data[skip_first_ntrains : skip_first_ntrains + ntrains]
cell_ids = run[det_source, 'image.cellId'].drop_empty_trains()
cell_ids = cell_ids[skip_first_ntrains : skip_first_ntrains + ntrains]
if len(data.train_ids) < min_trains:
raise Exception(f"Run {run_path} only contains {len(data.train_ids)} trains, but {min_ntrains} required")
im = data.ndarray()
if im.ndim > 3:
im = im[:, 0] # Drop extra dimension
cellid = cell_ids.ndarray()
cellid_pattern = cell_ids[0].ndarray()
if cellid.ndim > 1:
cellid = cellid[:, 0]
cellid_pattern = cellid_pattern[:, 0]
# Mask off gain bits, leaving only data
im &= 0b0000111111111111
im = im.astype(np.float32)
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
# Results here should have shape (512, [<= 256], 256)
offset = context.alloc(shape=(mem_cells,) + im.shape[1:], dtype=np.float64)
noise = context.alloc(like=offset)
normal_test = context.alloc(like=offset)
def process_cell(worker_id, array_index, cc):
idx = cellid == cc
im_slice = im[idx]
if np.any(idx):
offset[cc] = np.median(im_slice, axis=0)
noise[cc] = np.std(im_slice, axis=0)
if test_for_normality:
_, normal_test[cc] = scipy.stats.normaltest(im_slice, axis=0)
context.map(process_cell, np.unique(cellid))
# bad pixels
bp = np.zeros(offset.shape, np.uint32)
# offset related bad pixels
offset_mn = np.nanmedian(offset, axis=(1, 2)).reshape(-1, 1, 1) # add some axes to make broadcasting work
offset_std = np.nanstd(offset, axis=(1, 2)).reshape(-1, 1, 1)
bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
(offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[(offset < thresholds_offset_hard[0]) | (
offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# noise related bad pixels
noise_mn = np.nanmedian(noise, axis=(1, 2)).reshape(-1, 1, 1) # add some axes to make broadcasting work
noise_std = np.nanstd(noise, axis=(1, 2)).reshape(-1, 1, 1)
bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
(noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[(noise < thresholds_noise_hard[0]) | (
noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value
idx = (cellid == cellid[0])
return offset, noise, gg, bp, im[idx, 12::32, 12], normal_test, cellid_pattern
```
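The raw pixel words carry the gain information in their upper bits (as noted in the comment above), so the bit mask keeps only the 12-bit ADC value. A tiny sketch of the masking with a made-up raw word:
``` python
import numpy as np

# 0b0000111111111111 == 0x0FFF: keep the lower 12 bits, drop the gain bits.
raw = np.array([0x3456], dtype=np.uint16)  # made-up raw word
adc = raw & 0x0FFF
print(hex(int(adc[0])))  # 0x456
```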
%% Cell type:code id: tags:
``` python
def modno_to_slice(m):
return slice((m-1) * 32, m * 32)
```
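Each mini module covers a 32-row band of the (up to) 256-row frame, and `modno_to_slice` maps the 1-based module number onto those rows, for example:
``` python
# Module numbering is 1-based: module 1 -> rows 0..31, module 8 -> rows 224..255.
print(modno_to_slice(1))  # slice(0, 32)
print(modno_to_slice(8))  # slice(224, 256)
```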
%% Cell type:code id: tags:
``` python
offset_consts = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
noise_consts = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
badpix_consts = {m: np.zeros(const_shape, dtype=np.uint32) for m in modules}
normal_tests = {m: np.zeros(const_shape, dtype=np.float64) for m in modules}
data_samples = {m: np.full((ntrains, 3), np.nan) for m in modules} # pixel (12, 12) in first frame of each train
# Should be the same cell order for all modules & all gain stages
cellid_pattern_prev = None
gg = 0
inp = []
for gain_i, (gain, run_num) in enumerate(gain_runs.items()):
run_path = os.path.join(in_folder, f"r{run_num:04d}")
print("Process run: ", run_path)
inp.append((run_path, gain_i))
with multiprocessing.Pool(processes=parallel_num_procs) as pool:
results = pool.starmap(characterize_detector, inp)
for ir, r in enumerate(results):
offset, noise, gg, bp, data, normal, cellid_pattern = r
if cellid_pattern_prev is not None and not np.array_equal(cellid_pattern, cellid_pattern_prev):
raise ValueError("Inconsistent cell ID pattern between gain stages")
cellid_pattern_prev = cellid_pattern
# Split results up by module
for m in modules:
mod_slice = modno_to_slice(m)
offset_consts[m][..., gg] = offset[:, mod_slice]
noise_consts[m][..., gg] = noise[:, mod_slice]
badpix_consts[m][..., gg] = bp[:, mod_slice]
        data_samples[m][..., gg][:data.shape[0], ...] = data[:, m - 1]
normal_tests[m][..., gg] = normal[:, mod_slice]
print(f"{gain_names[gg]} gain. "
f"Number of processed trains per cell: {data.shape[0]}.")
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = 'proposal:{} runs:{} {} {}'.format(proposal, run_low, run_med, run_high)
report = get_report(metadata_folder)
```
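With the default parameters of this notebook, the resulting raw-data location string looks like the following (shown only to illustrate the format; the proposal is taken from the second-to-last component of `in_folder`):
``` python
# Illustration only, using the default in_folder and run numbers from above.
example_in_folder = "/gpfs/exfel/exp/FXE/202231/p900316/raw/"
example_proposal = list(filter(None, example_in_folder.strip('/').split('/')))[-2]
print('proposal:{} runs:{} {} {}'.format(example_proposal, 12, 11, 10))
# -> proposal:p900316 runs:12 11 10
```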
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
clist = ["Offset", "Noise", "BadPixelsDark"]
old_const = {}
old_mdata = {}
print('Retrieve pre-existing constants for comparison.')
cap = capacitor_settings[0]
for mod_num, karabo_da_m in zip(modules, karabo_da):
db_module = pdu_name_by_da[karabo_da_m]
old_const[mod_num] = {}
old_mdata[mod_num] = {}
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if (mod_num % 2 == 1) else bias_voltage_1
condition = Conditions.Dark.LPD(
memory_cells=mem_cells,
pixels_x=256,
pixels_y=32,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
for const in clist:
constant = getattr(Constants.LPD, const)()
data, mdata = get_from_db(karabo_id, karabo_da_m,
constant,
condition, None,
cal_db_interface,
creation_time=creation_time,
verbosity=2, timeout=cal_db_timeout)
old_const[mod_num][const] = data
if mdata is None or data is None:
old_mdata[mod_num][const] = {
"timestamp": "Not found",
"filepath": None,
"h5path": None
}
else:
old_mdata[mod_num][const] = {
"timestamp": mdata.calibration_constant_version.begin_at.isoformat(),
"filepath": os.path.join(
mdata.calibration_constant_version.hdf5path,
mdata.calibration_constant_version.filename
),
"h5path": mdata.calibration_constant_version.h5path,
}
with open(f"{out_folder}/module_metadata_{mod_num}.yml","w") as fd:
yaml.safe_dump(
{
"module": mod_num,
"pdu": db_module,
"old-constants": old_mdata[mod_num]
}, fd)
```
%% Cell type:code id: tags:
``` python
const_types = {
'Offset': offset_consts,
'Noise': noise_consts,
'BadPixelsDark': badpix_consts,
}
```
%% Cell type:code id: tags:
``` python
# Save constants in the calibration DB
md = None
cap = capacitor_settings[0]
for mod_num, karabo_da_m in zip(modules, karabo_da):
db_module = pdu_name_by_da[karabo_da_m]
print(f"Storing constants for PDU {db_module}")
if inject_cell_order:
mem_cell_order = ",".join([str(c) for c in cellid_pattern]) + ","
else:
mem_cell_order = None
# Do not store empty constants
# In case of 0 trains data_g is initiated with nans and never refilled.
if np.count_nonzero(~np.isnan(data_samples[mod_num]))==0:
print(f"Constant ({cap}, {mod_num}) would be empty, skipping saving")
continue
for const_name, const_dict in const_types.items():
dconst = getattr(Constants.LPD, const_name)()
dconst.data = const_dict[mod_num]
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if (mod_num % 2 == 1) else bias_voltage_1
# set the operating condition
condition = Conditions.Dark.LPD(
memory_cells=mem_cells,
pixels_x=256,
pixels_y=32,
bias_voltage=bias_voltage,
capacitor=cap,
memory_cell_order=mem_cell_order,
)
if db_output:
md = send_to_db(db_module, karabo_id, dconst, condition,
file_loc, report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout)
if local_output:
md = save_const_to_h5(db_module, karabo_id, dconst, condition,
dconst.data, file_loc, report, creation_time, out_folder)
print(f"Calibration constant {const_name} is stored locally.\n")
print("Constants parameter conditions are:\n")
print(f"• memory_cells: {mem_cells}\n"
f"• bias_voltage: {bias_voltage}\n"
f"• capacitor: {cap}\n"
f"• memory cell order: {mem_cell_order}\n"
f"• creation_time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
%% Cell type:markdown id: tags:
## Raw pedestal distribution ##
Distribution of the pedestal (in ADU) over trains for pixel (12, 12), memory cell 12. The median of the distribution is shown in yellow and one standard deviation in red. The green line shows the average over all pixels for the given memory cell and gain stage.
%% Cell type:code id: tags:
``` python
for i in modules:
fig, grid = plt.subplots(3, 1, sharex="col", sharey="row", figsize=(10, 7))
fig.suptitle(f"Module {i}")
fig.subplots_adjust(wspace=0, hspace=0)
if np.count_nonzero(~np.isnan(data_samples[i])) == 0:
break
for gain in range(3):
data = data_samples[i][:, gain]
offset = np.nanmedian(data)
noise = np.nanstd(data)
xrange = [np.nanmin(data_samples[i]), np.nanmax(data_samples[i])]
if xrange[1] == xrange[0]:
xrange = [0, xrange[0]+xrange[0]//2]
nbins = data_samples[i].shape[0]
else:
nbins = int(xrange[1] - xrange[0])
hn, cn = np.histogram(data, bins=nbins, range=xrange)
grid[gain].hist(data, range=xrange, bins=nbins)
grid[gain].plot([offset-noise, offset-noise], [0, np.nanmax(hn)],
linewidth=1.5, color='red',
label='1 $\sigma$ deviation')
grid[gain].plot([offset+noise, offset+noise],
[0, np.nanmax(hn)], linewidth=1.5, color='red')
grid[gain].plot([offset, offset], [0, 0],
linewidth=1.5, color='y', label='median')
grid[gain].plot([np.nanmedian(offset_consts[i][:, :, 12, gain]),
np.nanmedian(offset_consts[i][:, :, 12, gain])],
[0, np.nanmax(hn)], linewidth=1.5, color='green',
label='average over pixels')
grid[gain].set_xlim(xrange)
grid[gain].set_ylim(0, np.nanmax(hn)*1.1)
grid[gain].set_xlabel("Offset value [ADU]")
grid[gain].set_ylabel("# of occurance")
if gain == 0:
leg = grid[gain].legend(
loc='upper center', ncol=3,
bbox_to_anchor=(0.1, 0.25, 0.7, 1.0))
grid[gain].text(xrange[0], np.nanmax(hn)*0.4,
"{} gain".format(gain_names[gain]), fontsize=20)
a = plt.axes([.125, .1, 0.775, .8], frame_on=False)
a.patch.set_alpha(0.05)
a.set_xlim(xrange)
plt.plot([offset, offset], [0, 1], linewidth=1.5, color='y')
plt.xticks([])
plt.yticks([])
ypos = 0.9
x1pos = (np.nanmedian(data_samples[i][:, 0]) +
np.nanmedian(data_samples[i][:, 2]))/2.
x2pos = (np.nanmedian(data_samples[i][:, 2]) +
np.nanmedian(data_samples[i][:, 1]))/2.-10
plt.annotate("", xy=(np.nanmedian(data_samples[i][:, 0]), ypos), xycoords='data',
xytext=(np.nanmedian(data_samples[i][:, 2]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_samples[i][:, 0])-np.nanmedian(data_samples[i][:, 2])),
xy=(x1pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.annotate("", xy=(np.nanmedian(data_samples[i][:, 2]), ypos), xycoords='data',
xytext=(np.nanmedian(data_samples[i][:, 1]), ypos), textcoords='data',
arrowprops=dict(arrowstyle="<->", connectionstyle="arc3"))
plt.annotate('{}'.format(np.nanmedian(data_samples[i][:, 2])-np.nanmedian(data_samples[i][:, 1])),
xy=(x2pos, ypos), xycoords='data', xytext=(5, 5), textcoords='offset points')
plt.show()
```
%% Cell type:code id: tags:
``` python
if not test_for_normality:
print('Normality test was not requested. Flag `test_for_normality` False')
else:
for i in modules:
data = np.copy(normal_tests[i])
data[badpix_consts[i] > 0] = 1.01
hn,cn = np.histogram(data[:,:,:,0], bins=100)
d = [{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,0], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'High gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,1], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Medium gain',
},
{'x': np.arange(100)*0.01+0.01,
'y': np.histogram(data[:,:,:,2], bins=100)[0],
'drawstyle': 'steps-pre',
'label' : 'Low gain',
},
]
fig = plt.figure(figsize=(15,15), tight_layout={'pad': 0.5, 'w_pad': 0.3})
for gain in range(3):
ax = fig.add_subplot(2, 2, 1+gain)
heatmapPlot(data[:,:,12,gain], add_panels=False, cmap='viridis', figsize=(10,10),
y_label='Rows', x_label='Columns',
lut_label='p-Value',
use_axis=ax,
title='p-Value for cell 12, {} gain'.format(gain_names[gain]) )
ax = fig.add_subplot(2, 2, 4)
_ = simplePlot(d, #aspect=1.6,
x_label = "p-Value".format(gain),
y_label="# of occurance",
use_axis=ax,
y_log=False, legend='outside-top-ncol3-frame', legend_pad=0.05, legend_size='5%')
ax.ticklabel_format(style='sci', axis='y', scilimits=(4,6))
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single-cell overviews help to identify effects that are common to all memory cells, e.g. at the sensor level. They also serve as a first sanity check of expected behaviour, e.g. that structuring at the ASIC level is visible in the offsets while no other immediate artifacts appear.
%% Cell type:code id: tags:
``` python
for gain in range(3):
display(
Markdown('### Cell-12 overview - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(18, 22) , tight_layout={'pad': 0.1, 'w_pad': 0.1})
for i in modules:
for iconst, const in enumerate(['Offset', 'Noise', 'BadPixelsDark']):
ax = fig.add_subplot(3, 2, 1+iconst)
data = const_types[const][i][12, :, :, gain]
vmax = 1.5 * np.nanmedian(data)
            label = f'{const} value [ADU]'
            title = f'{const} value'
if const == 'BadPixelsDark':
vmax = 4
bpix_code = data.astype(np.float32)
bpix_code[bpix_code == 0] = np.nan
title = 'Bad pixel code'
label = title
cb_labels = ['1 {}'.format(BadPixels.NOISE_OUT_OF_THRESHOLD.name),
'2 {}'.format(BadPixels.OFFSET_NOISE_EVAL_ERROR.name),
'3 {}'.format(BadPixels.OFFSET_OUT_OF_THRESHOLD.name),
'4 {}'.format('MIXED')]
heatmapPlot(bpix_code, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label='', vmax=vmax, aspect=1.,
use_axis=ax, cb_ticklabels=cb_labels, cb_ticks = np.arange(4)+1, cb_loc='bottom',
title=title)
del bpix_code
else:
heatmapPlot(data, add_panels=False, cmap='viridis',
y_label='Rows', x_label='Columns',
lut_label=label, vmax=vmax, aspect=1.,
use_axis=ax, cb_loc='bottom',
title=title)
for i in modules:
for iconst, const in enumerate(['Offset', 'Noise']):
data = const_types[const][i]
dataBP = np.copy(data)
dataBP[badpix_consts[i] > 0] = -1
x_ranges = [[0, 1500], [0, 40]]
hn, cn = np.histogram(
data[:, :, :, gain], bins=100, range=x_ranges[iconst])
hnBP, cnBP = np.histogram(dataBP[:, :, :, gain], bins=cn)
d = [{'x': cn[:-1],
'y': hn,
'drawstyle': 'steps-pre',
'label': 'All data',
},
{'x': cnBP[:-1],
'y': hnBP,
'drawstyle': 'steps-pre',
'label': 'Bad pixels masked',
},
]
ax = fig.add_subplot(3, 2, 5+iconst)
_ = simplePlot(d, figsize=(5, 7), aspect=1,
x_label=f"{const} value [ADU]",
y_label="# of occurence",
title='', legend_pad=0.1, legend_size='10%',
use_axis=ax,
y_log=True, legend='outside-top-2col-frame')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
``` python
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
if high_res_badpix_3d:
display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots shows the results of a bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixels values in x/y are re-binned with a factor of 2.
This excludes single bad pixels present only in disconnected pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
# Switch rebin to 1 for full resolution and
# no interpolation for badpixel values.
rebin = 2
for gain in range(3):
display(Markdown('### Bad pixel behaviour - {} gain ###'.format(gain_names[gain])))
for mod, data in badpix_consts.items():
plot_badpix_3d(data[...,gain], cols, title='', rebin_fac=rebin)
ax = plt.gca()
leg = ax.get_legend()
leg.set(alpha=0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary across tiles ##
Plots give an overview of calibration constants averaged across tiles. A bad pixel mask is applied. Constants are compared with pre-existing constants retrieved from the calibration database. The difference $\Delta$ between the old and new constants is shown.
%% Cell type:code id: tags:
``` python
time_summary = []
time_summary.append(f"The following pre-existing constants are used for comparison:")
for mod_num, mod_data in old_mdata.items():
time_summary.append(f"- Module {mod_num}")
for const, const_data in mod_data.items():
time_summary.append(f" - {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Summary across tiles - {} gain'.format(gain_names[gain])))
for const in const_types:
data = np.copy(const_types[const][i][:, :, :, gain])
label = 'Fraction of bad pixels'
if const != 'BadPixelsDark':
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '{} value [ADU]'.format(const)
else:
data[data>0] = 1.0
# Split tiles, making shape (mem_cells, slow_scan, tiles(=2), fast_scan)
data = data.reshape(data.shape[0], 32, 2, 128)
# Average within each tile
data = np.nanmean(data, axis=(1, 3))
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(1, 2, 1)
_ = heatmapPlot(data[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(2)+1,
x_ticks=np.arange(2)+0.5)
if old_const[i][const] is not None:
ax = fig.add_subplot(1, 2, 2)
dataold = np.copy(old_const[i][const][:, :, :, gain])
label = '$\Delta$ {}'.format(label)
if const != 'BadPixelsDark':
if old_const[i]['BadPixelsDark'] is not None:
dataold[old_const[i]['BadPixelsDark'][:, :, :, gain] > 0] = np.nan
else:
dataold[:] = np.nan
else:
dataold[dataold>0]=1.0
dataold = dataold.reshape(dataold.shape[0], 32, 2, 128)
dataold = np.nanmean(dataold, axis=(1, 3))
dataold = dataold - data
_ = heatmapPlot(dataold[:510, :], add_panels=True,
y_label='Memory Cell ID', x_label='Tile ID',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis', # cb_loc='right',cb_aspect=15,
x_ticklabels=np.arange(2)+1,
x_ticks=np.arange(2)+0.5)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Variation of offset and noise across Tiles and ASICs ##
The following plots show the standard deviation $\sigma$ of the calibration constants. The plot of the standard deviation across tiles shows the pixels of one tile ($128 \times 32$); the value of each pixel is the standard deviation across the tiles of the module. The standard deviation across ASICs is shown over all tiles: the plot shows the pixels of one ASIC ($16 \times 32$), where each value is the standard deviation across all ASICs of the module.
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Variation of offset and noise across ASICs - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(const_types[const][i][:, :, :, gain])
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataA = np.nanmean(data, axis=0) # average over cells
dataA = dataA.reshape(32, 16, 16)
dataA = np.nanstd(dataA, axis=1) # average across ASICs
ax = fig.add_subplot(1, 2, 1+iconst)
_ = heatmapPlot(dataA, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis'
)
plt.show()
```
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Variation of offset and noise across tiles - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(15, 6))
for iconst, const in enumerate(['Offset', 'Noise']):
data = np.copy(const_types[const][i][:, :, :, gain])
data[badpix_consts[i][:, :, :, gain] > 0] = np.nan
label = '$\sigma$ {} [ADU]'.format(const)
dataT = data.reshape(data.shape[0], 32, 2, 128)
dataT = np.nanstd(dataT, axis=2)
dataT = np.nanmean(dataT, axis=0)
ax = fig.add_subplot(121+iconst)
_ = heatmapPlot(dataT, add_panels=True,
y_label='rows', x_label='columns',
lut_label=label, use_axis=ax,
panel_y_label=label, panel_x_label=label,
cmap='viridis')
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Aggregate values and per cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per-cell behavior, averaged across pixels.
%% Cell type:code id: tags:
``` python
for i in modules:
for gain in range(3):
display(Markdown('### Mean over pixels - {} gain'.format(gain_names[gain])))
fig = plt.figure(figsize=(9,11))
for iconst, const in enumerate(const_types):
ax = fig.add_subplot(3, 1, 1+iconst)
data = const_types[const][i][:510,:,:,gain]
if const == 'BadPixelsDark':
data[data>0] = 1.0
dataBP = np.copy(data)
dataBP[badpix_consts[i][:510,:,:,gain] > 0] = -10
data = np.nanmean(data, axis=(1, 2))
dataBP = np.nanmean(dataBP, axis=(1, 2))
d = [{'y': data,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'All data'
}
]
if const != 'BadPixelsDark':
d.append({'y': dataBP,
'x': np.arange(data.shape[0]),
'drawstyle': 'steps-mid',
'label' : 'good pixels only'
})
y_title = f"{const} value [ADU]"
title = f"{const} value, {gain_names[gain]} gain"
else:
y_title = "Fraction of Bad Pixels"
title = f"Fraction of Bad Pixels, {gain_names[gain]} gain"
data_min = np.min([data, dataBP]) if const != 'BadPixelsDark' else np.min([data])
data_max = np.max([data[20:], dataBP[20:]])
data_dif = data_max - data_min
local_max = np.max([data[200:300], dataBP[200:300]])
frac = 0.35
new_max = (local_max - data_min*(1-frac))/frac
new_max = np.max([data_max, new_max])
_ = simplePlot(d, figsize=(10,10), aspect=2, xrange=(-12, 510),
x_label = 'Memory Cell ID',
y_label=y_title, use_axis=ax,
title=title,
title_position=[0.5, 1.15],
inset='xy-coord-right', inset_x_range=(0,20), inset_indicated=True,
inset_labeled=True, inset_coord=[0.2,0.5,0.6,0.95],
inset_lw = 1.0, y_range = [data_min-data_dif*0.05, new_max+data_dif*0.05],
y_log=False, legend='outside-top-ncol2-frame', legend_size='18%',
legend_pad=0.00)
plt.tight_layout(pad=1.08, h_pad=0.35)
plt.show()
```
%% Cell type:raw id: tags:
.. raw:: latex
\newpage
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
table = []
bits = [BadPixels.NOISE_OUT_OF_THRESHOLD, BadPixels.OFFSET_OUT_OF_THRESHOLD, BadPixels.OFFSET_NOISE_EVAL_ERROR]
for mod_num in modules:
for gain in range(3):
l_data = []
l_data_old = []
data = np.copy(badpix_consts[mod_num][:,:,:,gain])
l_data.append(len(data[data>0].flatten()))
for bit in bits:
l_data.append(np.count_nonzero(badpix_consts[mod_num][:,:,:,gain] & bit.value))
if old_const[mod_num]['BadPixelsDark'] is not None:
old_const[mod_num]['BadPixelsDark'] = old_const[mod_num]['BadPixelsDark'].astype(np.uint32)
dataold = np.copy(old_const[mod_num]['BadPixelsDark'][:, :, :, gain])
l_data_old.append(len(dataold[dataold>0].flatten()))
for bit in bits:
l_data_old.append(np.count_nonzero(old_const[mod_num]['BadPixelsDark'][:, :, :, gain] & bit.value))
l_data_name = ['All bad pixels', 'NOISE_OUT_OF_THRESHOLD',
'OFFSET_OUT_OF_THRESHOLD', 'OFFSET_NOISE_EVAL_ERROR']
l_threshold = ['', f'{thresholds_noise_sigma}', f'{thresholds_offset_sigma}',
f'{thresholds_offset_hard}/{thresholds_noise_hard}']
for i in range(len(l_data)):
line = [f'{l_data_name[i]}, gain {gain_names[gain]}', l_threshold[i], l_data[i]]
if old_const[mod_num]['BadPixelsDark'] is not None:
line += [l_data_old[i]]
else:
line += ['-']
table.append(line)
table.append(['', '', '', ''])
display(Markdown('''
### Number of bad pixels ###
One pixel can be bad for several reasons at once; therefore, the sum over all bad-pixel types can exceed the total number of bad pixels.
'''))
if len(table)>0:
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Pixel type", "Threshold",
"New constant", "Old constant"])))
```
%% Cell type:code id: tags:
``` python
header = ['Parameter',
"New constant", "Old constant ",
"New constant", "Old constant ",
"New constant", "Old constant "]
for const in ['Offset', 'Noise']:
table = [['','High gain', 'High gain', 'Medium gain', 'Medium gain', 'Low gain', 'Low gain']]
for mod_num in modules:
data = np.copy(const_types[const][mod_num])
data[badpix_consts[mod_num] > 0] = np.nan
if old_const[mod_num][const] is not None and old_const[mod_num]['BadPixelsDark'] is not None :
dataold = np.copy(old_const[mod_num][const])
dataold[old_const[mod_num]['BadPixelsDark']>0] = np.nan
f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
for i, f in enumerate(f_list):
line = [n_list[i]]
for gain in range(3):
line.append('{:6.1f}'.format(f(data[...,gain])))
if old_const[mod_num][const] is not None and old_const[mod_num]['BadPixelsDark'] is not None:
line.append('{:6.1f}'.format(f(dataold[...,gain])))
else:
line.append('-')
table.append(line)
display(Markdown('### {} [ADU], good pixels only ###'.format(const)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
%% Cell type:markdown id: tags:
# Injecting LPD-Mini calibration constant data to the database
Author: European XFEL Detector Group, Version: 1.0
This notebook is used to read HDF5 files with LPDMINI constants to inject into the CALCAT database.
The notebook expects an explicit filename format: <calibration_constant_name>_<karabo_da>.h5 for example `RelativeGain_LPDMINI00_2.h5`
- <calibration_constant_name>: The calibration name as it is named in CALCAT.
- <karabo_da>: The data aggregator name. As LPDMini aggregators have `/` we replace it with `_`.
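Because the aggregator names contain a slash, the expected file name is obtained by substituting it with an underscore, matching how the notebook later looks the files up. A minimal sketch:
``` python
# Sketch: build the expected constant file name for one aggregator,
# using the example names given above.
constant_name = "RelativeGain"
karabo_da_entry = "LPDMINI00/2"
print(f"{constant_name}_{karabo_da_entry.replace('/', '_')}.h5")
# -> RelativeGain_LPDMINI00_2.h5
```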
%% Cell type:code id: tags:
``` python
# calibration constant parameters:
constant_names = [""] # calibration constant name, required.
in_folder = "" # calibration constants folder, required.
out_folder = "" # output folder to store report path in case the notebook is executed by CLI, required.
proposal = "" # Add proposal number to be sent to the database as a part of Raw data location.
runs = [""] # Add list of runs to be sent to the database as a part of Raw data location.
# detector parameters:
karabo_id = "FXE_DET_LPD_MINI" # detector identifier.
karabo_da = [""] # karabo data aggregators. default "all" for all 8 karabo data aggregator names.
# calibration database parameters:
cal_db_interface = "tcp://max-exfl016:8015#8045" # calibration DB zmq address.
# calibration constant conditions:
memory_cells = 512 # Number of memory cells. Used for constant conditions.
bias_voltage_0 = 250 # bias voltage for minis 1, 3, 5, 7
bias_voltage_1 = 250 # bias voltage for minis 2, 4, 6, 8
capacitor = 5 # capacitor value. Used for constant conditions.
photon_energy = 9.2 # calibration constant photon energy. Used for constant conditions.
creation_time = '2023-05-07T15:10:07' # creation time for the injected constants. required format '2019-01-20T14:12:06'
```
%% Cell type:code id: tags:
``` python
import multiprocessing
from datetime import datetime
from logging import warning
from pathlib import Path
from typing import List, Tuple
import h5py
from cal_tools.calcat_interface import CalCatApi
from cal_tools.restful_config import calibration_client
from cal_tools.tools import get_report, send_to_db
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
pixels_x = 256
pixels_y = 32
calcat_client = calibration_client()
calcat = CalCatApi(client=calcat_client)
# Look up PDUs
detector_id = calcat.detector(karabo_id)['id']
pdus_by_da = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)
if not karabo_da or karabo_da == [""]:
karabo_da = sorted(pdus_by_da.keys())
else:
karabo_da = karabo_da
# if proposal or runs are given assign file_loc
# for calibration constant versions metadata.
file_loc = ""
if proposal:
file_loc += f"proposal:{proposal}"
if runs[0] != "":
file_loc += f"runs: {runs}"
if file_loc == "":
print(
"No proposal or runs were given for constant source."
" No \"Raw data location\" will be injected with the constants.\n"
)
```
%% Cell type:code id: tags:
``` python
def validate_input_paths(
in_folder: str,
creation_time: str,
constant_names: List[str],
) -> Tuple[str, datetime]:
# Validate input parameters:
if not (in_folder):
raise ValueError(
"ERROR: \"in_folder\" is not given."
" Please provide the constants input folder."
)
c_folder = Path(in_folder)
if not c_folder.is_dir():
raise ValueError(
f"ERROR: in_folder {in_folder} directory doesn't exist."
)
try:
creation_time = datetime.strptime(creation_time, '%Y-%m-%dT%H:%M:%S')
except ValueError:
raise ValueError(
"Incorrect data format, "
"should be YYYY-MM-DDTHH:MM:SS i.e. 2019-01-20T14:12:06"
)
for constant in constant_names:
if not hasattr(Constants.LPD, constant):
raise ValueError(
f"ERROR: Constant name \"{constant}\" is not a known LPD constant. "
f"Available LPD Constants are {[c for c in dir(Constants.LPD) if c[0] != '_']}"
)
return c_folder, creation_time
```
%% Cell type:code id: tags:
``` python
def inject_constants(
constant_name: str,
mod_da: str,
physical_unit: str,
):
mod_num = int(mod_da.split('/')[-1])
# mod_num is from 1 to 8, so b_v_0 applies to odd numbers
bias_voltage = bias_voltage_0 if mod_num % 2 == 1 else bias_voltage_1
# Calibration constants condition object.
condition = Conditions.Illuminated.LPD(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
photon_energy=photon_energy,
pixels_x=pixels_x,
pixels_y=pixels_y,
capacitor=capacitor,
category=None,
)
constant = getattr(Constants.LPD, constant_name)()
cfile = c_folder / f"{constant_name}_{mod_da.replace('/', '_')}.h5"
if not cfile.exists():
warning(f"Constant file {cfile} doesn't exist.\n")
return
# load constant data.
with h5py.File(cfile, "r") as f:
cdata = f[constant_name][()]
# Validate for only LPD at the moment.
if not cdata.shape == (memory_cells, pixels_y, pixels_x, 3):
raise ValueError(
f"ERROR: {constant_name} constant data shape is not as expected."
f" {cdata.shape} != ({memory_cells}, {pixels_y}, {pixels_x}, 3).\n"
)
constant.data = cdata
send_to_db(
db_module=physical_unit,
karabo_id=karabo_id,
constant=constant,
condition=condition,
file_loc=file_loc,
report_path=report_path,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
)
```
%% Cell type:code id: tags:
``` python
c_folder, creation_time = validate_input_paths(
in_folder,
creation_time,
constant_names,
)
# create a report path for calibration constant versions metadata.
report_name = f"No_report/LPD_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
report_path = get_report(
out_folder=in_folder,
default_path=report_name
)
mod_mapping = {mod: pdus_by_da[mod]["physical_name"] for mod in karabo_da}
print("Physical detector units retrieved are: ", mod_mapping, "\n")
inp = []
for const in constant_names:
for k_da, pdu in mod_mapping.items():
inp.append((const, k_da, pdu))
with multiprocessing.Pool(processes=5) as pool:
results = pool.starmap(inject_constants, inp)
```
%% Cell type:markdown id: tags:
# ePix100 Dark Characterization
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides dark image analysis and calibration constants of the ePix100 detector.
Dark characterization evaluates offset and noise of the detector and gives information about bad pixels.
Noise and bad pixels maps are calculated independently for each of the 4 ASICs of ePix100, since their noise behaviour can be significantly different.
Common mode correction can be applied to increase sensitivity to noise-related bad pixels. Common mode correction is achieved by subtracting the median of all pixels that are read out at the same time along a row/column. This is done in an iterative process, in which a new bad pixels map is calculated and used to mask the data while the common mode values across the rows/columns are updated.
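As a rough sketch of this row/column median subtraction (the notebook itself uses `xcal.CommonModeCorrection` per readout bank further below, so this is illustration only, with hypothetical array shapes):
``` python
import numpy as np

# Hypothetical stack of dark frames (n_frames, rows, cols); bad pixels are
# assumed to be already masked with NaN so they do not bias the medians.
frames = np.random.normal(100, 5, size=(10, 708, 768)).astype(np.float32)

# Subtract the median of the pixels read out together along each row,
# then along each column, ignoring NaN entries.
frames -= np.nanmedian(frames, axis=2, keepdims=True)  # row-wise common mode
frames -= np.nanmedian(frames, axis=1, keepdims=True)  # column-wise common mode

# The noise map is then recalculated from the corrected frames.
noise_map = np.nanstd(frames, axis=0)
```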
The resulting maps are saved as .h5 files for later use and injected into the calibration DB.
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/HED/202330/p900338/raw' # input folder, required
out_folder = '' # output folder, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequence = 0 # sequence file to use
run = 176 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = ["EPIX01"] # data aggregators
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters for the calibration database.
use_dir_creation_date = True
cal_db_interface = "tcp://max-exfl016:8020" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
db_output = False # Output constants to the calibration database
local_output = True # Output constants locally
# Conditions used for injected calibration constants.
bias_voltage = 200 # Bias voltage
in_vacuum = False # Detector operated in vacuum
fix_integration_time = -1 # Integration time. Set to -1 to read from .h5 file
fix_temperature = -1 # Fixed temperature in Kelvin. Set to -1 to read from .h5 file
temp_limits = 5 # Limit for parameter Operational temperature
badpixel_noise_sigma = 5 # Bad pixels defined by noise value outside n * std from median. Default: 5
badpixel_offset_sigma = 2 # Bad pixels defined by offset value outside n * std from median. Default: 2
CM_N_iterations = 2 # Number of iterations for common mode correction. Set to 0 to skip it
# Parameters used during selecting raw data trains.
min_trains = 1 # Minimum number of trains that should be available to process dark constants. Default 1.
max_trains = 1000 # Maximum number of trains to use for processing dark constants. Set to 0 to use all available trains.
# Don't delete! myMDC sends this by default.
operation_mode = '' # Detector operation mode, optional
# TODO: delete after removing from calibration_configurations
db_module = '' # ID of module in calibration database; this parameter is ignored in the notebook. TODO: remove from calibration_configurations.
```
%% Cell type:code id: tags:
``` python
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, Markdown
from prettytable import PrettyTable
from extra_data import RunDirectory
from pathlib import Path
import XFELDetAna.xfelprofiler as xprof
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from XFELDetAna.plotting.util import prettyPlotting
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.epix100 import epix100lib
from cal_tools.tools import (
get_dir_creation_date,
get_pdu_from_db,
get_report,
save_const_to_h5,
send_to_db,
)
from iCalibrationDB import Conditions, Constants
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
warnings.filterwarnings('ignore')
prettyPlotting = True
profiler = xprof.Profiler()
profiler.disable()
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
```
%% Cell type:code id: tags:
``` python
# Read report path and create file location tuple to add with the injection
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
file_loc = f'proposal:{proposal} runs:{run}'
report = get_report(metadata_folder)
ped_dir = os.path.join(in_folder, f"r{run:04d}")
fp_name = path_template.format(run, karabo_da[0]).format(sequence)
run_dir = RunDirectory(ped_dir)
print(f"Reading from: {Path(ped_dir) / fp_name}")
print(f"Run number: {run}")
print(f"Instrument H5File source: {instrument_src}")
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time.isoformat()} as creation time")
os.makedirs(out_folder, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Read sensor size
sensor_size = run_dir[instrument_src, 'data.image.dims'].as_single_value(reduce_by='first') # (x=768, y=708) expected
sensor_size = sensor_size[sensor_size != 1] # data.image.dims for old data is [768, 708, 1]
assert np.array_equal(sensor_size, [768, 708]), 'Unexpected sensor dimensions.'
# Path to pixels ADC values
pixels_src = (instrument_src, "data.image.pixels")
# Specifies total number of images to process
n_trains = run_dir.get_data_counts(*pixels_src).shape[0]
# Modify n_trains to process based on given maximum
# and minimum number of trains.
if max_trains:
n_trains = min(max_trains, n_trains)
if n_trains < min_trains:
raise ValueError(
f"Less than {min_trains} trains are available in RAW data."
" Not enough data to process darks.")
print(f"Number of dark images to analyze: {n_trains}")
```
%% Cell type:code id: tags:
``` python
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dir,
instrument_src=instrument_src,
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
# Read integration time
if fix_integration_time == -1:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ''
else:
integration_time = fix_integration_time
integration_time_str_add = '(manual input)'
# Read temperature
if fix_temperature == -1:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ''
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = '(manual input)'
# Print operating conditions
print(f"Bias voltage: {bias_voltage} V")
print(f"Detector integration time: {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Passing repetitive code along the notebook to a function
def stats_calc(data):
'''
Calculates basic statistical parameters of the input data:
mean, standard deviation, median, minimum and maximum.
Returns dictionary with keys: 'mean', 'std', 'median',
'min', 'max' and 'legend'.
Parameters
----------
data : ndarray
Data to analyse.
'''
stats = {}
stats['mean'] = np.nanmean(data)
stats['std'] = np.nanstd(data)
stats['median'] = np.nanmedian(data)
stats['min'] = np.nanmin(data)
stats['max'] = np.nanmax(data)
stats['legend'] = ('mean: ' + str(np.round(stats['mean'],2)) +
'\nstd: ' + str(np.round(stats['std'],2)) +
'\nmedian: ' + str(np.round(stats['median'],2)) +
'\nmin: ' + str(np.round(stats['min'],2)) +
'\nmax: ' + str(np.round(stats['max'],2)))
return stats
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
step_timer.start()
# Read data
data_dc = run_dir.select(
*pixels_src, require_all=True).select_trains(np.s_[:n_trains])
data = data_dc[pixels_src].ndarray()
# Instantiate constant maps to be filled with offset, noise and bad pixels maps
constant_maps = {}
step_timer.done_step('Darks loaded. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Offset Map
%% Cell type:code id: tags:
``` python
step_timer.start()
# Calculate offset per pixel and store it in constant_maps
constant_maps['Offset'] = np.nanmean(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Offset'].flatten())
# Offset map
fig = xana.heatmapPlot(
constant_maps['Offset'].squeeze(),
lut_label='[ADU]',
x_label='Column',
y_label='Row',
vmin=max(0, np.round((stats['median']-stats['std'])/250)*250), # Force cb label to be multiple of 250 for reproducibility
vmax=min(np.power(2,14)-1, np.round((stats['median']+stats['std'])/250)*250)
)
fig.suptitle('Offset Map', x=.48, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Offset Histogram
h, c = np.histogram(
constant_maps['Offset'].flatten(),
bins = np.arange(stats['min'], stats['max'], stats['std']/100)
)
d = {'x': c[:-1],
'y': h,
'color': 'blue'
}
fig = xana.simplePlot(
d,
aspect=1.5,
x_label='Offset [ADU]',
y_label='Counts',
x_range=(0, np.power(2,14)-1),
y_range=(0, max(h)*1.1),
y_log=True
)
fig.text(
s=stats['legend'],
x=.7,
y=.7,
fontsize=14,
bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)
fig.suptitle('Offset Map Histogram', x=.5,y =.92, fontsize=16)
step_timer.done_step('Offset Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Noise Map
%% Cell type:code id: tags:
``` python
step_timer.start()
# Calculate noise per pixel and store it in constant_maps
constant_maps['Noise'] = np.nanstd(data, axis=0)[..., np.newaxis]
# Calculate basic statistical parameters
stats = stats_calc(constant_maps['Noise'].flatten())
# Noise heat map
fig = xana.heatmapPlot(
constant_maps['Noise'].squeeze(),
lut_label='[ADU]',
x_label='Column',
y_label='Row',
vmin=max(0, np.round((stats['median'] - badpixel_noise_sigma*stats['std']))),
vmax=np.round(stats['median'] + badpixel_noise_sigma*stats['std'])
)
fig.suptitle('Noise Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
# Calculate overall noise histogram
bins = np.arange(max(0, stats['mean'] - badpixel_noise_sigma*stats['std']),
stats['mean'] + badpixel_noise_sigma*stats['std'],
stats['std']/100)
h, c = np.histogram(
constant_maps['Noise'].flatten(),
bins = bins
)
d = [{'x': c[:-1],
'y': h,
'color': 'black',
'label': 'Total'
}]
# Calculate per ASIC histogram
asic = []
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), int(sensor_size[0]//2):],
'label': 'ASIC 0 (bottom right)',
'color': 'blue'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, int(sensor_size[0]//2):],
'label': 'ASIC 1 (top right)',
'color': 'red'})
asic.append({'noise': constant_maps['Noise'][int(sensor_size[1]//2):, :int(sensor_size[0]//2)],
'label': 'ASIC 2 (top left)',
'color': 'green'})
asic.append({'noise': constant_maps['Noise'][:int(sensor_size[1]//2), :int(sensor_size[0]//2)],
'label': 'ASIC 3 (bottom left)',
'color': 'magenta'})
min_std = np.inf
for a in asic:
h, c = np.histogram(a['noise'].flatten(), bins=bins)
d.append({'x': c[:-1], 'y': h, 'color': a['color'], 'label': a['label']})
min_std = np.nanmin((min_std, np.nanstd(a['noise'])))
print(a['label'][:6] +
': median = ' + "{:.2f}".format(np.round(np.nanmedian(a['noise']),2)) +
' | std = ' + "{:.2f}".format(np.round(np.nanstd(a['noise']),2)) +
a['label'][6:])
arg_max_median = 0
for i in range(0,np.size(d)):
arg_max_median = np.max((arg_max_median, np.argmax(d[i]['y'])))
# Plot noise histogram
fig = xana.simplePlot(
d,
aspect=1.5,
x_label='Noise [ADU]',
y_label='Counts',
x_range=(max(0, stats['median'] - badpixel_noise_sigma*stats['std']),
stats['median'] + badpixel_noise_sigma*stats['std']),
y_range=(0, max(d[0]['y'])*1.1),
)
plt.grid(linestyle = ':')
leg = fig.legend(bbox_to_anchor=(.42, .88),fontsize = 14)
fig.text(
s=stats['legend'],
x=.75,
y=.7,
fontsize=14,
bbox=dict(facecolor='yellow', edgecolor='black', alpha=.1)
)
fig.suptitle('Noise Map Histogram',x=.5,y=.92,fontsize=16)
step_timer.done_step('\nNoise Map created. Elapsed Time')
```
%% Cell type:markdown id: tags:
## Bad Pixels
The bad pixel map is deduced by comparing offset and noise of each pixel ($v_i$) against the median value of the respective maps for each ASIC ($v_k$):
$$
v_i > \mathrm{median}(v_{k}) + n \sigma_{v_{k}}
$$
or
$$
v_i < \mathrm{median}(v_{k}) - n \sigma_{v_{k}}
$$
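For illustration, this criterion can be evaluated for a single ASIC block as in the minimal sketch below (synthetic values; the notebook's `eval_bpidx` function applies the same test to each of the four ASICs):
``` python
import numpy as np

n_sigma = 5  # e.g. badpixel_noise_sigma
block = np.random.normal(10, 1, size=(354, 384))  # synthetic noise values of one ASIC
mdn, std = np.nanmedian(block), np.nanstd(block)
bad = (block > mdn + n_sigma * std) | (block < mdn - n_sigma * std)
print(f"{bad.sum()} pixels flagged in this ASIC block")
```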
Values are encoded in a 32 bit mask, where for the dark image deduced bad pixels the following non-zero entries are relevant:
%% Cell type:code id: tags:
``` python
def print_bp_entry(bp):
'''
Prints bad pixels bit encoding.
Parameters
----------
bp : enum 'BadPixels'
'''
print('{:<23s}: {:032b} ({})'.format(bp.name, bp.value, bp.real))
print_bp_entry(BadPixels.OFFSET_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.NOISE_OUT_OF_THRESHOLD)
print_bp_entry(BadPixels.OFFSET_NOISE_EVAL_ERROR)
```
%% Cell type:code id: tags:
``` python
def eval_bpidx(const_map, n_sigma, block_size):
'''
Evaluates bad pixels by comparing the average offset and
noise of each pixel against the median value of the
respective maps of each ASIC.
Returns boolean array.
Parameters
----------
const_map : ndarray
Offset or noise constant map to input.
n_sigma : float
Standard deviation multiplicity interval outside
which bad pixels are defined.
block_size : ndarray
dimensions ([x,y]) of each ASIC.
'''
blocks = {} # Each block corresponds to 1 ASIC
blocks['0'] = const_map[:int(block_size[1]), int(block_size[0]):] # bottom right
blocks['1'] = const_map[int(block_size[1]):, int(block_size[0]):] # top right
blocks['2'] = const_map[int(block_size[1]):, :int(block_size[0])] # top left
blocks['3'] = const_map[:int(block_size[1]), :int(block_size[0])] # bottom left
idx = {}
for b in range(0, len(blocks)):
mdn = np.nanmedian(blocks[str(b)])
std = np.nanstd(blocks[str(b)])
idx[str(b)] = ( (blocks[str(b)] > mdn + n_sigma*std) | (blocks[str(b)] < mdn - n_sigma*std) )
idx_output = np.zeros(const_map.shape, dtype=bool)
idx_output[:int(block_size[1]), int(block_size[0]):] = idx['0']
idx_output[int(block_size[1]):, int(block_size[0]):] = idx['1']
idx_output[int(block_size[1]):, :int(block_size[0])] = idx['2']
idx_output[:int(block_size[1]), :int(block_size[0])] = idx['3']
return idx_output
```
%% Cell type:code id: tags:
``` python
def bp_analysis_table(bad_pixels_map, title = ''):
'''
Prints a table with short analysis of the number and
percentage of bad pixels on count of offset or noise
out of threshold, or evaluation error.
Returns PrettyTable.
Parameters
----------
bad_pixels_map : ndarray
Bad pixel map to analyse.
title : string, optional
Table title to be used.
'''
num_bp = np.sum(bad_pixels_map!=0)
offset_bp = np.sum(bad_pixels_map==1)
noise_bp = np.sum(bad_pixels_map==2)
eval_error_bp = np.sum(bad_pixels_map==4)
t = PrettyTable()
t.field_names = [title]
t.add_row(['Total number of Bad Pixels : {:<5} ({:<.2f}%)'.format(num_bp, 100*num_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Offset out of threshold : {:<5} ({:<.2f}%)'.format(offset_bp, 100*offset_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Noise out of threshold : {:<5} ({:<.2f}%)'.format(noise_bp, 100*noise_bp/np.prod(bad_pixels_map.shape))])
t.add_row([' Evaluation error : {:<5} ({:<.2f}%)'.format(eval_error_bp, 100*eval_error_bp/np.prod(bad_pixels_map.shape))])
return t
```
%% Cell type:code id: tags:
``` python
# Add bad pixels from darks to constant_maps
constant_maps['BadPixelsDark'] = np.zeros(constant_maps['Offset'].shape, np.uint32)
# Find noise related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Noise'], badpixel_noise_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Noise'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Find offset related bad pixels
constant_maps['BadPixelsDark'][eval_bpidx(constant_maps['Offset'], badpixel_offset_sigma, sensor_size//2)] = BadPixels.OFFSET_OUT_OF_THRESHOLD.value
constant_maps['BadPixelsDark'][~np.isfinite(constant_maps['Offset'])] = BadPixels.OFFSET_NOISE_EVAL_ERROR.value
# Plot Bad Pixels Map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']), neginf=np.nan)).squeeze(), # convert zeros to NaN
lut_label='Bad pixel value [ADU]', # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Initial Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
step_timer.done_step('Initial Bad Pixels Map created. Elapsed Time')
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Initial Bad Pixel Analysis'))
```
%% Cell type:markdown id: tags:
## Common Mode Correction
Common mode correction is applied here to increase sensitivity to bad pixels. This is done in an iterative process. Each iteration is composed of the following steps:
1. Common mode noise is calculated and subtracted from data.
2. Noise map is recalculated.
3. Bad pixels are recalculated based on the new noise map.
4. Data is masked based on the new bad pixels map.
%% Cell type:code id: tags:
``` python
if CM_N_iterations < 1:
print('Common mode correction not applied.')
else:
commonModeBlockSize = (sensor_size//[8,2]).astype(int) # bank size (x=96,y=354) pixels
# Instantiate common mode calculators for column and row CM correction
cmCorrection_col = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'col',
parallel=False)
cmCorrection_row = xcal.CommonModeCorrection(
sensor_size.tolist(),
commonModeBlockSize.tolist(),
'row',
parallel=False)
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
data = data.astype(float) # This conversion is needed for offset subtraction
# Subtract Offset
data -= constant_maps['Offset'].squeeze()
noise_map_corrected = np.copy(constant_maps['Noise'])
bp_offset = [np.sum(constant_maps['BadPixelsDark']==1)]
bp_noise = [np.sum(constant_maps['BadPixelsDark']==2)]
for it in range (0,CM_N_iterations):
step_timer.start()
# Mask bad pixels
BadPixels_mask = np.squeeze(constant_maps['BadPixelsDark'] != 0)
BadPixels_mask = np.repeat(BadPixels_mask[np.newaxis,...],data.shape[0],axis=0)
data[BadPixels_mask] = np.nan
# Common mode correction
data = np.swapaxes(data,0,-1)
data = cmCorrection_col.correct(data)
data = cmCorrection_row.correct(data)
data = np.swapaxes(data,0,-1)
# Update noise map
noise_map_corrected = np.nanstd(data, axis=0)[..., np.newaxis]
# Update bad pixels map
constant_maps['BadPixelsDark'][eval_bpidx(noise_map_corrected, badpixel_noise_sigma, sensor_size//2)] = BadPixels.NOISE_OUT_OF_THRESHOLD.value
bp_offset.append(np.sum(constant_maps['BadPixelsDark']==1))
bp_noise.append(np.sum(constant_maps['BadPixelsDark']==2))
print(bp_analysis_table(constant_maps['BadPixelsDark'], title=f'{it+1} CM correction iterations'))
step_timer.done_step('Elapsed Time')
print('\n')
# Apply final bad pixels mask
BadPixels_mask = np.broadcast_to(np.squeeze(constant_maps['BadPixelsDark'] != 0), data.shape)
data[BadPixels_mask] = np.nan
it = np.arange(0, CM_N_iterations+1)
plt.figure()
plt.plot(it, np.sum((bp_offset,bp_noise),axis=0), c = 'black', ls = '--', marker = 'o', label = 'Total')
plt.plot(it, bp_noise, c = 'red', ls = '--', marker = 'v', label = 'Noise out of threshold')
plt.plot(it, bp_offset, c = 'blue', ls = '--', marker = 's',label = 'Offset out of threshold')
plt.xticks(it)
plt.xlabel('CM correction iteration')
plt.ylabel('# Bad Pixels')
plt.legend()
plt.grid(linestyle = ':')
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
display(Markdown(f'## Common-Mode Corrected Bad Pixels Map\n'))
```
%% Cell type:code id: tags:
``` python
if CM_N_iterations > 0:
# Plot final bad pixels map
fig = xana.heatmapPlot(
np.exp(np.nan_to_num(np.log(constant_maps['BadPixelsDark']),neginf=np.nan)).squeeze(), # convert zeros to NaN
lut_label='Bad pixel value [ADU]', # for plotting purposes
x_label='Column',
y_label='Row',
x_range=(0, sensor_size[0]),
y_range=(0, sensor_size[1])
)
fig.suptitle('Final Bad Pixels Map', x=.5, y=.9, fontsize=16)
fig.set_size_inches(h=15, w=15)
print(bp_analysis_table(constant_maps['BadPixelsDark'], title='Final Bad Pixels Analysis'))
```
%% Cell type:markdown id: tags:
## Calibration Constants DB
Send the dark constants to the database and/or save them locally.
%% Cell type:code id: tags:
``` python
# Save constants to DB
md = None
for const_name in constant_maps.keys():
const = getattr(Constants.ePix100, const_name)()
const.data = constant_maps[const_name].data
# Set the operating condition
condition = Conditions.Dark.ePix100(
bias_voltage=bias_voltage,
integration_time=integration_time,
temperature=temperature_k,
in_vacuum=in_vacuum)
for parm in condition.parameters:
if parm.name == "Sensor Temperature":
parm.lower_deviation = temp_limits
parm.upper_deviation = temp_limits
# Get physical detector unit
db_module = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=const,
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)[0]
# Inject or save calibration constants
if db_output:
md = send_to_db(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
file_loc=file_loc,
report_path=report,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout
)
if local_output:
md = save_const_to_h5(
db_module=db_module,
karabo_id=karabo_id,
constant=const,
condition=condition,
data=const.data,
file_loc=file_loc,
report=report,
creation_time=creation_time,
out_folder=out_folder
)
print(f"Calibration constant {const_name} is stored locally at {out_folder} \n")
print("Constants parameter conditions are:\n"
f"• Bias voltage: {bias_voltage}\n"
f"• Integration time: {integration_time}\n"
f"• Temperature: {temperature_k}\n"
f"• In Vacuum: {in_vacuum}\n"
f"• Creation time: {md.calibration_constant_version.begin_at if md is not None else creation_time}\n")
```
......
%% Cell type:markdown id: tags:
# pnCCD Data Correction #
Authors: DET Group, Modified by Kiana Setoodehnia - Version 5.0
The following notebook provides offset, common mode, relative gain, split events and pattern classification corrections of images acquired with the pnCCD. This notebook *does not* yet correct for charge transfer inefficiency.
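The corrections are applied in the order offset, common mode, relative gain and finally split-event/pattern classification. A minimal conceptual sketch of that order is given below; the notebook itself uses the `xcal` calculators and pattern classifiers defined further down, so the helper function and its simplified common-mode estimate here are illustrative assumptions only.
``` python
import numpy as np

def correct_frame_sketch(raw, offset, relative_gain, bad_pixels):
    """Illustrative correction order only, not the notebook's implementation."""
    d = raw.astype(np.float32)
    d[bad_pixels != 0] = np.nan    # ignore known bad pixels
    d -= offset                    # offset correction
    d -= np.nanmedian(d, axis=0)   # crude stand-in for the common mode correction
    d /= relative_gain             # relative gain correction
    return d                       # split-event / pattern classification would follow
```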
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SQS/202031/p900166/raw" # input folder
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/pnccd_correct" # output folder
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 347 # which run to read data from
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequences running on the same slurm node.
karabo_da = 'PNCCD01' # data aggregators
karabo_id = "SQS_NQS_PNCCD1MP" # karabo prefix of PNCCD devices
receiver_id = "PNCCD_FMT-0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.
# Parameters affecting data correction.
commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column
commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations
split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise
split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise
saturated_threshold = 32000. # full well capacity in ADU
# Conditions for retrieving calibration constants
fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.
fix_temperature_bot = 0. # fix temperature for bottom sensor in K, set to 0. to use value from slow data.
gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.
bias_voltage = 0. # the detector's bias voltage. set to 0. to use value from slow data.
integration_time = 70 # detector's integration time
photon_energy = 1.6 # Al fluorescence in keV
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
remove_bias_voltage_if_zero = True # This flag enables removing bias voltage from the conditions if a 0 value is read from RAW data. This is useful when the corresponding constants for old RAW had no bias voltage because of a mistake in control data. e.g. p002857
# Booleans for selecting corrections to apply.
only_offset = False # Only, apply offset.
common_mode = True # Apply common mode correction
relgain = True # Apply relative gain correction
pattern_classification = True # classify split events
# parameters affecting stored output data.
chunk_size_idim = 1 # H5 chunking size of output data
limit_trains = 0 # this parameter is used for limiting number of images to correct from a sequence file.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
corr_bools["only_offset"] = only_offset
# Apply offset only.
if not only_offset:
corr_bools["relgain"] = relgain
corr_bools["common_mode"] = common_mode
corr_bools["pattern_class"] = pattern_classification
```
%% Cell type:code id: tags:
``` python
import os
import sys
import warnings
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
from IPython.display import Markdown, display
from extra_data import H5File, RunDirectory
from prettytable import PrettyTable
%matplotlib inline
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import pnccdlib
from cal_tools.files import DataFile
from cal_tools.calcat_interface import CalCatError, PNCCD_CalibrationData
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment,
)
from cal_tools.step_timing import StepTimer
```
%% Cell type:code id: tags:
``` python
# Calibration Database Settings, and Some Initial Run Parameters & Paths:
display(Markdown('### Initial Settings and Paths'))
# Sensor size and block size definitions (important for common mode and other corrections):
pixels_x = 1024 # rows of pnCCD in pixels
pixels_y = 1024 # columns of pnCCD in pixels
in_folder = Path(in_folder)
sensorSize = [pixels_x, pixels_y]
# For xcal.HistogramCalculators.
blockSize = [pixels_x//2, pixels_y//2] # sensor area will be analysed according to blockSize.
print(f"pnCCD size is: {pixels_x}x{pixels_y} pixels.")
print(f'Calibration database interface selected: {cal_db_interface}')
# Paths to the data:
instrument_src = instrument_source_template.format(karabo_id, receiver_id)
print(f"Instrument H5File source: {instrument_src}\n")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}", _use_voview=False)
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
# extract control data
step_timer.start()
ctrl_data = pnccdlib.PnccdCtrl(run_dc, karabo_id)
if bias_voltage == 0.:
bias_voltage = ctrl_data.get_bias_voltage()
if gain == -1:
gain = ctrl_data.get_gain()
if fix_temperature_top == 0:
fix_temperature_top = ctrl_data.get_fix_temperature_top()
if fix_temperature_bot == 0:
fix_temperature_bot = ctrl_data.get_fix_temperature_bot()
step_timer.done_step("Reading control parameters.")
# Printing the Parameters Read from the Data File:
display(Markdown('### Detector Parameters'))
print(f"Bias voltage is {bias_voltage:0.1f} V.")
print(f"Detector gain is set to 1/{int(gain)}.")
print(f"Detector integration time is set to {integration_time} ms")
print(f"Top pnCCD sensor is at temperature of {fix_temperature_top:0.2f} K")
print(f"Bottom pnCCD sensor is at temperature of {fix_temperature_bot:0.2f} K")
```
%% Cell type:code id: tags:
``` python
seq_files = []
for f in run_dc.select(instrument_src).files:
fpath = Path(f.filename)
if fpath.match(f"*{karabo_da}*.h5"):
seq_files.append(fpath)
if sequences != [-1]:
seq_files = sorted([f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)])
print(f"Processing a total of {len(seq_files)} sequence files:")
print(*seq_files, sep='\n')
```
%% Cell type:code id: tags:
``` python
gain_k = [k for k, v in pnccdlib.VALID_GAINS.items() if v == gain][0]
if gain_k == 'a':
split_evt_mip_threshold = 1000. # MIP threshold in ADU for event classification (10 times average noise)
# Each xcal.HistogramCalculator requires a total number of bins and a binning range. We define these
# using a dictionary:
# For all xcal histograms:
Hist_Bin_Dict = {
"bins": 35000, # number of bins
"bin_range": [0, 35000]
}
# For the numpy histograms on the last cell of the notebook:
Event_Bin_Dict = {
"event_bins": 1000, # number of bins
"b_range": [0, 35000] # bin range
}
elif gain_k == 'b':
split_evt_mip_threshold = 270. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 10000,
"bin_range": [0, 10000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 10000]
}
elif gain_k == 'c':
split_evt_mip_threshold = 110. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 3000,
"bin_range": [0, 3000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 3000]
}
elif gain_k == 'd':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 1000,
"bin_range": [0, 1000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 1000]
}
elif gain_k == 'e':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 500,
"bin_range": [0, 500]
}
Event_Bin_Dict = {
"event_bins": 500,
"b_range": [0, 500]
}
else:
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 220,
"bin_range": [0, 220]
}
Event_Bin_Dict = {
"event_bins": 220,
"b_range": [0, 220]
}
bins = Hist_Bin_Dict["bins"]
bin_range = Hist_Bin_Dict["bin_range"]
event_bins = Event_Bin_Dict["event_bins"]
b_range = Event_Bin_Dict["b_range"]
```
%% Cell type:markdown id: tags:
As a first step, calibration constants have to be retrieved from the calibration database
%% Cell type:code id: tags:
``` python
display(Markdown("### Constants retrieval"))
step_timer.start()
constant_names = ["OffsetCCD", "NoiseCCD", "BadPixelsDarkCCD"]
if relgain:
constant_names += ["RelativeGainCCD"]
# In the case of an older proposal (e.g., proposal 002857),
# it is possible that the bias voltage was 0
# resulting in the absence of bias voltage values in
# the previously injected dark constants. This situation can be
# attributed to a feature that is currently not available in iCalibrationDB.
if bias_voltage == 0 and remove_bias_voltage_if_zero:
bias_voltage = None
pnccd_cal = PNCCD_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=fix_temperature_top,
gain_setting=gain,
event_at=creation_time,
source_energy=photon_energy,
client=rest_cfg.calibration_client(),
)
try:
pnccd_metadata = pnccd_cal.metadata(calibrations=constant_names)
except CalCatError as e:
warning(e)
pnccd_metadata = pnccd_cal.metadata(calibrations=pnccd_cal.dark_calibrations)
if relgain:
try:
gain_metadata = pnccd_cal.metadata(calibrations=["RelativeGainCCD"])
for mod, md in gain_metadata.items():
pnccd_metadata[mod].update(md)
except CalCatError as e: # TODO: fix after getting new exceptions.
warning(f"{e} While asking for {pnccd_cal.illuminated_calibrations}")
warning("RelativeGainCCD is not retrieved from the calibration database. "
"Relative gain correction is disabled.")
corr_bools['relgain'] = False
mod_to_pdu = pnccd_cal.mod_to_pdu
ccvs_url = "https://in.xfel.eu/calibration/calibration_constant_versions/"
for mod, mod_md in pnccd_metadata.items():
display(Markdown(f"{mod}({mod_to_pdu[mod]}):"))
for cname, c_mdata in mod_md.items():
display(Markdown(
f"- [{cname}]({ccvs_url}/{c_mdata['ccv_id']}): {c_mdata['begin_validity_at']}"))
metadata = pnccd_metadata[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = set(
c for c in pnccd_cal.dark_calibrations if c not in metadata.keys())
if missing_dark_constants:
raise KeyError(
f"Dark constants {missing_dark_constants} are not available for correction.")
if corr_bools.get('relgain') and "RelativeGainCCD" not in metadata.keys():
warning("RelativeGainCCD is not found in the calibration database.")
corr_bools['relgain'] = False
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=pnccd_metadata,
caldb_root=pnccd_cal.caldb_root,
)
# load constants arrays after storing fragment YAML file
# and validating constants availability.
constants = pnccd_cal.ndarray_map(metadata=pnccd_metadata).get(karabo_da, {})
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(constants["OffsetCCD"][:,:,0], x_label='Columns', y_label='Rows', lut_label='Offset (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmax=16000,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Offset Map')
fig = xana.heatmapPlot(constants["NoiseCCD"][:,:,0], x_label='Columns', y_label='Rows',
lut_label='Corrected Noise (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Noise Map')
fig = xana.heatmapPlot(np.log2(constants["BadPixelsDarkCCD"][:,:,0]), x_label='Columns', y_label='Rows',
lut_label='Bad Pixel Value (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Bad Pixels Map')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(constants["RelativeGainCCD"], figsize=(8, 8), x_label='Columns', y_label='Rows',
lut_label='Relative Gain',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0.8, vmax=1.2,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
panel_top_low_lim = 0.5, panel_top_high_lim = 1.5, panel_side_low_lim = 0.5,
panel_side_high_lim = 1.5,
title = f'Relative Gain Map for pnCCD (Gain = 1/{int(gain)})')
```
%% Cell type:code id: tags:
``` python
#************************ Calculators ************************#
if corr_bools.get('common_mode'):
# Common Mode Correction Calculator:
cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],
commonModeBlockSize,
commonModeAxis,
parallel=False, dType=np.float32, stride=1,
noiseMap=constants["NoiseCCD"].astype(np.float32), minFrac=0.25)
if corr_bools.get('pattern_class'):
# Pattern Classifier Calculator:
# Left Hemisphere:
patternClassifierLH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, :pixels_y//2],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=3, # track along y-axis, left to right (see
nCells=1, # split_event.py file in pydetlib/lib/src/
allowElongated=False, # XFELDetAna/algorithms)
blockSize=[pixels_x, pixels_y//2],
parallel=False)
# Right Hemisphere:
patternClassifierRH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, pixels_y//2:],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=4, # track along y-axis, right to left
nCells=1,
allowElongated=False,
blockSize=[pixels_x, pixels_y//2],
parallel=False)
patternClassifierLH._imagesPerChunk = 1
patternClassifierRH._imagesPerChunk = 1
patternClassifierLH._noisemap = constants["NoiseCCD"][:, :pixels_x//2]
patternClassifierRH._noisemap = constants["NoiseCCD"][:, pixels_x//2:]
# Setting bad pixels:
patternClassifierLH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, :pixels_x//2] != 0)
patternClassifierRH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, pixels_x//2:] != 0)
```
%% Cell type:code id: tags:
``` python
#***************** Histogram Calculators ******************#
# Will contain uncorrected data:
histCalRaw = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain offset corrected data:
histCalOffsetCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('common_mode'):
# Will contain common mode corrected data:
histCalCommonModeCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('pattern_class'):
# Will contain split events pattern data:
histCalPcorr = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain singles events data:
histCalPcorrS = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('relgain'):
# Will contain gain corrected data:
histCalGainCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
```
%% Cell type:markdown id: tags:
## Applying corrections to the raw data
%% Cell type:code id: tags:
``` python
def offset_correction(wid, index, d):
"""offset correction.
Equating bad pixels' values to np.nan,
so that the pattern classifier ignores them:
"""
d = d.copy()
# TODO: To clear this up. Is it on purpose to save corrected data with nans?
d[bpix != 0] = np.nan
d -= offset # offset correction
# TODO: to clear this up. why save the badpixels map in the corrected data?
bpix_data[index, ...] = bpix
data[index, ...] = d
def common_mode(wid, index, d):
"""common-mode correction.
Discarding events caused by saturated pixels:
"""
d = np.squeeze(cmCorrection.correct(d, cellTable=np.zeros(pixels_y, np.int32)))
# we equate these values to np.nan so that the pattern classifier ignores them:
d[d >= saturated_threshold] = np.nan
data[index, ...] = d
def gain_correction(wid, index, d):
"""relative gain correction."""
d /= relativegain
data[index, ...] = d
def pattern_classification_correction(wid, index, d):
"""pattern classification correction.
Saves split-event corrected images and the calculated pattern (cluster) map.
The data are divided into left and right hemispheres:
"""
# pattern classification on corrected data
dataLH, patternsLH = patternClassifierLH.classify(d[:, :pixels_x//2])
dataRH, patternsRH = patternClassifierRH.classify(d[:, pixels_x//2:])
d[:, :pixels_x//2] = np.squeeze(dataLH)
d[:, pixels_x//2:] = np.squeeze(dataRH)
patterns = np.zeros(d.shape, patternsLH.dtype)
patterns[:, :pixels_x//2] = np.squeeze(patternsLH)
patterns[:, pixels_x//2:] = np.squeeze(patternsRH)
d[d < split_evt_primary_threshold*noise] = 0
data[index, ...] = d
ptrn_data[index, ...] = patterns
d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles
filtered_data[index, ...] = d
```
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads for a node with 72 cpus.
parallel_num_threads = 10
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
data_path = "INSTRUMENT/"+instrument_src+"/data/"
offset = np.squeeze(constants["OffsetCCD"])
noise = np.squeeze(constants["NoiseCCD"])
bpix = np.squeeze(constants["BadPixelsDarkCCD"])
relativegain = constants.get("RelativeGainCCD")
```
%% Cell type:code id: tags:
``` python
def write_datasets(seq_dc, corr_arrays, out_file, instrument_src):
"""
Creating datasets first then adding data.
To have metadata together available at the start of the file,
so it's quick to see what the file contains.
"""
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src, "data.image"].data_counts(labelled=False)
dataset_chunk = ((chunk_size_idim,) + corr_arrays["pixels"].shape[1:]) # e.g. (1, pixels_x, pixels_y)
with DataFile(out_file, 'w') as ofile:
# Create INDEX datasets.
ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create METADATA datasets
ofile.create_metadata(
like=seq_dc,
sequence=seq_dc.run_metadata()["sequenceNumber"],
instrument_channels=(f"{instrument_src}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=seq_dc.train_ids,
chunks=min(50, len(seq_dc.train_ids))
)
# TODO: gain dataset is just the RelativeGain constant
# and it doesn't make sense to write it into corrected data.
comp_fields = ["gain", "patterns", "pixels_classified"]
# TODO: to clear this up: why save corrected data
# in data/pixels rather than data/image.
for field, data in corr_arrays.items():
if field in comp_fields: # Write compressed corrected data.
outp_source.create_compressed_key(f"data.{field}", data=data)
else:
outp_source.create_key(
f"data.{field}", data=data,
chunks=dataset_chunk
)
```
%% Cell type:code id: tags:
``` python
# Data corrections and event classifications happen here.
# Also, the corrected data are written to datasets:
empty_seq = 0
for seq_n, seq_f in enumerate(seq_files):
seq_dc = H5File(seq_f)
out_file = f"{out_folder}/{seq_f.name}".replace("RAW", "CORR")
step_timer.start()
img_dc = seq_dc[instrument_src, "data.image"]
dshape = seq_dc[instrument_src, "data.image"].shape
n_trains = dshape[0]
corr_ntrains = dshape[0] # number of available trains to correct.
all_train_ids = img_dc.train_ids # All trains including trains with no data.
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {seq_f.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(
f"{seq_f.name} has {len(all_train_ids) - corr_ntrains} "
"trains with missing data."
)
# If you want to analyze only a certain number of frames
# instead of all available good frames.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
data_shape = (corr_ntrains, dshape[1], dshape[2])
print(f"Correcting file {seq_f} of {corr_ntrains} trains.")
# Overwrite seq_dc after eliminating empty trains or/and applying limited images.
seq_dc = seq_dc.select(
instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
raw_data = seq_dc[instrument_src, "data.image"].ndarray().astype(np.float32)
to_store_arrays = {"image": raw_data}
# TODO: move the parts for reading data to plot to later cells.
if seq_n == 0:
raw_plt = raw_data.copy() # plot first sequence only
step_timer.start()
# Allocating shared arrays for data arrays for each correction stage.
data = context.alloc(shape=data_shape, dtype=np.float32)
bpix_data = context.alloc(shape=data_shape, dtype=np.uint32)
histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data
# Applying offset correction
context.map(offset_correction, raw_data)
histCalOffsetCor.fill(data) # filling histogram with offset corrected data
if seq_n == 0:
off_data = data.copy() # plot first sequence only
to_store_arrays["pixels"] = data.copy()
to_store_arrays["mask"] = bpix_data
step_timer.done_step(f'offset correction.')
if corr_bools.get('common_mode'):
step_timer.start()
# Applying common mode correction
context.map(common_mode, data)
if seq_n == 0:
cm_data = data.copy() # plot first sequence only
to_store_arrays["pixels_cm"] = data.copy()
histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data
step_timer.done_step(f'common-mode correction.')
if corr_bools.get('relgain'):
step_timer.start()
# Applying gain correction
context.map(gain_correction, data)
if seq_n == 0:
rg_data = data.copy() # plot first sequence only
# TODO: Why storing a repeated constant for each image in corrected files.
to_store_arrays["gain"] = np.repeat(relativegain[np.newaxis, ...], corr_ntrains, axis=0).astype(np.float32) # noqa
histCalGainCor.fill(data) # filling histogram with gain corrected data
step_timer.done_step(f'gain correction.')
if corr_bools.get('pattern_class'):
step_timer.start()
ptrn_data = context.alloc(shape=data_shape, dtype=np.int32)
filtered_data = context.alloc(shape=data_shape, dtype=np.int32)
# Applying pattern classification correction
# Even though data is indeed of dtype np.float32,
# not specifying this again screws with the data quality.
context.map(pattern_classification_correction, data.astype(np.float32))
if seq_n == 0:
cls_data = data.copy() # plot first sequence only
# split event corrected images plotted for first sequence only
# (also these events are only singles events):
to_store_arrays["pixels_classified"] = data.copy()
to_store_arrays["patterns"] = ptrn_data
histCalPcorr.fill(data) # filling histogram with split events corrected data
# filling histogram with corr data after discarding doubles, triples, quadruple, clusters, and first singles
histCalPcorrS.fill(filtered_data)
step_timer.done_step(f'pattern classification correction.')
step_timer.start()
# Storing corrected data sources.
write_datasets(
seq_dc=seq_dc,
corr_arrays=to_store_arrays,
out_file=out_file,
instrument_src=instrument_src,
)
step_timer.done_step(f'Storing data.')
# Exit and raise warning if there are no data to correct for all sequences.
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
print("In addition to offset correction, the following corrections were performed:")
for k, v in corr_bools.items():
if v:
print(" -", k.upper())
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# Histograming the resulting spectra:
# First _ refers to the bin edges and second _ refers to statistics and we ignore both.
# if you use histCalRaw.get(cumulatative = True) and so on, the cumulatative = True turns the counts array such as
# RawHistVals and so on into a 1D array instead of keeping the original shape:
RawHistVals, _, RawHistMids, _ = histCalRaw.get()
off_cor_HistVals, _, off_cor_HistMids, _ = histCalOffsetCor.get()
if corr_bools.get('common_mode'):
cm_cor_HistVals, _, cm_HistMids, _ = histCalCommonModeCor.get()
if corr_bools.get('relgain'):
gain_cor_HistVals, _, gain_cor_HistMids, _ = histCalGainCor.get()
if corr_bools.get('pattern_class'):
split_HistVals, _, split_HistMids, _ = histCalPcorr.get() # split events corrected
singles_HistVals, _, singles_HistMids, _ = histCalPcorrS.get() # last s in variable names: singles events
```
%% Cell type:code id: tags:
``` python
# Saving intermediate data to disk:
step_timer.start()
np.savez(os.path.join(out_folder, 'Raw_Events.npz'), RawHistMids, RawHistVals)
np.savez(os.path.join(out_folder, 'Offset_Corrected_Events.npz'), off_cor_HistMids, off_cor_HistVals)
if corr_bools.get('common_mode'):
np.savez(os.path.join(out_folder, 'Common_Mode_Corrected_Events.npz'), cm_HistMids, cm_cor_HistVals)
if corr_bools.get('relgain'):
np.savez(os.path.join(out_folder, 'Gain_Corrected_Events.npz'), gain_cor_HistMids, gain_cor_HistVals)
if corr_bools.get('pattern_class'):
np.savez(os.path.join(out_folder, 'Split_Events_Corrected_Events.npz'), split_HistMids, split_HistVals)
np.savez(os.path.join(out_folder, 'Singles_Events.npz'), singles_HistMids, singles_HistVals)
step_timer.done_step(f'Saving intermediate data to disk.')
print("Various spectra are saved to disk in the form of histograms. Please check {}".format(out_folder))
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw vs. Corrected Spectra'))
step_timer.start()
figure = [{'x': RawHistMids,
'y': RawHistVals,
'y_err': np.sqrt(RawHistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Uncorrected'
},
{'x': off_cor_HistMids,
'y': off_cor_HistVals,
'y_err': np.sqrt(off_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset Corrected'
}]
if corr_bools.get('common_mode'):
figure.append({'x': cm_HistMids,
'y': cm_cor_HistVals,
'y_err': np.sqrt(cm_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Common Mode Corrected'})
if corr_bools.get('relgain'):
xrange = bin_range
figure.append({'x': gain_cor_HistMids,
'y': gain_cor_HistVals,
'y_err': np.sqrt(gain_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Gain Corrected'})
if corr_bools.get('pattern_class'):
figure.extend([{'x': split_HistMids,
'y': split_HistVals,
'y_err': np.sqrt(split_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Split Events Corrected'
},
{'x': singles_HistMids,
'y': singles_HistVals,
'y_err': np.sqrt(singles_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Singles Events'
}])
fig = xana.simplePlot(figure, aspect=1, x_label='ADU', y_label='Number of Occurrences', figsize='2col',
y_log=True, x_range=bin_range, title = '1 ADU per bin is used.',
legend='top-right-frame-1col')
step_timer.done_step('Plotting')
```
%% Cell type:code id: tags:
``` python
# This function plots pattern statistics:
def classification_plot(patternStats, hemisphere):
print("****************** {} HEMISPHERE ******************\n"
.format(hemisphere))
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(4, 4, 1)
sfields = ["singles", "first singles", "clusters"]
mfields = ["doubles", "triples", "quads"]
relativeOccurances = []
labels = []
for i, f in enumerate(sfields):
relativeOccurances.append(patternStats[f])
labels.append(f)
for i, f in enumerate(mfields):
for k in range(len(patternStats[f])):
relativeOccurances.append(patternStats[f][k])
labels.append("{}({})".format(f, k))
relativeOccurances = np.array(relativeOccurances, float)
relativeOccurances /= np.sum(relativeOccurances)
pie = ax.pie(relativeOccurances, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
smaps = ["singlemap", "firstsinglemap", "clustermap"]
for i, m in enumerate(smaps):
ax = fig.add_subplot(4, 4, 2+i)
pmap = ax.imshow(patternStats[m], interpolation="nearest", vmax=2*np.nanmedian(patternStats[m]))
ax.set_title(m)
cb = fig.colorbar(pmap)
mmaps = ["doublemap", "triplemap", "quadmap"]
k = 0
for i, m in enumerate(mmaps):
for j in range(4):
ax = fig.add_subplot(4, 4, 2+len(smaps)+k)
pmap = ax.imshow(patternStats[m][j], interpolation="nearest", vmax=2*np.median(patternStats[m][j]))
ax.set_title("{}({})".format(m,j))
cb = fig.colorbar(pmap)
k+=1
```
%% Cell type:code id: tags:
``` python
# The next two cells plot the classification results for left and right hemispheres, respectively:
display(Markdown('### Classification Results - Plots'))
if corr_bools.get('pattern_class'):
patternStatsLH = patternClassifierLH.getPatternStats()
classification_plot(patternStatsLH, 'Left')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
patternStatsRH = patternClassifierRH.getPatternStats()
classification_plot(patternStatsRH, 'Right')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Tabulated Statistics'))
if corr_bools.get('pattern_class'):
step_timer.start()
t0 = PrettyTable()
t0.title = "Total Number of Counts after All Corrections"
t0.field_names = ["Hemisphere", "Singles", "First-Singles", "Clusters"]
t0.add_row(["LH", patternStatsLH['singles'], patternStatsLH['first singles'], patternStatsLH['clusters']])
t0.add_row(["RH", patternStatsRH['singles'], patternStatsRH['first singles'], patternStatsRH['clusters']])
print(t0)
print("Abbreviations: D (Doubles), T (Triples), Q (Quadruples), L (Left), R (Right), and H (Hemisphere).")
t1 = PrettyTable()
t1.field_names = ["Index", "D-LH", "D-RH", "T-LH", "T-RH", "Q-LH", "Q-RH"]
t1.add_row([0, patternStatsLH['doubles'][0], patternStatsRH['doubles'][0], patternStatsLH['triples'][0],
patternStatsRH['triples'][0], patternStatsLH['quads'][0], patternStatsRH['quads'][0]])
t1.add_row([1, patternStatsLH['doubles'][1], patternStatsRH['doubles'][1], patternStatsLH['triples'][1],
patternStatsRH['triples'][1], patternStatsLH['quads'][1], patternStatsRH['quads'][1]])
t1.add_row([2, patternStatsLH['doubles'][2], patternStatsRH['doubles'][2], patternStatsLH['triples'][2],
patternStatsRH['triples'][2], patternStatsLH['quads'][2], patternStatsRH['quads'][2]])
t1.add_row([3, patternStatsLH['doubles'][3], patternStatsRH['doubles'][3], patternStatsLH['triples'][3],
patternStatsRH['triples'][3], patternStatsLH['quads'][3], patternStatsRH['quads'][3]])
print(t1)
step_timer.done_step('Classification Results - Tabulated Statistics')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
doublesLH = patternStatsLH['doubles'][0] + patternStatsLH['doubles'][1] + patternStatsLH['doubles'][2] + \
patternStatsLH['doubles'][3]
triplesLH = patternStatsLH['triples'][0] + patternStatsLH['triples'][1] + patternStatsLH['triples'][2] + \
patternStatsLH['triples'][3]
quadsLH = patternStatsLH['quads'][0] + patternStatsLH['quads'][1] + patternStatsLH['quads'][2] + \
patternStatsLH['quads'][3]
allsinglesLH = patternStatsLH['singles'] + patternStatsLH['first singles']
eventsLH = allsinglesLH + doublesLH + triplesLH + quadsLH
doublesRH = patternStatsRH['doubles'][0] + patternStatsRH['doubles'][1] + patternStatsRH['doubles'][2] + \
patternStatsRH['doubles'][3]
triplesRH = patternStatsRH['triples'][0] + patternStatsRH['triples'][1] + patternStatsRH['triples'][2] + \
patternStatsRH['triples'][3]
quadsRH = patternStatsRH['quads'][0] + patternStatsRH['quads'][1] + patternStatsRH['quads'][2] + \
patternStatsRH['quads'][3]
allsinglesRH = patternStatsRH['singles'] + patternStatsRH['first singles']
eventsRH = allsinglesRH + doublesRH + triplesRH + quadsRH
if eventsLH > 0.:
reloccurLH = np.array([allsinglesLH/eventsLH, doublesLH/eventsLH, triplesLH/eventsLH, quadsLH/eventsLH])
else:
reloccurLH = np.array([0]*4)
if eventsRH > 0.:
reloccurRH = np.array([allsinglesRH/eventsRH, doublesRH/eventsRH, triplesRH/eventsRH, quadsRH/eventsRH])
else:
reloccurRH = np.array([0]*4)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Pie Charts'))
if corr_bools.get('pattern_class'):
step_timer.start()
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot(1, 2, 1)
labels = ['Singles', 'Doubles', 'Triples', 'Quads']
pie = ax.pie(reloccurLH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in LH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
ax = fig.add_subplot(1, 2, 2)
pie = ax.pie(reloccurRH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in RH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
step_timer.done_step('Classification Results - Pie Charts')
```
%% Cell type:markdown id: tags:
### Various Images Averaged Over All Frames of Only the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
uncor_mean_im = np.nanmean(raw_data, axis=0)
offset_mean_im = np.nanmean(off_data, axis=0)
if corr_bools.get('common_mode'):
cm_mean_im = np.nanmean(cm_data, axis=0)
if corr_bools.get('relgain'):
gain_mean_im = np.nanmean(rg_data, axis=0)
if corr_bools.get('pattern_class'):
mean_im_cc = np.nanmean(cls_data, axis=0)
fig = xana.heatmapPlot(uncor_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image Averaged over Frames in the First Sequence')
fig = xana.heatmapPlot(offset_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(gain_mean_im, x_label='Columns', y_label='Rows',
lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(mean_im_cc, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0, vmax= 18000,
title = 'Image of Single Events Averaged over Frames in the First Sequence')
step_timer.done_step("Plotting")
```
%% Cell type:markdown id: tags:
### Images of the First Frame of the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(raw_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image (First Frame of the First Sequence)')
fig = xana.heatmapPlot(off_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(rg_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(cls_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Image of Single Events (First Frame of the First Sequence)')
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
# Resetting the histogram calculators:
histCalRaw.reset()
histCalOffsetCor.reset()
if corr_bools.get('common_mode'):
histCalCommonModeCor.reset()
if corr_bools.get('relgain'):
histCalGainCor.reset()
if corr_bools.get('pattern_class'):
histCalPcorr.reset()
histCalPcorrS.reset()
```
%% Cell type:markdown id: tags:
Next, the corrected event patterns are read from the patterns dataset created previously and separated into four categories (singles, doubles, triples and quadruples) using the pattern indices. This is done only for one sequence, corresponding to the seq_num variable, as an example.
Note that the number of bins and the bin range for the following histograms may differ from those used above (depending on gain) to make the counts more noticeable and the peaks better defined.
If you are interested in plotting the events from all sequences or the spectra of half of the sensor, execute the spectra_pnCCD_NBC.ipynb notebook.
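As a minimal illustration (plain NumPy, with a hypothetical helper name that is not part of this notebook), the index ranges above can be turned into per-category counts:
``` python
import numpy as np

# Pattern index ranges documented above (upper bounds exclusive).
PATTERN_RANGES = {
    "singles": (100, 102),   # 100 = singles, 101 = first singles
    "doubles": (200, 204),
    "triples": (300, 304),
    "quads": (400, 404),
}

def count_patterns(patterns: np.ndarray) -> dict:
    """Count classified pixels per category from a 'data.patterns' array."""
    return {
        name: int(np.isin(patterns, np.arange(lo, hi)).sum())
        for name, (lo, hi) in PATTERN_RANGES.items()
    }
```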
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
singles = []
doubles = []
triples = []
quads = []
with H5File(f"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}") as dc: # noqa
data = dc[instrument_src, "data.pixels_classified"].ndarray()
patterns = dc[instrument_src, "data.patterns"].ndarray()
# events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),
# 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns,
# there are left, right, up, and down indices.
# Separating the events:
# Singles and First Singles:
for s in range(100, 102):
single = data.copy()
single[patterns != s] = np.nan
singles.append(single)
for d in range(200, 204):
double = data.copy()
double[patterns != d] = np.nan
doubles.append(double)
for t in range(300, 304):
triple = data.copy()
triple[patterns != t] = np.nan
triples.append(triple)
for q in range(400, 404):
quad = data.copy()
quad[patterns != q] = np.nan
quads.append(quad)
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
step_timer.start()
hA = 0
h = 0
for single in singles:
hs, e = np.histogram(single.flatten(), bins=event_bins, range=b_range) # h: histogram counts, e: bin edges
h += hs
hA += hs # hA: counts all events (see below)
# The bin edges array has one more element than the counts array, so we plot against e[:-1] to match the size of h.
display(Markdown('### Histograms of Corrected Events for One Sequence Only'))
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111)
ax.step(e[:-1], h, color='blue', label='Events Involving Single Pixels Only')
ax.semilogy() # y-axis is log, x-axis is linear
ax.set_xlabel("Energy (ADU) [{} bins per {} ADU]".format(event_bins, b_range[1]-b_range[0]))
ax.set_ylabel("Corrected Events for One Sequence (counts)")
ax.set_xlim(b_range)
h = 0
for double in doubles:
hd, e = np.histogram(double.flatten(), bins=event_bins, range=b_range)
h += hd
hA += hd
ax.step(e[:-1], h, color='red', label='Events Splitting on Double Pixels')
h = 0
for triple in triples:
ht, e = np.histogram(triple.flatten(), bins=event_bins, range=b_range)
h += ht
hA += ht
ax.step(e[:-1], h, color='green', label='Events Splitting on Triple Pixels')
h = 0
for quad in quads:
hq, e = np.histogram(quad.flatten(), bins=event_bins, range=b_range)
h += hq
hA += hq
ax.step(e[:-1], h, color='purple', label='Events Splitting on Quadruple Pixels')
ax.step(e[:-1], hA, color='grey', label='All Valid Events')
l = ax.legend()
step_timer.done_step("Plotting")
```
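For reference, the e[:-1] convention in the cell above follows from np.histogram returning one more bin edge than counts; a quick standalone check (not part of the pipeline):
``` python
import numpy as np

counts, edges = np.histogram(np.random.default_rng(0).normal(size=1000), bins=50)
assert len(edges) == len(counts) + 1  # hence stepping against edges[:-1]
```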
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
@@ -108,7 +108,7 @@ install_requires = [
if "readthedocs.org" not in sys.executable:
install_requires += [
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.4.0", # noqa
"iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.4.1", # noqa
"XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.7.0", # noqa
"CalParrot @ git+ssh://git@git.xfel.eu:10022/calibration/calparrot.git@0.1", # noqa
]
@@ -122,7 +122,7 @@ class CalCatApi(metaclass=ClientWrapper):
return {
"parameters_conditions_attributes": [
{"parameter_id": self.parameter_id(k), "value": str(v)}
{"parameter_name": k, "value": str(v)}
for k, v in condition.items()
]
}
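For illustration (values invented for this example), a condition mapping such as {"Sensor Bias Voltage": 300} would serialize under the new parameter_name form as:
``` python
{
    "parameters_conditions_attributes": [
        {"parameter_name": "Sensor Bias Voltage", "value": "300"},
    ]
}
```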
@@ -1001,8 +1001,18 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
"Pixels X",
"Pixels Y",
"Feedback capacitor",
"Memory cell order",
]
illuminated_parameters = [
"Sensor Bias Voltage",
"Memory cells",
"Pixels X",
"Pixels Y",
"Feedback capacitor",
"Source Energy",
"category"
]
illuminated_parameters = dark_parameters + ["Source Energy", "category"]
def __init__(
self,
@@ -1013,6 +1023,7 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
pixels_x=256,
pixels_y=256,
source_energy=9.2,
memory_cell_order=None,
category=1,
modules=None,
client=None,
@@ -1034,6 +1045,7 @@ class LPD_CalibrationData(SplitConditionCalibrationData):
self.pixels_x = pixels_x
self.pixels_y = pixels_y
self.feedback_capacitor = feedback_capacitor
self.memory_cell_order = memory_cell_order
self.source_energy = source_energy
self.category = category
@@ -1110,6 +1122,7 @@ class JUNGFRAU_CalibrationData(CalibrationData):
"Integration Time",
"Sensor temperature",
"Gain Setting",
"Gain mode",
]
def __init__(
@@ -772,21 +772,38 @@ class LpdCorrections:
flat_fields)
def get_mem_cell_order(run, sources) -> str:
"""Load the memory cell order to use as a condition to find constants"""
res = set()
def get_mem_cell_pattern(run, sources) -> np.ndarray:
"""Load the memory cell order to use as a condition to find constants
This looks at the first train for each source, issuing a warning if the
pattern differs between sources.
"""
patterns = []
for source in sources:
cell_id_data = run[source, 'image.cellId'].drop_empty_trains()
if len(cell_id_data.train_ids) == 0:
continue # No data for this module
cell_ids = cell_id_data[0].ndarray()
# Trailing comma required so e.g. "...,1" doesn't match "...,10"
res.add(",".join([str(c) for c in cell_ids.flatten()]) + ",")
cell_ids = cell_id_data[0].ndarray().flatten()
if not any(np.array_equal(cell_ids, p) for p in patterns):
patterns.append(cell_ids)
if len(res) > 1:
if len(patterns) > 1:
warn("Memory cell order varies between detector modules: "
"; ".join([f"{s[:10]}...{s[-10:]}" for s in res]))
elif not res:
"; ".join([f"{s[:10]}...{s[-10:]}" for s in patterns]))
elif not patterns:
raise ValueError("Couldn't find memory cell order for any modules")
return res.pop()
return patterns[0]
def make_cell_order_condition(use_param, cellid_pattern) -> Optional[str]:
"""Convert the cell ID array to a condition string, or None if not used"""
if use_param == 'auto':
# auto -> use cell order if it wraps around (cells not filled monotonically)
use = len(cellid_pattern) > 2 and (
np.diff(cellid_pattern.astype(np.int32)) < 0
).any()
else:
use = (use_param == 'always')
return (",".join([str(c) for c in cellid_pattern]) + ",") if use else None
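For illustration, assuming the behaviour of make_cell_order_condition above: a wrapping cell pattern produces a trailing-comma condition string under 'auto', while a monotonic pattern yields None unless 'always' is requested (a sketch, not a test from the repository):
``` python
import numpy as np

wrapping = np.array([250, 251, 252, 0, 1, 2])   # wraps around -> used under 'auto'
monotonic = np.array([0, 1, 2, 3])              # filled monotonically -> skipped under 'auto'

assert make_cell_order_condition('auto', wrapping) == "250,251,252,0,1,2,"
assert make_cell_order_condition('auto', monotonic) is None
assert make_cell_order_condition('always', monotonic) == "0,1,2,3,"
```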
@@ -447,7 +447,11 @@ def init_jungfrau_geom(
(-1, -1) for _ in range(4)] + [(1, 1) for _ in range(4)]
elif "JF1M" in karabo_id:
nmods = 2
expected_modules = [f"JNGFR{i:02d}" for i in range(1, nmods+1)]
st_modno = 1
# TODO: This is a temporary workaround. A proper solution is needed.
if karabo_id == "SPB_CFEL_JF1M":
st_modno = 9
expected_modules = [f"JNGFR{i:02d}" for i in range(st_modno, st_modno+nmods)]
module_pos = ((-mod_width//2, 33), (-mod_width//2, -mod_height-33))
orientations = [(-1,-1), (1,1)]
else: # e.g. HED_IA1_JF500K1, FXE_XAD_JF500K, FXE_XAD_JFHZ
@@ -1030,3 +1030,17 @@ def write_compressed_frames(
dataset.id.write_direct_chunk(chunk_start, compressed)
return dataset
def reorder_axes(a, from_order, to_order):
"""Rearrange axes of array a from from_order to to_order
This does the same as np.transpose(), but making the before & after axes
more explicit. from_order is a sequence of strings labelling the axes of a,
and to_order is a similar sequence for the axes of the result.
"""
assert len(from_order) == a.ndim
assert sorted(from_order) == sorted(to_order)
from_order = list(from_order)
order = tuple([from_order.index(lbl) for lbl in to_order])
return a.transpose(order)
@@ -351,7 +351,9 @@ def prepare_job(
if title_cell is not None:
title_cell.source = ''
set_figure_format(new_nb, args["vector_figs"])
new_name = f"{nb_path.stem}__{cparm}__{suffix}.ipynb"
# In some cases a suffix for example can have `/`. e.g. LPDMini and GH2
# have karabo_da with a `/`.
new_name = f"{nb_path.stem}__{cparm}__{suffix}.ipynb".replace("/", "-")
nbformat.write(new_nb, cal_work_dir / new_name)
......
@@ -76,8 +76,6 @@ notebooks = {
"cluster cores": 8},
},
"CORRECT": {
"pre_notebooks": [
"notebooks/LPD/LPD_retrieve_constants_precorrection.ipynb"],
"notebook": "notebooks/LPD/LPD_Correct_Fast.ipynb",
"concurrency": {"parameter": "sequences",
"default concurrency": [-1],
@@ -110,6 +108,12 @@ notebooks = {
"use function": "balance_sequences",
"cluster cores": 16},
},
"INJECT_CONSTANTS": {
"notebook": "notebooks/LPDMini/LPD_Mini_Inject_calibration_constants_from_h5files.ipynb",
"concurrency": {"parameter": None,
"default concurrency": None,
"cluster cores": 1},
}
},
"PNCCD": {
"DARK": {
@@ -284,6 +288,17 @@ notebooks = {
},
},
},
"TIMEPIX": {
"CORRECT": {
"notebook": "notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb",
"concurrency": {
"parameter": None,
"use function": None,
"default concurrency": None,
"cluster cores": 1
},
},
},
"TEST": {
"TEST-CLI": {
"notebook": "notebooks/test/test-cli.ipynb",
@@ -22,6 +22,7 @@ from cal_tools.tools import (
recursive_update,
send_to_db,
write_constants_fragment,
reorder_axes,
)
# AGIPD operating conditions.
@@ -614,3 +615,13 @@ def test_write_constants_fragment(tmp_path: Path):
},
}
}
def test_reorder_axes():
a = np.zeros((10, 32, 256, 3))
from_order = ('cells', 'slow_scan', 'fast_scan', 'gain')
to_order = ('slow_scan', 'fast_scan', 'cells', 'gain')
assert reorder_axes(a, from_order, to_order).shape == (32, 256, 10, 3)
to_order = ('gain', 'fast_scan', 'slow_scan', 'cells')
assert reorder_axes(a, from_order, to_order).shape == (3, 256, 32, 10)
@@ -70,6 +70,7 @@ def test_main(capsys):
"--rel-gain", "true",
"--webservice-address", "inproc://socket",
"--correct",
"--verbose"
],
):
with patch("zmq.Context", return_value=context):
@@ -23,6 +23,11 @@ except ImportError:
log = logging.getLogger(__name__)
STATES_FINISHED = { # https://slurm.schedmd.com/squeue.html#lbAG
'BOOT_FAIL', 'CANCELLED', 'COMPLETED', 'DEADLINE', 'FAILED',
'OUT_OF_MEMORY', 'SPECIAL_EXIT', 'TIMEOUT',
}
class NoOpProducer:
"""Fills in for Kafka producer object when setting that up fails"""
@@ -50,10 +55,10 @@ def slurm_status(filter_user=True):
:return: a dictionary indexed by slurm jobid and containing a tuple
of (status, run time) as values.
"""
cmd = ["squeue"]
cmd = ["squeue", "--states=all"]
if filter_user:
cmd += ["--me"]
res = run(cmd, stdout=PIPE)
res = run(cmd, stdout=PIPE, stderr=PIPE)
if res.returncode == 0:
rlines = res.stdout.decode().split("\n")
statii = {}
@@ -65,6 +70,10 @@
except ValueError: # not enough values to unpack in split
pass
return statii
else:
log.error("Running squeue failed. stdout: %r, stderr: %r",
res.stdout.decode(), res.stderr.decode())
return None
def slurm_job_status(jobid):
@@ -148,15 +157,19 @@ class JobsMonitor:
Newly completed executions are present with an empty list.
"""
jobs_to_check = self.job_db.execute(
"SELECT job_id, exec_id FROM slurm_jobs WHERE finished = 0"
).fetchall()
if not jobs_to_check:
log.debug("No unfinished jobs to check")
return {}
statii = slurm_status()
# Check that slurm is giving proper feedback
if statii is None:
return {}
log.debug(f"SLURM info {statii}")
jobs_to_check = self.job_db.execute(
"SELECT job_id, exec_id FROM slurm_jobs WHERE finished = 0"
).fetchall()
ongoing_jobs_by_exn = {}
updates = []
for r in jobs_to_check:
@@ -166,13 +179,13 @@
if str(r['job_id']) in statii:
# statii contains jobs which are still going (from squeue)
slstatus, runtime = statii[str(r['job_id'])]
finished = False
execn_ongoing_jobs.append(f"{slstatus}-{runtime}")
else:
# These jobs have finished (successfully or otherwise)
_, runtime, slstatus = slurm_job_status(r['job_id'])
finished = True
finished = slstatus in STATES_FINISHED
updates.append((finished, runtime, slstatus, r['job_id']))