Skip to content
Snippets Groups Projects
Commit ef0a008a authored by David Hammer's avatar David Hammer
Browse files

Flake8 and related fixes

- trailing whitespace
- unused variables
- unused imports
- misc formatting
parent 0284bae9
No related branches found
No related tags found
2 merge requests!451Feat/agipd fixed gain correction,!438Feat/agipd add fixed gain mode to dark notebook
%% Cell type:markdown id: tags:
# AGIPD Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202031/p900174/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/hibef_agipd2" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 155 # runs to process, required
karabo_id = "HED_DET_AGIPD500K2G" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP' # path to control information
karabo_id_control = "HED_EXP_AGIPD500K2G" # karabo-id for control device
karabo_da_control = 'AGIPD500K2G00' # karabo DA for control infromation
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
cal_db_timeout = 30000 # in milli seconds
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
max_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300 # Bias voltage
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
overwrite = True # set to True if existing data should be overwritten
max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices within a train. 3 allowed maximum list input elements.
mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries
cell_id_preview = 1 # cell Id used for preview in single-shot plots
# Correction parameters
blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted
cm_dark_fraction = 0.66 # threshold for fraction of empty pixels to consider module enough dark to perform CM correction
cm_dark_range = [-50.,30] # range for signal value ADU for pixel to be consider as a dark pixel
cm_n_itr = 4 # number of iterations for common mode correction
hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain
mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain
noisy_adc_threshold = 0.25 # threshold to mask complete adc
ff_gain = 7.2 # conversion gain for absolute FlatField constants, while applying xray_gain
# Correction Booleans
only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = False # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
zero_nans = False # set NaN values in corrected data to 0
zero_orange = False # set to 0 very negative and very large values in corrected data
blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr
corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted
force_hg_if_below = False # set high gain if mg offset subtracted value is below hg_hard_threshold
force_mg_if_below = False # set medium gain if mg offset subtracted value is below mg_hard_threshold
mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold
common_mode = False # Common mode correction
melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels
mask_zero_std = False # Mask pixels with zero standard deviation across train
low_medium_gap = False # 5 sigma separation in thresholding between low and medium gain
# Paralellization parameters
chunk_size = 1000 # Size of chunk for image-weise correction
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
n_cores_correct = 16 # Number of chunks to be processed in parallel
n_cores_files = 4 # Number of files to be processed in parallel
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import copy
import gc
import itertools
import math
import re
import traceback
import warnings
from datetime import timedelta
from multiprocessing import Pool
from pathlib import Path
from time import perf_counter, sleep, time
from time import perf_counter
import tabulate
from dateutil import parser
from IPython.display import HTML, Latex, Markdown, display
from IPython.display import Latex, Markdown, display
warnings.filterwarnings('ignore')
import matplotlib
import matplotlib.pyplot as plt
import yaml
from extra_data import RunDirectory, stack_detector_data
from extra_geom import AGIPD_1MGeometry, AGIPD_500K2GGeometry
from iCalibrationDB import Detectors
from matplotlib import cm as colormap
from matplotlib.colors import LogNorm
from matplotlib.ticker import FormatStrFormatter, LinearLocator
from mpl_toolkits.mplot3d import Axes3D
matplotlib.use("agg")
%matplotlib inline
import numpy as np
import seaborn as sns
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
import seaborn as sns
from cal_tools.agipdlib import (AgipdCorrections, get_acq_rate, get_gain_mode,
get_gain_setting, get_num_cells)
from cal_tools.ana_tools import get_range
from cal_tools.cython import agipdalgs as calgs
from cal_tools.enums import AgipdGainMode, BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (CalibrationMetadata, get_dir_creation_date,
map_modules_from_folder, module_index_to_qm)
from cal_tools.tools import (get_dir_creation_date, map_modules_from_folder, module_index_to_qm)
sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
```
%% Cell type:markdown id: tags:
## Evaluated parameters ##
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the herarichy and dependability for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Dont apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_stripes"] = blc_stripes
corr_bools["blc_hmatch"] = blc_hmatch
corr_bools["blc_set_min"] = blc_set_min
corr_bools["match_asics"] = match_asics
corr_bools["corr_asic_diag"] = corr_asic_diag
corr_bools["zero_nans"] = zero_nans
corr_bools["zero_orange"] = zero_orange
corr_bools["mask_noisy_adc"] = mask_noisy_adc
corr_bools["force_hg_if_below"] = force_hg_if_below
corr_bools["force_mg_if_below"] = force_mg_if_below
corr_bools["common_mode"] = common_mode
corr_bools["melt_snow"] = melt_snow
corr_bools["mask_zero_std"] = mask_zero_std
corr_bools["low_medium_gap"] = low_medium_gap
```
%% Cell type:code id: tags:
``` python
if sequences[0] == -1:
sequences = None
control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
print(f'Path to control file {control_fn}')
```
%% Cell type:code id: tags:
``` python
# Create output folder
out_folder.mkdir(parents=True, exist_ok=True)
# Evaluate detector instance for mapping
instrument = karabo_id.split("_")[0]
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
# Evaluate requested modules
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
print("Process modules: ", ', '.join(
[module_index_to_qm(x) for x in modules]))
print(f"Detector in use is {karabo_id}")
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
```
%% Cell type:code id: tags:
``` python
# Display Information about the selected pulses indices for correction.
pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses
try:
if len(pulses_lst) > 1:
print("A range of {} pulse indices is selected: from {} to {} with a step of {}"
.format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),
pulses_lst[1] - pulses_lst[0]))
else:
print("one pulse is selected: a pulse of idx {}".format(pulses_lst[0]))
print(f"one pulse is selected: a pulse of idx {pulses_lst[0]}")
except Exception as e:
raise ValueError('max_pulses input Error: {}'.format(e))
raise ValueError(f"max_pulses input Error: {e}")
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(str(in_folder), run, path_template,
karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf
file_list = []
# ToDo: Split table over pages
print(f"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}")
table = []
ti = 0
for k, files in mapped_files.items():
i = 0
for f in list(files.queue):
file_list.append(f)
if i == 0:
table.append((ti, k, i, f))
else:
table.append((ti, "", i, f))
i += 1
ti += 1
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
file_list = sorted(file_list, key=lambda name: name[-10:])
```
%% Cell type:code id: tags:
``` python
filename = file_list[0]
channel = int(re.findall(r".*-AGIPD([0-9]+)-.*", filename)[0])
# Evaluate number of memory cells
mem_cells = get_num_cells(filename, karabo_id, channel)
if mem_cells is None:
raise ValueError(f"No raw images found in {filename}")
mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db
max_cells = mem_cells if max_cells == 0 else max_cells
# Evaluate aquisition rate
if acq_rate == 0:
acq_rate = get_acq_rate((filename, karabo_id, channel))
print(f"Maximum memory cells to calibrate: {max_cells}")
```
%% Cell type:code id: tags:
``` python
# Evaluate creation time
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour,
minutes=offset.minute, seconds=offset.second)
creation_time += delta
# Evaluate gain setting
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = get_gain_setting(str(control_fn), h5path_ctrl)
except Exception as e:
print(f'ERROR: while reading gain setting from: \n{control_fn}')
print(e)
print("Set gain setting to 0")
gain_setting = 0
# Evaluate gain mode (operation mode)
gain_mode = get_gain_mode(control_fn, h5path_ctrl)
```
%% Cell type:code id: tags:
``` python
print(f"Using {creation_time} as creation time")
print("Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {mem_cells_db}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain setting: {gain_setting}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Photon Energy: {photon_energy}")
```
%% Cell type:markdown id: tags:
## Data processing ##
%% Cell type:code id: tags:
``` python
agipd_corr = AgipdCorrections(max_cells, max_pulses,
h5_data_path=h5path,
h5_index_path=h5path_idx,
corr_bools=corr_bools)
agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold
agipd_corr.hg_hard_threshold = hg_hard_threshold
agipd_corr.mg_hard_threshold = mg_hard_threshold
agipd_corr.cm_dark_min = cm_dark_range[0]
agipd_corr.cm_dark_max = cm_dark_range[1]
agipd_corr.cm_dark_fraction = cm_dark_fraction
agipd_corr.cm_n_itr = cm_n_itr
agipd_corr.noisy_adc_threshold = noisy_adc_threshold
agipd_corr.ff_gain = ff_gain
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants to RAM
agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))
metadata = CalibrationMetadata(out_folder)
# NOTE: this notebook will not overwrite calibration metadata file
const_yaml = metadata.get("retrieved-constants", {})
# retrive constants
def retrieve_constants(mod):
"""
Retrieve calibration constants and load them to shared memory
Metadata for constants is taken from yml file or retrieved from the DB
"""
err = ''
# TODO: parallelize over modules
err = ""
k_da = karabo_da[mod]
try:
# check if there is a yaml file in out_folder that has the device constants.
if k_da in const_yaml:
when = agipd_corr.initialize_from_yaml(k_da, const_yaml, mod)
else:
# TODO: should we save what is found here in metadata?
when = agipd_corr.initialize_from_db(karabo_id, k_da, cal_db_interface, creation_time, mem_cells_db, bias_voltage,
photon_energy, gain_setting, acq_rate, mod, False)
when = agipd_corr.initialize_from_db(
karabo_id,
k_da,
cal_db_interface,
creation_time,
mem_cells_db,
bias_voltage,
photon_energy,
gain_setting,
acq_rate,
mod,
False,
)
except Exception as e:
err = f"Error: {e}\nError traceback: {traceback.format_exc()}"
when = None
return err, mod, when, k_da
ts = perf_counter()
with Pool(processes=len(modules)) as pool:
const_out = pool.map(retrieve_constants, modules)
print(f"Constants were loaded in {perf_counter()-ts:.01f}s")
```
%% Cell type:code id: tags:
``` python
# allocate memory for images and hists
n_images_max = max_cells*256
data_shape = (n_images_max, 512, 128)
agipd_corr.allocate_images(data_shape, n_cores_files)
```
%% Cell type:code id: tags:
``` python
def batches(l, batch_size):
"""Group a list into batches of (up to) batch_size elements"""
start = 0
while start < len(l):
yield l[start:start + batch_size]
start += batch_size
```
%% Cell type:code id: tags:
``` python
def imagewise_chunks(img_counts):
"""Break up the loaded data into chunks of up to chunk_size
Yields (file data slot, start index, stop index)
"""
for i_proc, n_img in enumerate(img_counts):
n_chunks = math.ceil(n_img / chunk_size)
for i in range(n_chunks):
yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
with Pool() as pool:
for file_batch in batches(file_list, n_cores_files):
# TODO: Move some printed output to logging or similar
print(f"Processing next {len(file_batch)} files:")
for file_name in file_batch:
print(" ", file_name)
step_timer.start()
img_counts = pool.starmap(agipd_corr.read_file, zip(range(len(file_batch)), file_batch,
[not common_mode]*len(file_batch)))
step_timer.done_step(f'Loading data from files')
if mask_zero_std:
# Evaluate zero-data-std mask
pool.starmap(agipd_corr.mask_zero_std, itertools.product(
range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)
))
step_timer.done_step('Mask 0 std')
# Perform offset image-wise correction
pool.starmap(agipd_corr.offset_correction, imagewise_chunks(img_counts))
step_timer.done_step("Offset correction")
if blc_noise or blc_stripes or blc_hmatch:
# Perform image-wise correction
pool.starmap(agipd_corr.baseline_correction, imagewise_chunks(img_counts))
step_timer.done_step("Base-line shift correction")
if common_mode:
# Perform cross-file correction parallel over asics
pool.starmap(agipd_corr.cm_correction, itertools.product(
range(len(file_batch)), range(16) # 16 ASICs per module
))
step_timer.done_step("Common-mode correction")
img_counts = pool.map(agipd_corr.apply_selected_pulses, range(len(file_batch)))
step_timer.done_step("Applying selected pulses after common mode correction")
# Perform image-wise correction
pool.starmap(agipd_corr.gain_correction, imagewise_chunks(img_counts))
step_timer.done_step("Gain corrections")
# Save corrected data
pool.starmap(agipd_corr.write_file, [
(i_proc, file_name, str(out_folder / Path(file_name).name.replace("RAW", "CORR")))
for i_proc, file_name in enumerate(file_batch)
])
step_timer.done_step("Save")
```
%% Cell type:code id: tags:
``` python
print(f"Correction of {len(file_list)} files is finished")
print(f"Total processing time {step_timer.timespan():.01f} s")
print(f"Timing summary per batch of {n_cores_files} files:")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# if the yml file contains "retrieved-constants", that means a leading
# notebook got processed and the reporting would be generated from it.
fst_print = True
timestamps = {}
for i, (error, modno, when, k_da) in enumerate(const_out):
qm = module_index_to_qm(modno)
# expose errors while applying correction
if error:
print("Error: {}".format(error) )
if k_da not in const_yaml:
if fst_print:
print("Constants are retrieved with creation time: ")
fst_print = False
module_timestamps = {}
# If correction is crashed
if not error:
print(f"{qm}:")
for key, item in when.items():
if hasattr(item, 'strftime'):
item = item.strftime('%y-%m-%d %H:%M')
when[key] = item
print('{:.<12s}'.format(key), item)
# Store few time stamps if exists
# Add NA to keep array structure
for key in ['Offset', 'SlopesPC', 'SlopesFF']:
if when and key in when and when[key]:
module_timestamps[key] = when[key]
else:
if error is not None:
module_timestamps[key] = "Err"
else:
module_timestamps[key] = "NA"
timestamps[qm] = module_timestamps
seq = sequences[0] if sequences else 0
if timestamps:
with open(f"{out_folder}/retrieved_constants_s{seq}.yml","w") as fd:
yaml.safe_dump({"time-summary": {f"S{seq}": timestamps}}, fd)
```
%% Cell type:code id: tags:
``` python
def do_3d_plot(data, edges, x_axis, y_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
# Make data.
X = edges[0][:-1]
Y = edges[1][:-1]
X, Y = np.meshgrid(X, Y)
Z = data.T
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm,
linewidth=0, antialiased=False)
ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm, linewidth=0, antialiased=False)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_zlabel("Counts")
def do_2d_plot(data, edges, y_axis, x_axis):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [np.min(edges[1]), np.max(edges[1]),
np.min(edges[0]), np.max(edges[0])]
im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
def get_trains_data(run_folder, source, include, detector_id, tid=None, modules=16, fillvalue=np.nan):
"""Load single train for all module
:param run_folder: Path to folder with data
:param source: Data source to be loaded
:param include: Inset of file name to be considered
:param detector_id: The karabo id of the detector to get data for
:param tid: Train Id to be loaded. First train is considered if None is given
:param path: Path to find image data inside h5 file
"""
run_data = RunDirectory(run_folder, include)
if tid is not None:
tid, data = run_data.select(f'{detector_id}/DET/*', source).train_from_id(tid)
else:
tid, data = next(iter(run_data.select(f'{detector_id}/DET/*', source).trains(require_all=True)))
return tid, stack_detector_data(train=data, data=source, fillvalue=fillvalue, modules=modules)
```
%% Cell type:code id: tags:
``` python
if dinstance == "AGIPD500K":
geom = AGIPD_500K2GGeometry.from_origin()
else:
geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[
(-525, 625),
(-550, -10),
(520, -160),
(542.5, 475),
])
```
%% Cell type:code id: tags:
``` python
include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'
tid, corrected = get_trains_data(out_folder, 'image.data', include, karabo_id, modules=nmods)
_, gains = get_trains_data(out_folder, 'image.gain', include, karabo_id, tid, modules=nmods)
_, mask = get_trains_data(out_folder, 'image.mask', include, karabo_id, tid, modules=nmods)
_, blshift = get_trains_data(out_folder, 'image.blShift', include, karabo_id, tid, modules=nmods)
_, cellId = get_trains_data(out_folder, 'image.cellId', include, karabo_id, tid, modules=nmods)
_, pulseId = get_trains_data(out_folder, 'image.pulseId', include, karabo_id, tid, modules=nmods, fillvalue=0)
_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, karabo_id, tid, modules=nmods)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\n'))
```
%% Cell type:markdown id: tags:
### Signal vs. Analogue Gain ###
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),
raw[:,1,...].flatten().astype(np.float32),
bins=(100, 100),
range=[[4000, 8192], [4000, 8192]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Analogue gain (ADU)")
```
%% Cell type:markdown id: tags:
### Signal vs. Digitized Gain ###
The following plot shows plots signal vs. digitized gain
%% Cell type:code id: tags:
``` python
hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),
gains.flatten().astype(np.float32), bins=(100, 3),
range=[[-50, 8192], [0, 3]])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Gain bit value")
```
%% Cell type:code id: tags:
``` python
print(f"Gain statistics in %")
table = [[f'{gains[gains==0].size/gains.size*100:.02f}',
f'{gains[gains==1].size/gains.size*100:.03f}',
f'{gains[gains==2].size/gains.size*100:.03f}']]
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["High", "Medium", "Low"])))
```
%% Cell type:markdown id: tags:
### Intensity per Pulse ###
%% Cell type:code id: tags:
``` python
pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]
mean_data = np.nanmean(corrected, axis=(2, 3))
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 1000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),
pulseId.flatten().astype(np.float32),
bins=(100, int(pulse_range[1])),
range=[[-50, 200000], pulse_range])
do_2d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
do_3d_plot(hist, (bins_x, bins_y), "Signal (ADU)", "Pulse id")
```
%% Cell type:markdown id: tags:
### Baseline shift ###
Estimated base-line shift with respect to the total ADU counts of corrected image.
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
h = ax.hist(blshift.flatten(), bins=100, log=True)
_ = plt.xlabel('Baseline shift [ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(10, 10))
corrected_ave = np.nansum(corrected, axis=(2, 3))
plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)
plt.xlim(-1, 1000)
plt.grid()
plt.xlabel('Illuminated corrected [MADU] ')
_ = plt.ylabel('Estimated baseline shift [ADU]')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw preview ###\n'))
display(Markdown(f'Mean over images of the RAW data\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(raw[:, 0, ...], axis=0)
vmin, vmax = get_range(data, 5)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \n'))
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)
ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Corrected preview ###\n'))
display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 7, -50)
vmin = - 50
ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap="jet", vmin=vmin, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected[cell_id_preview], 5, -50)
nbins = np.int((vmax + 50) / 2)
h = ax.hist(corrected[cell_id_preview].flatten(),
bins=nbins, range=(-50, vmax),
histtype='stepfilled', log=True)
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
_ = ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
ax.grid()
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Mean CORRECTED Preview ###\n'))
display(Markdown(f'A mean across one train \n'))
display(Markdown(f'A mean across one train\n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
data = np.mean(corrected, axis=0)
vmin, vmax = get_range(data, 7)
ax = geom.plot_data_fast(data, ax=ax, cmap="jet", vmin=-50, vmax=vmax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
vmin, vmax = get_range(corrected, 10, -100)
vmax = np.nanmax(corrected)
if vmax > 50000:
vmax=50000
nbins = np.int((vmax + 100) / 5)
h = ax.hist(corrected.flatten(), bins=nbins,
range=(-100, vmax), histtype='step', log=True, label = 'All')
_ = ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
_ = ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
_ = ax.hist(corrected[gains == 2].flatten(), bins=nbins,
range=(-100, vmax), alpha=0.5, log=True, label='Low gain', color='yellow')
_ = ax.legend()
_ = ax.grid()
_ = plt.xlabel('[ADU]')
_ = plt.ylabel('Counts')
ax.hist(corrected[gains == 0].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='High gain', color='green')
ax.hist(corrected[gains == 1].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Medium gain', color='red')
ax.hist(corrected[gains == 2].flatten(), bins=nbins, range=(-100, vmax),
alpha=0.5, log=True, label='Low gain', color='yellow')
ax.legend()
ax.grid()
plt.xlabel('[ADU]')
plt.ylabel('Counts')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Maximum GAIN Preview ###\n'))
display(Markdown(f'The per pixel maximum across one train for the digitized gain'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,
cmap="jet", vmin=-1, vmax=3)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append((item.name, "{:016b}".format(item.value)))
md = display(Latex(tabulate.tabulate(table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
%% Cell type:code id: tags:
``` python
display(Markdown(f'### Single Shot Bad Pixels ### \n'))
display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \n'))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
ax = geom.plot_data_fast(np.mean(mask>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
geom.plot_data_fast(np.mean(mask>0, axis=0), vmin=0, ax=ax, vmax=1, cmap="jet")
```
%% Cell type:markdown id: tags:
### Percentage of Bad Pixels across one train. Only Dark Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
cm = np.copy(mask)
cm[cm > BadPixels.NO_DARK_DATA.value] = 0
ax = geom.plot_data_fast(np.mean(cm>0, axis=0),
vmin=0, ax=ax, vmax=1, cmap="jet")
```
......
%% Cell type:markdown id: tags:
# AGIPD Retrieving Constants Pre-correction #
Author: European XFEL Detector Group, Version: 1.0
Retrieving Required Constants for Offline Calibration of the AGIPD Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202030/p900119/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_" # the folder to output to, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 80 # runs to process, required
karabo_id = "SPB_DET_AGIPD1M-1" # karabo karabo_id
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information
karabo_id_control = "SPB_IRU_AGIPD1M1" # karabo-id for control device
karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8015#8045" # the database interface to use
creation_date_offset = "00:00:00" # add an offset to creation date, e.g. to get different constants
slopes_ff_from_files = "" # Path to locally stored SlopesFF and BadPixelsFF constants
calfile = "" # path to calibration file. Leave empty if all data should come from DB
nodb = False # if set only file-based constants will be used
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 300
acq_rate = 0. # the detector acquisition rate, use 0 to try to auto-determine
gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine
photon_energy = 9.2 # photon energy in keV
max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries
max_cells_db = 0 # set to a value different than 0 to use this value for DB queries
# Correction Booleans
only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. if True, Offset is only applied.
rel_gain = False # do relative gain correction based on PC data
xray_gain = True # do relative gain correction based on xray data
blc_noise = False # if set, baseline correction via noise peak location is attempted
blc_stripes = False # if set, baseline corrected via stripes
blc_hmatch = False # if set, base line correction via histogram matching is attempted
match_asics = False # if set, inner ASIC borders are matched to the same signal level
adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value
```
%% Cell type:code id: tags:
``` python
# Fill dictionaries comprising bools and arguments for correction and data analysis
# Here the hierarichy and dependencies for correction booleans are defined
corr_bools = {}
# offset is at the bottom of AGIPD correction pyramid.
corr_bools["only_offset"] = only_offset
# Dont apply any corrections if only_offset is requested
if not only_offset:
corr_bools["adjust_mg_baseline"] = adjust_mg_baseline
corr_bools["rel_gain"] = rel_gain
corr_bools["xray_corr"] = xray_gain
corr_bools["blc_noise"] = blc_noise
corr_bools["blc_hmatch"] = blc_hmatch
```
%% Cell type:code id: tags:
``` python
from typing import List, Tuple
import matplotlib
import numpy as np
matplotlib.use("agg")
import multiprocessing
from datetime import timedelta
from pathlib import Path
from cal_tools import agipdlib, tools
from dateutil import parser
from iCalibrationDB import Conditions, Constants, Detectors
```
%% Cell type:code id: tags:
``` python
# slopes_ff_from_files left as str for now
in_folder = Path(in_folder)
out_folder = Path(out_folder)
metadata = tools.CalibrationMetadata(out_folder)
```
%% Cell type:code id: tags:
``` python
max_cells = mem_cells
creation_time = None
if use_dir_creation_date:
creation_time = tools.get_dir_creation_date(str(in_folder), run)
offset = parser.parse(creation_date_offset)
delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)
creation_time += delta
print(f"Using {creation_time} as creation time")
if sequences[0] == -1:
sequences = None
print(f"Outputting to {out_folder}")
out_folder.mkdir(parents=True, exist_ok=True)
import warnings
warnings.filterwarnings('ignore')
melt_snow = False if corr_bools["only_offset"] else agipdlib.SnowResolution.NONE
```
%% Cell type:code id: tags:
``` python
control_fn = in_folder / f'r{run:04d}' / f'RAW-R{run:04d}-{karabo_da_control}-S00000.h5'
h5path_ctrl = h5path_ctrl.format(karabo_id_control)
if gain_setting == 0.1:
if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):
print("Set gain-setting to None for runs taken before 2020-01-31")
gain_setting = None
else:
try:
gain_setting = agipdlib.get_gain_setting(str(control_fn), h5path_ctrl)
except Exception as e:
print(f'ERROR: while reading gain setting from: \n{control_fn}')
print(e)
print("Set gain setting to 0")
gain_setting = 0
# Evaluate gain mode (operation mode)
gain_mode = agipdlib.get_gain_mode(control_fn, h5path_ctrl)
print(f"Gain setting: {gain_setting}")
print(f"Gain mode: {gain_mode.name}")
print(f"Detector in use is {karabo_id}")
# Extracting Instrument string
instrument = karabo_id.split("_")[0]
# Evaluate detector instance for mapping
if instrument == "SPB":
dinstance = "AGIPD1M1"
nmods = 16
elif instrument == "MID":
dinstance = "AGIPD1M2"
nmods = 16
elif instrument == "HED":
dinstance = "AGIPD500K"
nmods = 8
print(f"Instrument {instrument}")
print(f"Detector instance {dinstance}")
if karabo_da[0] == '-1':
if modules[0] == -1:
modules = list(range(nmods))
karabo_da = ["AGIPD{:02d}".format(i) for i in modules]
else:
modules = [int(x[-2:]) for x in karabo_da]
```
%% Cell type:markdown id: tags:
## Retrieve Constants ##
%% Cell type:code id: tags:
``` python
def retrieve_constants(
qm_files: List[Path],
qm: str,
karabo_da: str,
idx: int
) -> Tuple[str, str, float, float, str, dict]:
"""
Retrieve constants for a module.
:return:
qm: module virtual name i.e. Q1M1.
karabo_da: karabo data aggregator.
acq_rate: acquisition rate parameter.
max_cells: number of memory cells.
err: string of faced errors.
mdata_dict: (DICT) dictionary with the metadata for the retrieved constants.
"""
err = None
if max_cells != 0:
# either use overriding notebook parameter
local_max_cells = max_cells
else:
# or look around in sequence files
for f in qm_files:
local_max_cells = agipdlib.get_num_cells(f, karabo_id, idx)
if local_max_cells is not None:
break
# maybe we never found this in a sequence file...
if local_max_cells is None:
raise ValueError(f"No raw images found for {qm} for all sequences")
if acq_rate == 0:
local_acq_rate = agipdlib.get_acq_rate(fast_paths=(f, karabo_id, idx))
else:
local_acq_rate = acq_rate
# avoid retrieving constant, if requested.
if nodb_with_dark:
return
const_dict = agipdlib.assemble_constant_dict(
corr_bools,
pc_bools,
local_max_cells,
bias_voltage,
gain_setting,
local_acq_rate,
photon_energy,
gain_mode=gain_mode,
beam_energy=None,
only_dark=only_dark,
)
# Retrieve multiple constants through an input dictionary
# to return a dict of useful metadata.
mdata_dict = dict()
mdata_dict['constants'] = dict()
mdata_dict['physical-detector-unit'] = None # initialization
for cname, cval in const_dict.items():
print(cname)
print(cval)
# saving metadata in a dict
mdata_dict['constants'][cname] = dict()
if slopes_ff_from_files and cname in ["SlopesFF", "BadPixelsFF"]:
mdata_dict['constants'][cname]["file-path"] = f"{slopes_ff_from_files}/slopesff_bpmask_module_{qm}.h5"
mdata_dict['constants'][cname]["creation-time"] = "00:00:00"
else:
condition = getattr(Conditions, cval[2][0]).AGIPD(**cval[2][1])
co, mdata = tools.get_from_db(
karabo_id,
karabo_da,
getattr(Constants.AGIPD, cname)(),
condition,
getattr(np, cval[0])(cval[1]),
cal_db_interface,
creation_time,
meta_only=True,
verbosity=1,
)
mdata_const = mdata.calibration_constant_version
# check if constant was sucessfully retrieved.
if mdata.comm_db_success:
mdata_dict['constants'][cname]["file-path"] = f"{mdata_const.hdf5path}" \
f"{mdata_const.filename}"
mdata_dict['constants'][cname]["creation-time"] = f"{mdata_const.begin_at}"
mdata_dict['physical-detector-unit'] = mdata_const.device_name
else:
mdata_dict['constants'][cname]["file-path"] = const_dict[cname][:2]
mdata_dict['constants'][cname]["creation-time"] = None
return qm, mdata_dict, karabo_da, acq_rate, local_max_cells, err
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mapped_files, _, _, _, _ = tools.map_modules_from_folder(
str(in_folder), run, path_template, karabo_da, sequences
)
pc_bools = [corr_bools.get("rel_gain"),
corr_bools.get("adjust_mg_baseline"),
corr_bools.get('blc_noise'),
corr_bools.get('blc_hmatch'),
corr_bools.get('blc_stripes'),
melt_snow]
inp = []
only_dark = False
nodb_with_dark = False
if not nodb:
only_dark = (calfile != "")
if calfile != "" and not corr_bools["only_offset"]:
nodb_with_dark = nodb
# A dict to connect virtual device
# to actual device name.
for module_index, k_da in zip(modules, karabo_da):
qm = tools.module_index_to_qm(module_index)
if qm in mapped_files and not mapped_files[qm].empty():
device = getattr(getattr(Detectors, dinstance), qm)
# TODO: make map_modules_from_folder just return list(s)
qm_files = [Path(mapped_files[qm].get()) for _ in range(mapped_files[qm].qsize())]
else:
print(f"Skipping {qm}")
continue
inp.append((qm_files, qm, k_da, module_index))
```
%% Cell type:code id: tags:
``` python
with multiprocessing.Pool(processes=nmods) as pool:
results = pool.starmap(retrieve_constants, inp)
```
%% Cell type:code id: tags:
``` python
mod_dev = dict()
mdata_dict = dict()
for r in results:
if r:
qm, md_dict, karabo_da, acq_rate, max_cells, err = r
mod_dev[karabo_da] = {"mod": qm, "err": err}
if err:
print(f"Error for module {qm}: {err}")
mdata_dict[karabo_da] = md_dict
# check if it is requested not to retrieve any constants from the database
if nodb_with_dark:
print("No constants were retrieved as calibrated files will be used.")
else:
metadata.update({"retrieved-constants": mdata_dict})
print("\nRetrieved constants for modules:",
', '.join([tools.module_index_to_qm(x) for x in modules]))
print(f"Operating conditions are:")
print(f"• Bias voltage: {bias_voltage}")
print(f"• Memory cells: {max_cells}\n")
print(f"• Memory cells: {max_cells}")
print(f"• Acquisition rate: {acq_rate}")
print(f"• Gain mode: {gain_mode.name}")
print(f"• Gain setting: {gain_setting}")
print(f"• Photon Energy: {photon_energy}")
print("Constant metadata is saved under \"retrieved-constants\" in calibration_metadata.yml\n")
```
%% Cell type:code id: tags:
``` python
print("Constants are retrieved with creation time: ")
i = 0
timestamps = {}
for k_da, dinfo in mod_dev.items():
print(dinfo["mod"], ":")
module_timestamps = {}
module_name = dinfo["mod"]
if k_da in mdata_dict:
for cname, mdata in mdata_dict[k_da]["constants"].items():
if hasattr(mdata["creation-time"], 'strftime'):
mdata["creation-time"] = mdata["creation-time"].strftime('%y-%m-%d %H:%M')
print(f'{cname:.<12s}', mdata["creation-time"])
# Store few time stamps if exists
# Add NA to keep array structure
for cname in ['Offset', 'SlopesPC', 'SlopesFF']:
if k_da not in mdata_dict or dinfo["err"]:
module_timestamps[cname] = "Err"
else:
if cname in mdata_dict[k_da]:
if mdata_dict[k_da][cname]["creation-time"]:
module_timestamps[cname] = mdata_dict[k_da][cname]["creation-time"]
else:
module_timestamps[cname] = "NA"
else:
module_timestamps[cname] = "NA"
timestamps[module_name] = module_timestamps
i += 1
if sequences:
seq_num = sequences[0]
else:
# if sequences[0] changed to None as it was -1
seq_num = 0
time_summary = metadata.setdefault("retrieved-constants", {}).setdefault("time-summary", {})
time_summary["SAll"] = timestamps
metadata.save()
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment