%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction for images acquired with the ePix100 detector.
The corrections are applied in the following sequence:
Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.
Offset-, common-mode- and gain-corrected data are saved to /data/image/pixels in the CORR files.
If pattern classification (charge sharing correction) is applied, the classified data are saved to /data/image/pixels_classified, and the corresponding patterns are saved to /data/image/patterns in the CORR files.
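For orientation, the corrected output can later be read back with extra_data. A minimal sketch, assuming default parameters (the output folder path is a placeholder, and the source name follows the `output_source_template` parameter below):
``` python
from extra_data import RunDirectory

# Hypothetical output path; use the out_folder given to this notebook.
corr_run = RunDirectory("/path/to/out_folder")
src = corr_run["HED_IA1_EPX100-1/CORR/RECEIVER:daqOutput"]

pixels = src["data.image.pixels"].ndarray()  # offset/CM/gain corrected images
# Only present when charge sharing correction was applied:
# clustered = src["data.image.pixels_classified"].ndarray()
# patterns = src["data.image.patterns"].ndarray()
```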
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202102/p002739/raw" # input folder, required
out_folder = "" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 38 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
input_source_template = '{karabo_id}/DET/{receiver}:daqOutput' # input(raw) detector data source in h5files
output_source_template = '{karabo_id}/CORR/{receiver}:daqOutput' # output(corrected) detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
limit_trains = 0 # Process only the first N trains, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl-cal001:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 8.048 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # Apply charge sharing correction (pattern classification).
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if, after masking, the fraction of good pixels falls below this value
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
from logging import warning
from sys import exit
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory, H5File
from extra_geom import Epix100Geometry
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpycaltools as xcal
from cal_tools.calcat_interface import EPIX100_CalibrationData, CalCatError
from cal_tools.epix100 import epix100lib
from cal_tools.files import DataFile
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment,
)
from cal_tools.step_timing import StepTimer
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
if absolute_gain:
relative_gain = True
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
prop_str = in_folder[in_folder.find('/p')+1:in_folder.find('/p')+8]
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
run_folder = in_folder / f"r{run:04d}"
output_source_template = output_source_template or input_source_template
input_src = input_source_template.format(
karabo_id=karabo_id, receiver=receiver_template)
output_src = output_source_template.format(
karabo_id=karabo_id, receiver=receiver_template)
print(f"Correcting run: {run_folder}")
print(f"Data corrected files are stored at: {out_folder}")
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a subset of sequences was requested,
# filter the seq_files list accordingly.
if sequences != [-1]:
    seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]
    if not len(seq_files):
        raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # the ePix100 has no memory cells
run_parallel = False
# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
    run_dc=run_dc,
    instrument_src=input_src,
    ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
    integration_time = ctrl_data.get_integration_time()
    integration_time_str_add = ""
else:
    integration_time_str_add = "(manual input)"
if fix_temperature < 0:
    temperature = ctrl_data.get_temprature()
    temperature_k = temperature + 273.15
    temp_str_add = ""
else:
    temperature_k = fix_temperature
    temperature = fix_temperature - 273.15
    temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
    md = display(Latex(tabulate.tabulate(
        table,
        tablefmt='latex',
        headers=["#", "file"]
    )))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, the dark constants (offset and noise maps) are retrieved; if gain corrections are enabled, the gain maps are retrieved as well.
%% Cell type:code id: tags:
``` python
epix_cal = EPIX100_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    integration_time=integration_time,
    sensor_temperature=temperature_k,
    in_vacuum=in_vacuum,
    source_energy=gain_photon_energy,
    event_at=creation_time,
    client=rest_cfg.calibration_client(),
)
const_metadata = epix_cal.metadata(calibrations=epix_cal.dark_calibrations)
if relative_gain:
    try:
        metadata = epix_cal.metadata(epix_cal.illuminated_calibrations)
        for key, value in metadata.items():
            const_metadata.setdefault(key, {}).update(value)
    except CalCatError as e:
        warning(f"CalCatError: {e}")

# Display timestamps of the retrieved calibration constants.
epix_cal.display_markdown_retrieved_constants(metadata=const_metadata)

# Load the constant data from files.
const_data = epix_cal.ndarray_map(metadata=const_metadata)[karabo_da]

# Validate the constants' availability and raise/warn accordingly.
missing_dark_constants = {"OffsetEPix100", "NoiseEPix100"} - set(const_data)
if missing_dark_constants:
    raise ValueError(
        f"Dark constants {missing_dark_constants} are not available to correct {karabo_da}. "
        "No correction is performed!")

if relative_gain and "RelativeGainEPix100" not in const_data:
    warning("RelativeGainEPix100 was not found in the calibration database.")
    relative_gain = False
    absolute_gain = False
```
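%% Cell type:markdown id: tags:
As an optional sanity check, not part of the original pipeline, one can verify that the retrieved dark maps cover the full sensor defined above (708 × 768 pixels). A minimal sketch:
%% Cell type:code id: tags:
``` python
# Optional sanity check (sketch): dark maps should match the sensor shape.
for name in ("OffsetEPix100", "NoiseEPix100"):
    arr = const_data[name]
    print(f"{name}: shape={arr.shape}, dtype={arr.dtype}")
    assert arr.shape[:2] == (x, y), f"{name} has unexpected shape {arr.shape}"
```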
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
    out_folder=(metadata_folder or out_folder),
    det_metadata=const_metadata,
    caldb_root=epix_cal.caldb_root,
)
```
%% Cell type:code id: tags:
``` python
# Initializing some parameters.
hscale = 1
stats = True
bins = np.arange(-50, 1000)
hist = {'O': 0}  # histograms per correction stage; keys CM, RG, CS, S, AG are added as corrections are enabled
```
%% Cell type:code id: tags:
``` python
if common_mode:
    commonModeBlockSize = [x//2, y//8]
    cmCorrectionB = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='block',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionR = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='row',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionC = xcal.CommonModeCorrection(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        orientation='col',
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    hist['CM'] = 0
```
%% Cell type:code id: tags:
``` python
if relative_gain:
    # The gain constant is given by the mode of the gain map,
    # since all bad pixels are masked with that value.
    _vals, _counts = np.unique(const_data["RelativeGainEPix100"], return_counts=True)
    gain_cnst = _vals[np.argmax(_counts)]
    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/const_data["RelativeGainEPix100"][..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        blockSize=blockSize,
        gains=None,
    )
    hist['RG'] = 0

    if absolute_gain:
        hscale = gain_cnst
        plot_unit = 'keV'
        if photon_energy > 0:
            plot_unit = '$\gamma$'
            hscale /= photon_energy
        hist['AG'] = 0
```
%% Cell type:code id: tags:
``` python
if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["NoiseEPix100"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )
    hist['CS'] = 0
    hist['S'] = 0
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
    d = d[..., np.newaxis].astype(np.float32)
    d = np.compress(
        np.any(d > 0, axis=(0, 1)), d, axis=2)

    # Offset correction.
    d -= const_data["OffsetEPix100"]
    hist['O'] += np.histogram(d, bins=bins)[0]

    # Common mode correction.
    if common_mode:
        # Block CM
        d = cmCorrectionB.correct(d)
        # Row CM
        d = cmCorrectionR.correct(d)
        # Column CM
        d = cmCorrectionC.correct(d)
        hist['CM'] += np.histogram(d, bins=bins)[0]

    # Relative gain correction.
    if relative_gain:
        d = gainCorrection.correct(d)
        hist['RG'] += np.histogram(d, bins=bins)[0]

    """The gain correction currently applies
    an absolute correction (not a relative correction,
    as implied by the name);
    it changes the scale (the unit of measurement)
    of the data from ADU to either keV or number of photons.
    But the pattern classification relies on comparing
    the data with the NoiseEPix100 map, which is still in ADU.

    The best solution is to do a relative gain
    correction first and apply the global absolute
    gain to the data at the end, after clustering.
    """
    if pattern_classification:
        d_clu, patterns = patternClassifier.classify(d)
        d_clu[d_clu < (split_evt_primary_threshold*const_data["NoiseEPix100"])] = 0
        data_clu[index, ...] = np.squeeze(d_clu)
        data_patterns[index, ...] = np.squeeze(patterns)
        hist['CS'] += np.histogram(d_clu, bins=bins)[0]
        d_sing = d_clu[patterns == 100]  # pattern 100 corresponds to single-photon events
        if len(d_sing):
            hist['S'] += np.histogram(d_sing, bins=bins)[0]

    # Absolute gain correction:
    # changes the data from ADU to keV (or number of photons).
    if absolute_gain:
        d = d * gain_cnst
        if photon_energy > 0:
            d /= photon_energy
        hist['AG'] += np.histogram(d, bins=bins)[0]
        if pattern_classification:
            # Apply the same scaling to the classified data.
            d_clu = d_clu * gain_cnst
            if photon_energy > 0:
                d_clu /= photon_energy
            data_clu[index, ...] = np.squeeze(d_clu)

    data[index, ...] = np.squeeze(d)
```
%% Cell type:code id: tags:
``` python
# 10 workers was chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
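%% Cell type:markdown id: tags:
For readers unfamiliar with pasha: `context.map` calls a kernel function once per entry of its input, in parallel, and output arrays allocated via `context.alloc` are shared among the workers. A minimal toy sketch, based on pasha's documented kernel contract (not part of the correction itself):
%% Cell type:code id: tags:
``` python
import numpy as np
import pasha as psh

ctx = psh.context.ThreadContext(num_workers=2)
out = ctx.alloc(shape=(4,), dtype=np.float64)  # shared among workers

def double_it(worker_id, index, value):
    # For plain arrays the kernel receives (worker_id, index, value);
    # for extra_data KeyData, as in correct_train above, it additionally
    # receives the train ID: (worker_id, index, train_id, data).
    out[index] = 2 * value

ctx.map(double_it, np.arange(4.0))
print(out)  # [0. 2. 4. 6.]
```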
%% Cell type:code id: tags:
``` python
empty_seq = 0
for f in seq_files:
    seq_dc = H5File(f)
    # Save corrected data in an output file with the name
    # of the corresponding raw sequence file.
    out_file = out_folder / f.name.replace("RAW", "CORR")

    # Data shape in seq_dc excluding trains with empty images.
    ishape = seq_dc[input_src, "data.image.pixels"].shape
    corr_ntrains = ishape[0]
    all_train_ids = seq_dc.train_ids

    # Warn if this sequence has no trains to correct.
    # Otherwise, print the number of trains with missing data.
    if corr_ntrains == 0:
        warning(f"No trains to correct for {f.name}: "
                "Skipping the processing of this file.")
        empty_seq += 1
        continue
    elif len(all_train_ids) != corr_ntrains:
        print(f"{f.name} has {len(all_train_ids) - corr_ntrains} trains with missing data.")

    # This parameter is only used for testing.
    if limit_trains > 0:
        print(f"\nCorrected trains are limited to: {limit_trains} trains")
        corr_ntrains = min(corr_ntrains, limit_trains)

    oshape = (corr_ntrains, *ishape[1:])
    data = context.alloc(shape=oshape, dtype=np.float32)
    if pattern_classification:
        data_clu = context.alloc(shape=oshape, dtype=np.float32)
        data_patterns = context.alloc(shape=oshape, dtype=np.int32)

    step_timer.start()  # Correct data.
    # Overwrite seq_dc after eliminating empty trains and/or limiting the number of images.
    seq_dc = seq_dc.select(
        input_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
    pixel_data = seq_dc[input_src, "data.image.pixels"]
    context.map(correct_train, pixel_data)
    step_timer.done_step(f'Correcting {corr_ntrains} trains.')

    step_timer.start()  # Write corrected data.
    # Create CORR files and add corrected data sections.
    image_counts = seq_dc[input_src, "data.image.pixels"].data_counts(labelled=False)

    with DataFile(out_file, "w") as ofile:
        dataset_chunk = ((chunk_size_idim,) + oshape[1:])  # e.g. (1, pixels_x, pixels_y)
        seq_file = seq_dc.files[0]  # FileAccess

        # Create INDEX datasets.
        ofile.create_index(seq_dc.train_ids, from_file=seq_file)
        # Create METADATA datasets.
        ofile.create_metadata(
            like=seq_dc,
            sequence=seq_file.sequence,
            instrument_channels=sorted({f'{output_src}/data', f'{input_src}/data'})
        )
        # Create the INSTRUMENT section to later add corrected datasets to.
        instr_src_group = ofile.create_instrument_source(output_src)
        # Create count/first datasets at the INDEX source.
        instr_src_group.create_index(data=image_counts)

        image_raw_fields = [  # /data/image/
            "binning", "bitsPerPixel", "dimTypes", "dims",
            "encoding", "flipX", "flipY", "roiOffsets", "rotation",
        ]
        for field in image_raw_fields:
            field_arr = seq_dc[input_src, f"data.image.{field}"].ndarray()
            instr_src_group.create_key(
                f"data.image.{field}", data=field_arr,
                chunks=(chunk_size_idim, *field_arr.shape[1:]))

        # Add the main corrected `data.image.pixels` dataset and store corrected data.
        instr_src_group.create_key(
            "data.image.pixels", data=data, chunks=dataset_chunk)
        instr_src_group.create_key(
            "data.trainId", data=seq_dc.train_ids, chunks=min(50, len(seq_dc.train_ids)))
        if 'data.pulseId' in list(seq_dc[input_src].keys()):  # some runs are missing 'data.pulseId'
            instr_src_group.create_key(
                "data.pulseId",
                data=seq_dc[input_src]['data.pulseId'].ndarray()[:, 0],
                chunks=min(50, len(seq_dc.train_ids)),
            )
        if pattern_classification:
            # Add the charge-sharing corrected images and their classified patterns.
            instr_src_group.create_key(
                "data.image.pixels_classified", data=data_clu, chunks=dataset_chunk)
            instr_src_group.create_key(
                "data.image.patterns", data=data_patterns, chunks=dataset_chunk)
        if output_src != input_src:
            # Link the old data source name to the new corrected data source.
            ofile.create_legacy_source(input_src, output_src)
    step_timer.done_step('Storing data.')

if empty_seq == len(seq_files):
    warning("No valid trains for RAW data to correct.")
    exit(0)
```
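%% Cell type:markdown id: tags:
As a quick check, one can open a written CORR file and list the keys of the corrected source. A sketch, assuming the loop above wrote at least one file (`out_file` refers to the last one):
%% Cell type:code id: tags:
``` python
from extra_data import H5File

# Sketch: inspect the last written CORR file.
corr_dc = H5File(out_file)
print(sorted(corr_dc[output_src].keys()))
# Expected keys include data.image.pixels and, if pattern classification
# ran, data.image.pixels_classified and data.image.patterns.
```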
%% Cell type:markdown id: tags:
## Plot Histograms
%% Cell type:code id: tags:
``` python
bins_ADU = bins[:-1] + np.diff(bins)[0]/2  # bin centers in ADU
bins_keV = bins_ADU * hscale               # bin centers in keV (or photons)
```
%% Cell type:code id: tags:
``` python
# Histogram in ADU
plt.figure(figsize=(12,8))
plt.plot(bins_ADU,hist['O'], label='Offset corr')
if common_mode:
    plt.plot(bins_ADU,hist['CM'], label='CM corr')
if relative_gain:
    plt.plot(bins_ADU,hist['RG'], label='Relative Gain corr')
if pattern_classification:
    plt.plot(bins_ADU[bins_ADU>10],hist['CS'][bins_ADU>10], label='Charge Sharing corr')
    if np.any(hist['S']):
        plt.plot(bins_ADU,hist['S'], label='Singles')
xtick_step = 50
plt.xlim(bins[0], bins[-1]+1)
plt.xticks(np.arange(bins[0],bins[-1]+2,xtick_step))
plt.xlabel('ADU',fontsize=12)
plt.yscale('log')
plt.title(f'{karabo_id} | {prop_str}, r{run}', fontsize=14, fontweight='bold')
plt.legend(fontsize=12)
plt.grid(ls=':')
```
%% Cell type:code id: tags:
``` python
# Histogram in keV/number of photons
if absolute_gain:
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    plt.figure(figsize=(12,8))
    if relative_gain:
        plt.plot(bins_keV,hist['RG'], label='Absolute Gain corr', c=colors[2])
    if pattern_classification:
        plt.plot(bins_keV[bins_keV>.5],hist['CS'][bins_keV>.5], label='Charge Sharing corr', c=colors[3])
        if np.any(hist['S']):
            plt.plot(bins_keV[bins_keV>.5],hist['S'][bins_keV>.5], label='Singles', c=colors[4])
    if photon_energy==0:  # axis in keV rather than number of photons
        xtick_step = 5
        plt.xlim(left=-2)
        plt.xticks(np.arange(0,plt.gca().get_xlim()[1],xtick_step))
    plt.xlabel(plot_unit,fontsize=12)
    plt.yscale('log')
    plt.title(f'{karabo_id} | {prop_str}, r{run}', fontsize=14, fontweight='bold')
    plt.legend(fontsize=12)
    plt.grid(ls=':')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
geom = Epix100Geometry.from_relative_positions(top=[386.5, 364.5, 0.], bottom=[386.5, -12.5, 0.])
if pattern_classification:
    plt.subplots(1,2,figsize=(18,18))
    ax = plt.subplot(1,2,1)
    ax.set_title('Before CS correction',fontsize=12,fontweight='bold')
else:
    plt.subplots(1,1,figsize=(9,9))
    ax = plt.subplot(1,1,1)
    ax.set_title(f'{karabo_id} | {prop_str}, r{run} | Average of {data.shape[0]} trains',fontsize=12,fontweight='bold')

# Average image before charge sharing correction
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data.mean(axis=0)
vmin = max(image.mean()-2*image.std(),0)
vmax = image.mean()+3*image.std()
geom.plot_data(image,
               ax=ax,
               colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
               origin='upper',
               vmin=vmin,
               vmax=vmax)

# Average image after charge sharing correction
if pattern_classification:
    ax = plt.subplot(1,2,2)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('bottom', size='5%', pad=0.5)
    image = data_clu.mean(axis=0)
    geom.plot_data(image,
                   ax=ax,
                   colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
                   origin='upper',
                   vmin=vmin,
                   vmax=vmax)
    ax.set_title('After CS correction',fontsize=12,fontweight='bold')
    plt.suptitle(f'{karabo_id} | {prop_str}, r{run} | Average of {data.shape[0]} trains',fontsize=14,fontweight='bold',y=.72);
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
train_idx = -1
if pattern_classification:
    plt.subplots(1,2,figsize=(18,18))
    ax = plt.subplot(1,2,1)
    ax.set_title('Before CS correction',fontsize=12,fontweight='bold')
else:
    plt.subplots(1,1,figsize=(9,9))
    ax = plt.subplot(1,1,1)
    ax.set_title(f'{karabo_id} | {prop_str}, r{run} | Single frame',fontsize=12,fontweight='bold')

# Single frame before charge sharing correction
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data[train_idx]
vmin = max(image.mean()-2*image.std(),0)
vmax = image.mean()+3*image.std()
geom.plot_data(image,
               ax=ax,
               colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
               origin='upper',
               vmin=vmin,
               vmax=vmax)

# Single frame after charge sharing correction
if pattern_classification:
    ax = plt.subplot(1,2,2)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('bottom', size='5%', pad=0.5)
    image = data_clu[train_idx]
    geom.plot_data(image,
                   ax=ax,
                   colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
                   origin='upper',
                   vmin=vmin,
                   vmax=vmax)
    ax.set_title('After CS correction',fontsize=12,fontweight='bold')
    plt.suptitle(f'{karabo_id} | {prop_str}, r{run} | Single frame',fontsize=14,fontweight='bold',y=.72);
```