Skip to content
Snippets Groups Projects

[AGIPD][CORRECT] Use calcat_interface and remove precorrection notebook

Merged Karim Ahmed requested to merge AGIPD_calcat_interface into master
4 files
+ 1116
117
Compare changes
  • Side-by-side
  • Inline
Files
4
%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
The sequence of corrections applied is:
Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.
Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.
If pattern classification is applied (charge sharing correction), this data will be saved to /data/image/pixels_classified, while the corresponding patterns will be saved to /data/image/patterns in the CORR files.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202202/p003121/raw" # input folder, required
out_folder = "" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 156 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # Karabo ID of the detector
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
# Only for testing
limit_images = 0 # ONLY FOR TESTING. process only first N images, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 8.048 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    """Delegate to ``xfel_calibrate.calibrate.balance_sequences``.

    The import is performed at call time. All arguments are forwarded
    unchanged and the delegate's return value is handed back as-is.
    """
    from xfel_calibrate import calibrate

    return calibrate.balance_sequences(
        in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from extra_data import RunDirectory, H5File
from pathlib import Path
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import h5_copy_except
from cal_tools.epix100 import epix100lib
from cal_tools.calibration import CalCatError, EPIX100_CalibrationData
from cal_tools.tools import (
calcat_creation_time,
get_dir_creation_date,
get_constant_from_db,
load_specified_constants,
load_constants_dict,
CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
from iCalibrationDB import (
Conditions,
Constants,
)
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
# Sensor geometry of the ePix100: x counts rows, y counts columns.
x, y = 708, 768
# Unit label used by the plots until a gain correction changes it.
plot_unit = 'ADU'
# Absolute gain correction implies relative gain correction.
if absolute_gain:
    relative_gain = True
```
%% Cell type:code id: tags:
``` python
# Turn the folder parameters into Path objects and make sure the
# output location exists before anything is written to it.
in_folder, out_folder = Path(in_folder), Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)

run_folder = in_folder / f"r{run:04d}"
instrument_src = instrument_source_template.format(karabo_id, receiver_template)

for message in (
    f"Correcting run: {run_folder}",
    f"Instrument H5File source: {instrument_src}",
    f"Data corrected files are stored at: {out_folder}",
):
    print(message)
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")

# Fall back to the output folder when no metadata folder was given.
metadata = CalibrationMetadata(
    metadata_folder if metadata_folder else out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml.
# NOTE: this notebook shouldn't overwrite calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
detector_files = run_dc.select(f"*{karabo_id}*").files
seq_files = [Path(f.filename) for f in detector_files]

# When specific sequences were requested, keep only the files whose
# name ends with one of the requested sequence numbers.
if sequences != [-1]:
    seq_globs = [f"*-S{s:05d}.h5" for s in sequences]
    seq_files = [
        f for f in seq_files
        if any(f.match(glob) for glob in seq_globs)
    ]

if len(seq_files) == 0:
    raise IndexError("No sequence files available for the selected sequences.")

print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
# Timer whose start()/done_step() calls bracket the processing steps below.
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()

sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [side // 2 for side in sensorSize]
xcal.defaultBlockSize = blockSize
memoryCells = 1  # ePIX has no memory cells
run_parallel = False

# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
    run_dc=run_dc,
    instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
    ctrl_src=f"{karabo_id}/DET/CONTROL",
)

# A negative parameter value means "read it from the control data";
# otherwise the value was given by hand and is labelled as such.
integration_time_str_add = "(manual input)"
if integration_time < 0:
    integration_time = ctrl_data.get_integration_time()
    integration_time_str_add = ""

temp_str_add = "(manual input)"
if fix_temperature < 0:
    temperature = ctrl_data.get_temprature()
    temperature_k = temperature + 273.15
    temp_str_add = ""
else:
    temperature_k = fix_temperature
    temperature = fix_temperature - 273.15

print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
# Detector operating conditions used to build the dark and illuminated
# condition objects.
# NOTE(review): in the visible source `const_cond` is only read by the
# `get_constant_from_db` loop; this cell looks like the variant that the
# `EPIX100_CalibrationData` setup replaces - confirm before relying on it.
cond_dict = {
"bias_voltage": bias_voltage,
"integration_time": integration_time,
"temperature": temperature_k,
"in_vacuum": in_vacuum,
}
dark_condition = Conditions.Dark.ePix100(**cond_dict)
# Update conditions with the illuminated conditions.
cond_dict.update({
"photon_energy": gain_photon_energy
})
illum_condition = Conditions.Illuminated.ePix100(**cond_dict)
# Maps each constant name to the condition it is requested with.
const_cond = {
"Offset": dark_condition,
"Noise": dark_condition,
"RelativeGain": illum_condition,
}
```
%% Cell type:code id: tags:
# Operating conditions handed to the calibration-data interface.
epix_cal = EPIX100_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    integration_time=integration_time,
    sensor_temperature=temperature_k,
    in_vacuum=in_vacuum,
    source_energy=gain_photon_energy,
    event_at=creation_time,
    snapshot_at=None,  # creation_time is deliberately not passed here
)

# Offset and noise are always requested; the gain map only on demand.
constant_names = ["OffsetEPix100", "NoiseEPix100"]
if relative_gain:
    constant_names.append("RelativeGainEPix100")
``` python
# Load the calibration constants into `const_data`.
# NOTE(review): this cell appears to interleave two variants of the
# retrieval - one through `get_constant_from_db` storing the keys of
# `const_cond`, one through `epix_cal.ndarray` storing the names in
# `constant_names`. As shown there are two `else:` branches for a single
# `if const_yaml:` - confirm which variant is intended.
empty_constant = np.zeros((708, 768, 1), dtype=np.float32)
if const_yaml: # Used while reproducing corrected data.
print(f"Using stored constants in {metadata.filename}")
const_data, _ = load_specified_constants(const_yaml[karabo_da]["constants"])
for cname, cval in const_data.items():
# Missing constants other than RelativeGain fall back to an all-zero map.
if cval is None and cname != "RelativeGain":
const_data[cname] = empty_constant
else: # First correction attempt.
const_data, _ = load_constants_dict(const_yaml[karabo_da]["constants"])
else:
const_data = dict()
for cname, condition in const_cond.items():
# Avoid retrieving RelativeGain, if not needed for correction.
if cname == "RelativeGain" and not relative_gain:
const_data[cname] = None
else:
const_data[cname] = get_constant_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=getattr(Constants.ePix100, cname)(),
condition=condition,
empty_constant=None if cname == "RelativeGain" else empty_constant,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
print_once=2,
timeout=cal_db_timeout
)
# Retrieval through the calibration-data interface; results are stored
# under the names listed in `constant_names`.
for cname in constant_names:
try:
const_data[cname] = epix_cal.ndarray(module=1, calibration=cname) # TODO: what is this module number?
except CalCatError as e:
# A missing gain map only disables the gain corrections; any other
# missing constant is re-raised with the constant name prepended.
if cname == "RelativeGainEPix100":
print("RelativeGainEPix100 is not found. No gain correction will be applied.")
relative_gain = False
absolute_gain = False
else:
raise CalCatError(f"{cname}: {e}")
```
%% Cell type:code id: tags:
``` python
# The gain correction reads the gain map from const_data under the key
# "RelativeGainEPix100", so the availability guard has to test that same
# key. Testing the former "RelativeGain" key would not protect the lookup.
if relative_gain and const_data.get("RelativeGainEPix100") is None:
    print(
        "WARNING: RelativeGain map is requested, but not found.\n"
        "No gain correction will be applied"
    )
    relative_gain = False
    absolute_gain = False

# Initializing some parameters.
hscale = 1  # histogram range scale; changed once a gain map is applied
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]  # one bin per unit of the histogram range
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:
``` python
# Settings shared by the two histogram calculators created in this cell.
_hist_common = dict(
    parallel=run_parallel,
    nCells=memoryCells,
    blockSize=blockSize,
)

# Histogram of the offset-corrected data.
histCalOffsetCor = xcal.HistogramCalculator(
    sensorSize, bins=nbins, range=hrange, **_hist_common)

# *****************Histogram Calculators****************** #
# Histogram of the fully corrected data.
histCalCor = xcal.HistogramCalculator(
    sensorSize, bins=1050, range=[-50, 1000], **_hist_common)
```
%% Cell type:code id: tags:
``` python
# Common-mode correction set-up: one histogram calculator plus three
# correction objects (block-, row- and column-wise).
if common_mode:
    histCalCMCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )

    # Arguments shared by all three corrections. The noise map is given
    # exactly once, under its "NoiseEPix100" name; repeating the
    # `noiseMap` keyword is a syntax error. `runParallel` and `parallel`
    # are two distinct keyword arguments and both are kept.
    cm_kwargs = dict(
        shape=sensorSize,
        blockSize=commonModeBlockSize,
        nCells=memoryCells,
        noiseMap=const_data['NoiseEPix100'],
        runParallel=run_parallel,
        parallel=run_parallel,
        stats=stats,
        minFrac=cm_min_frac,
        noiseSigma=cm_noise_sigma,
    )
    cmCorrectionB = xcal.CommonModeCorrection(orientation='block', **cm_kwargs)
    cmCorrectionR = xcal.CommonModeCorrection(orientation='row', **cm_kwargs)
    cmCorrectionC = xcal.CommonModeCorrection(orientation='col', **cm_kwargs)
```
%% Cell type:code id: tags:
``` python
# Relative/absolute gain set-up. The gain map is read once, under the
# "RelativeGainEPix100" key, and passed as a single positional argument.
if relative_gain:
    gain_map = const_data["RelativeGainEPix100"]
    gain_cnst = np.median(gain_map)
    hscale = gain_cnst
    plot_unit = 'keV'
    if photon_energy > 0:
        # Raw string: "\g" is not a valid escape sequence.
        plot_unit = r'$\gamma$'
        hscale /= photon_energy

    gainCorrection = xcal.RelativeGainCorrection(
        sensorSize,
        gain_cnst/gain_map[..., None],
        nCells=memoryCells,
        parallel=run_parallel,
        blockSize=blockSize,
        gains=None,
    )

    histCalRelGainCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )

    if absolute_gain:
        # Histogram range is scaled to the unit of the gain-corrected data.
        histCalAbsGainCor = xcal.HistogramCalculator(
            sensorSize,
            bins=nbins,
            range=hrange*hscale,
            parallel=run_parallel,
            nCells=memoryCells,
            blockSize=blockSize
        )
```
%% Cell type:code id: tags:
``` python
# Pattern classification set-up. The classifier receives the noise map
# exactly once, under its "NoiseEPix100" name; a second noise-map
# positional argument would shift all following positional arguments.
if pattern_classification:
    patternClassifier = xcal.PatternClassifier(
        [x, y],
        const_data["NoiseEPix100"],
        split_evt_primary_threshold,
        split_evt_secondary_threshold,
        split_evt_mip_threshold,
        tagFirstSingles=0,
        nCells=memoryCells,
        allowElongated=False,
        blockSize=[x, y],
        parallel=run_parallel,
    )

    # Histogram of the classified data before absolute gain is applied.
    histCalCSCor = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize,
    )

    # Histograms of the gain-scaled classified data (all clusters and
    # single-pixel events); their range follows the gain scale.
    histCalGainCorClusters = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
    histCalGainCorSingles = xcal.HistogramCalculator(
        sensorSize,
        bins=nbins,
        range=hrange*hscale,
        parallel=run_parallel,
        nCells=memoryCells,
        blockSize=blockSize
    )
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
    """Correct the image of one train and store the results.

    Works on notebook-level state: the constants in ``const_data``, the
    correction flags, the correction/histogram objects and the output
    arrays ``data``, ``data_clu`` and ``data_patterns``.

    Args:
        wid: Worker id supplied by the mapping context (unused).
        index: Position of this train in the output arrays.
        tid: Train id (unused).
        d: Train data, indexed by the source and key in ``pixel_data``.
    """
    d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
    # Keep only slices along the last axis that hold a positive value.
    d = np.compress(
        np.any(d > 0, axis=(0, 1)), d, axis=2)

    # Offset correction: subtracted exactly once, using the
    # "OffsetEPix100" constant.
    d -= const_data["OffsetEPix100"]
    histCalOffsetCor.fill(d)

    # Common Mode correction.
    if common_mode:
        # Block CM
        d = cmCorrectionB.correct(d)
        # Row CM
        d = cmCorrectionR.correct(d)
        # COL CM
        d = cmCorrectionC.correct(d)
        histCalCMCor.fill(d)

    # relative gain correction.
    if relative_gain:
        d = gainCorrection.correct(d)
        histCalRelGainCor.fill(d)

    # The gain correction is currently applying an absolute correction
    # (not a relative correction as implied by the name); it changes the
    # scale (the unit of measurement) of the data from ADU to either keV
    # or n_of_photons. But the pattern classification relies on comparing
    # data with the NoiseEPix100 map, which is still in ADU.
    # The best solution is to do a relative gain correction first and
    # apply the global absolute gain to the data at the end, after
    # clustering.
    if pattern_classification:
        d_clu, patterns = patternClassifier.classify(d)
        # Zero everything below the primary threshold, using the same
        # noise map the classifier was configured with.
        noise_threshold = (
            split_evt_primary_threshold * const_data["NoiseEPix100"])
        d_clu[d_clu < noise_threshold] = 0
        data_clu[index, ...] = np.squeeze(d_clu)
        data_patterns[index, ...] = np.squeeze(patterns)
        histCalCSCor.fill(d_clu)

    # absolute gain correction
    # changes data from ADU to keV (or n. of photons)
    if absolute_gain:
        d = d * gain_cnst
        if photon_energy > 0:
            d /= photon_energy
        histCalAbsGainCor.fill(d)

        if pattern_classification:
            # Modify pattern classification.
            d_clu = d_clu * gain_cnst
            if photon_energy > 0:
                d_clu /= photon_energy
            data_clu[index, ...] = np.squeeze(d_clu)
            histCalGainCorClusters.fill(d_clu)

            # pattern 100 corresponds to single photons events
            d_sing = d_clu[patterns == 100]
            if len(d_sing):
                histCalGainCorSingles.fill(d_sing)

    data[index, ...] = np.squeeze(d)
    histCalCor.fill(d)
```
%% Cell type:code id: tags:
``` python
# (source, key) pair addressing the raw image data.
pixel_data = (instrument_src, "data.image.pixels")

# 10 is a number chosen after testing 1 ... 71 parallel threads
n_correction_workers = 10
context = psh.context.ThreadContext(num_workers=n_correction_workers)
```
%% Cell type:code id: tags:
``` python
# Correct every sequence file and write the matching CORR file.
for f in seq_files:
    seq_dc = H5File(f)

    n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]
    # Data shape in seq_dc excluding trains with empty images.
    dshape = seq_dc[pixel_data].shape
    dataset_chunk = (chunk_size_idim, *dshape[1:])  # e.g. (1, pixels_x, pixels_y)

    n_empty = n_imgs - dshape[0]
    if n_empty != 0:
        print(f"- WARNING: {f} has {n_empty} trains with empty data.")

    # This parameter is only used for testing.
    if limit_images > 0:
        n_imgs = min(n_imgs, limit_images)

    # Output buffers filled by correct_train.
    data = context.alloc(shape=dshape, dtype=np.float32)
    if pattern_classification:
        data_clu = context.alloc(shape=dshape, dtype=np.float32)
        data_patterns = context.alloc(shape=dshape, dtype=np.int32)

    step_timer.start()
    trains_to_correct = seq_dc.select(
        *pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
    context.map(correct_train, trains_to_correct)
    step_timer.done_step(f'Correcting {n_imgs} trains.')

    # Store detector h5 information in the corrected file
    # and deselect data to correct and store later.
    step_timer.start()

    out_file = out_folder / f.name.replace("RAW", "CORR")
    data_path = "INSTRUMENT/"+instrument_src+"/data/image"
    pixels_path = f"{data_path}/pixels"

    # First copy all raw data source to the corrected file,
    # while excluding the raw data image /data/image/pixels.
    with h5py.File(out_file, 'w') as ofile:
        # Copy RAW non-calibrated sources.
        with h5py.File(f, 'r') as sfile:
            h5_copy_except.h5_copy_except_paths(
                sfile, ofile, [pixels_path])

        # Create dataset in CORR h5 file and add corrected images.
        dataset = ofile.create_dataset(
            pixels_path,
            data=data,
            chunks=dataset_chunk,
            dtype=np.float32)

        if pattern_classification:
            # Save /data/image/pixels_classified in corrected file.
            datasetc = ofile.create_dataset(
                f"{data_path}/pixels_classified",
                data=data_clu,
                chunks=dataset_chunk,
                dtype=np.float32)

            # Save /data/image/patterns in corrected file.
            datasetp = ofile.create_dataset(
                f"{data_path}/patterns",
                data=data_patterns,
                chunks=dataset_chunk,
                dtype=np.int32)

    step_timer.done_step('Storing data.')
```
%% Cell type:code id: tags:
``` python
# Histogram calculators to overlay, in plotting order. The optional
# ones are only referenced when their correction was enabled.
hist_sources = [
    (histCalCor, 'Total corr.'),
    (histCalOffsetCor, 'Offset corr.'),
]
if common_mode:
    hist_sources.append((histCalCMCor, 'CM corr.'))
if relative_gain:
    hist_sources.append((histCalRelGainCor, 'Relative gain corr.'))
if pattern_classification:
    hist_sources.append((histCalCSCor, 'Charge sharing corr.'))

d = []
for hist_calc, hist_label in hist_sources:
    ho, eo, co, so = hist_calc.get()
    d.append({
        'x': co,
        'y': ho,
        'y_err': np.sqrt(ho[:]),
        'drawstyle': 'steps-mid',
        'errorstyle': 'bars',
        'errorcoarsing': 2,
        'label': hist_label,
    })

fig = xana.simplePlot(
    d, aspect=1, x_label='Energy (ADU)',
    y_label='Number of occurrences', figsize='2col',
    y_log=True, x_range=(-50, 500),
    legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
# Histograms of the absolute-gain corrected data, on the gain-scaled axis.
if absolute_gain:
    gain_hist_sources = [(histCalAbsGainCor, 'Absolute gain corr.')]
    if pattern_classification:
        gain_hist_sources.append(
            (histCalGainCorClusters, 'Charge sharing corr.'))
        gain_hist_sources.append(
            (histCalGainCorSingles, 'Isolated photons (singles)'))

    d = []
    for hist_calc, hist_label in gain_hist_sources:
        ho, eo, co, so = hist_calc.get()
        d.append({
            'x': co,
            'y': ho,
            'y_err': np.sqrt(ho[:]),
            'drawstyle': 'steps-mid',
            'errorstyle': 'bars',
            'errorcoarsing': 2,
            'label': hist_label,
        })

    fig = xana.simplePlot(
        d, aspect=1, x_label=f'Energy ({plot_unit})',
        y_label='Number of occurrences', figsize='2col',
        y_log=True,
        x_range=np.array((-50, 500))*hscale,
        legend='top-center-frame-2col',
    )
    plt.grid()
    plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
# Per-pixel median over the first axis of the corrected data, ignoring NaNs.
median_image = np.nanmedian(data, axis=0)
fig = xana.heatmapPlot(
    median_image,
    x_label='Columns',
    y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50,
    vmax=50,
)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
# First entry of the corrected data array.
first_image = data[0, ...]
fig = xana.heatmapPlot(
    first_image,
    x_label='Columns',
    y_label='Rows',
    lut_label=f'Signal ({plot_unit})',
    x_range=(0, y),
    y_range=(0, x),
    vmin=-50,
    vmax=50,
)
step_timer.done_step('Plotting single shot of corrected data.')
```
Loading