Skip to content
Snippets Groups Projects

[AGIPD][CORRECT] Use calcat_interface and remove precorrection notebook

Merged Karim Ahmed requested to merge AGIPD_calcat_interface into master
1 file
+ 3
2
Compare changes
  • Side-by-side
  • Inline
%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
The sequence of correction applied are:
Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.
Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.
If pattern classification is applied (charge sharing correction), this data will be saved to /data/image/pixels_classified, while the corresponding patterns will be saved to /data/image/patterns in the CORR files.
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202202/p003121/raw" # input folder, required
out_folder = "" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 156 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
# Only for testing
limit_images = 0 # ONLY FOR TESTING. process only first N images, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl016:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 8.048 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if after masking the ratio of good pixels falls below this
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
from logging import warning
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, display
from extra_data import RunDirectory, H5File
from pathlib import Path
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from calibration_client import CalibrationClient
from cal_tools import h5_copy_except, restful_config
from cal_tools.epix100 import epix100lib
from cal_tools import h5_copy_except
from cal_tools.calcat_interface import EPIX100_CalibrationData
from cal_tools.epix100 import epix100lib
from cal_tools.restful_config import restful_config
from cal_tools.tools import (
calcat_creation_time,
load_constants_dict,
CalibrationMetadata,
)
from cal_tools.step_timing import StepTimer
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
if absolute_gain:
relative_gain = True
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
run_folder = in_folder / f"r{run:04d}"
instrument_src = instrument_source_template.format(
karabo_id, receiver_template)
print(f"Correcting run: {run_folder}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Data corrected files are stored at: {out_folder}")
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
metadata = CalibrationMetadata(metadata_folder or out_folder)
# Constant paths are saved under retrieved-constants in calibration_metadata.yml.
# NOTE: this notebook shouldn't overwrite calibration metadata file.
const_yaml = metadata.get("retrieved-constants", {})
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a set of sequences requested to correct,
# adapt seq_files list.
if sequences != [-1]:
seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]
if not len(seq_files):
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # ePIX has no memory cells
run_parallel = False
# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dc,
instrument_src=f"{karabo_id}/DET/{receiver_template}:daqOutput",
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ""
else:
integration_time_str_add = "(manual input)"
if fix_temperature < 0:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ""
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
base_api_url=calcat_config['base-api-url'],
use_oauth2=calcat_config['use-oauth2'],
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
constant_names = ["OffsetEPix100", "NoiseEPix100"]
if relative_gain:
constant_names += ["RelativeGainEPix100"]
if const_yaml: # Used while reproducing corrected data.
print(f"Using stored constants in {metadata.filename}")
const_data, _ = load_constants_dict(const_yaml[karabo_da]["constants"])
else:
epix_cal = EPIX100_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=temperature_k,
in_vacuum=in_vacuum,
source_energy=gain_photon_energy,
event_at=creation_time,
snapshot_at=creation_time,
client=client,
)
const_data = epix_cal.ndarray_map(calibrations=constant_names)[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = set(
c for c in ["OffsetEPix100", "NoiseEPix100"] if c not in const_data.keys())
if missing_dark_constants:
raise KeyError(
f"Dark constants {missing_dark_constants} are not available for correction.")
if relative_gain and "RelativeGainEPix100" not in const_data.keys():
warning("RelativeGainEPix100 is not found in the calibration database.")
relative_gain = False
absolute_gain = False
```
%% Cell type:code id: tags:
``` python
# Initializing some parameters.
hscale = 1
stats = True
hrange = np.array([-50, 1000])
nbins = hrange[1] - hrange[0]
commonModeBlockSize = [x//2, y//2]
```
%% Cell type:code id: tags:
``` python
histCalOffsetCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
# *****************Histogram Calculators****************** #
histCalCor = xcal.HistogramCalculator(
sensorSize,
bins=1050,
range=[-50, 1000],
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if common_mode:
histCalCMCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize,
)
cmCorrectionB = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='block',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionR = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='row',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionC = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='col',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
```
%% Cell type:code id: tags:
``` python
if relative_gain:
gain_cnst = np.median(const_data["RelativeGainEPix100"])
hscale = gain_cnst
plot_unit = 'keV'
if photon_energy > 0:
plot_unit = '$\gamma$'
hscale /= photon_energy
gainCorrection = xcal.RelativeGainCorrection(
sensorSize,
gain_cnst/const_data["RelativeGainEPix100"][..., None],
nCells=memoryCells,
parallel=run_parallel,
blockSize=blockSize,
gains=None,
)
histCalRelGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
if absolute_gain:
histCalAbsGainCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:code id: tags:
``` python
if pattern_classification :
patternClassifier = xcal.PatternClassifier(
[x, y],
const_data["NoiseEPix100"],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=0,
nCells=memoryCells,
allowElongated=False,
blockSize=[x, y],
parallel=run_parallel,
)
histCalCSCor = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize,
)
histCalGainCorClusters = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
histCalGainCorSingles = xcal.HistogramCalculator(
sensorSize,
bins=nbins,
range=hrange*hscale,
parallel=run_parallel,
nCells=memoryCells,
blockSize=blockSize
)
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
d = d[pixel_data[0]][pixel_data[1]][..., np.newaxis].astype(np.float32)
d = np.compress(
np.any(d > 0, axis=(0, 1)), d, axis=2)
# Offset correction.
d -= const_data["OffsetEPix100"]
histCalOffsetCor.fill(d)
# Common Mode correction.
if common_mode:
# Block CM
d = cmCorrectionB.correct(d)
# Row CM
d = cmCorrectionR.correct(d)
# COL CM
d = cmCorrectionC.correct(d)
histCalCMCor.fill(d)
# relative gain correction.
if relative_gain:
d = gainCorrection.correct(d)
histCalRelGainCor.fill(d)
"""The gain correction is currently applying
an absolute correction (not a relative correction
as the implied by the name);
it changes the scale (the unit of measurement)
of the data from ADU to either keV or n_of_photons.
But the pattern classification relies on comparing
data with the NoiseEPix100 map, which is still in ADU.
The best solution is to do a relative gain
correction first and apply the global absolute
gain to the data at the end, after clustering.
"""
if pattern_classification:
d_clu, patterns = patternClassifier.classify(d)
d_clu[d_clu < (split_evt_primary_threshold*const_data["Noise"])] = 0
data_clu[index, ...] = np.squeeze(d_clu)
data_patterns[index, ...] = np.squeeze(patterns)
histCalCSCor.fill(d_clu)
# absolute gain correction
# changes data from ADU to keV (or n. of photons)
if absolute_gain:
d = d * gain_cnst
if photon_energy > 0:
d /= photon_energy
histCalAbsGainCor.fill(d)
if pattern_classification:
# Modify pattern classification.
d_clu = d_clu * gain_cnst
if photon_energy > 0:
d_clu /= photon_energy
data_clu[index, ...] = np.squeeze(d_clu)
histCalGainCorClusters.fill(d_clu)
d_sing = d_clu[patterns==100] # pattern 100 corresponds to single photons events
if len(d_sing):
histCalGainCorSingles.fill(d_sing)
data[index, ...] = np.squeeze(d)
histCalCor.fill(d)
```
%% Cell type:code id: tags:
``` python
pixel_data = (instrument_src, "data.image.pixels")
# 10 is a number chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
%% Cell type:code id: tags:
``` python
for f in seq_files:
seq_dc = H5File(f)
n_imgs = seq_dc.get_data_counts(*pixel_data).shape[0]
# Data shape in seq_dc excluding trains with empty images.
dshape = seq_dc[pixel_data].shape
dataset_chunk = ((chunk_size_idim,) + dshape[1:]) # e.g. (1, pixels_x, pixels_y)
if n_imgs - dshape[0] != 0:
print(f"- WARNING: {f} has {n_imgs - dshape[0]} trains with empty data.")
# This parameter is only used for testing.
if limit_images > 0:
n_imgs = min(n_imgs, limit_images)
data = context.alloc(shape=dshape, dtype=np.float32)
if pattern_classification:
data_clu = context.alloc(shape=dshape, dtype=np.float32)
data_patterns = context.alloc(shape=dshape, dtype=np.int32)
step_timer.start()
context.map(
correct_train, seq_dc.select(
*pixel_data, require_all=True).select_trains(np.s_[:n_imgs])
)
step_timer.done_step(f'Correcting {n_imgs} trains.')
# Store detector h5 information in the corrected file
# and deselect data to correct and store later.
step_timer.start()
out_file = out_folder / f.name.replace("RAW", "CORR")
data_path = "INSTRUMENT/"+instrument_src+"/data/image"
pixels_path = f"{data_path}/pixels"
# First copy all raw data source to the corrected file,
# while excluding the raw data image /data/image/pixels.
with h5py.File(out_file, 'w') as ofile:
# Copy RAW non-calibrated sources.
with h5py.File(f, 'r') as sfile:
h5_copy_except.h5_copy_except_paths(
sfile, ofile,
[pixels_path])
# Create dataset in CORR h5 file and add corrected images.
dataset = ofile.create_dataset(
pixels_path,
data=data,
chunks=dataset_chunk,
dtype=np.float32)
if pattern_classification:
# Save /data/image/pixels_classified in corrected file.
datasetc = ofile.create_dataset(
f"{data_path}/pixels_classified",
data=data_clu,
chunks=dataset_chunk,
dtype=np.float32)
# Save /data/image/patterns in corrected file.
datasetp = ofile.create_dataset(
f"{data_path}/patterns",
data=data_patterns,
chunks=dataset_chunk,
dtype=np.int32)
step_timer.done_step('Storing data.')
```
%% Cell type:code id: tags:
``` python
ho, eo, co, so = histCalCor.get()
d = [{
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Total corr.'
}]
ho, eo, co, so = histCalOffsetCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset corr.'
})
if common_mode:
ho, eo, co, so = histCalCMCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'CM corr.'
})
if relative_gain :
ho, eo, co, so = histCalRelGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Relative gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalCSCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Charge sharing corr.'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy (ADU)',
y_label='Number of occurrences', figsize='2col',
y_log=True, x_range=(-50, 500),
legend='top-center-frame-2col',
)
plt.title(f'run {run} - {karabo_da}')
plt.grid()
```
%% Cell type:code id: tags:
``` python
if absolute_gain :
d=[]
ho, eo, co, so = histCalAbsGainCor.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Absolute gain corr.'
})
if pattern_classification:
ho, eo, co, so = histCalGainCorClusters.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Charge sharing corr.'
})
ho, eo, co, so = histCalGainCorSingles.get()
d.append({
'x': co,
'y': ho,
'y_err': np.sqrt(ho[:]),
'drawstyle': 'steps-mid',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Isolated photons (singles)'
})
fig = xana.simplePlot(
d, aspect=1, x_label=f'Energy ({plot_unit})',
y_label='Number of occurrences', figsize='2col',
y_log=True,
x_range=np.array((-50, 500))*hscale,
legend='top-center-frame-2col',
)
plt.grid()
plt.title(f'run {run} - {karabo_da}')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
np.nanmedian(data, axis=0),
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting mean image of {data.shape[0]} trains.')
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(
data[0, ...],
x_label='Columns', y_label='Rows',
lut_label=f'Signal ({plot_unit})',
x_range=(0, y),
y_range=(0, x),
vmin=-50, vmax=50)
step_timer.done_step(f'Plotting single shot of corrected data.')
```
Loading