%% Cell type:markdown id: tags:
# ePix100 Data Correction
Author: European XFEL Detector Group, Version: 2.0
The following notebook provides data correction of images acquired with the ePix100 detector.
The sequence of corrections applied is:
Offset --> Common Mode Noise --> Relative Gain --> Charge Sharing --> Absolute Gain.
Offset, common mode and gain corrected data is saved to /data/image/pixels in the CORR files.
If pattern classification is applied (charge sharing correction), this data will be saved to /data/image/pixels_classified, while the corresponding patterns will be saved to /data/image/patterns in the CORR files.
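For orientation, the sketch below shows one way the corrected datasets can be read back with `extra_data`. The file name is illustrative (it follows the RAW→CORR naming used further down) and the source name assumes the default `karabo_id` and receiver of this notebook.
``` python
# Minimal sketch of reading back corrected ePix100 data (illustrative file/source names).
from extra_data import H5File

corr_file = "CORR-R0038-EPIX01-S00000.h5"  # example corrected sequence file
src = "HED_IA1_EPX100-1/DET/RECEIVER:daqOutput"

with H5File(corr_file) as dc:
    # Offset / common mode / gain corrected images:
    pixels = dc[src, "data.image.pixels"].ndarray()
    # Written only if pattern classification (charge sharing correction) was applied:
    if "data.image.pixels_classified" in dc[src].keys():
        clustered = dc[src, "data.image.pixels_classified"].ndarray()
        patterns = dc[src, "data.image.patterns"].ndarray()
    print(pixels.shape)
```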
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/HED/202102/p002739/raw" # input folder, required
out_folder = "" # output folder, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
run = 38 # which run to read data from, required
# Parameters for accessing the raw data.
karabo_id = "HED_IA1_EPX100-1" # karabo karabo_id
karabo_da = "EPIX01" # data aggregators
db_module = "" # module id in the database
receiver_template = "RECEIVER" # detector receiver template for accessing raw data files
path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data
instrument_source_template = '{}/DET/{}:daqOutput' # instrument detector data source in h5files
# Parameters affecting writing corrected data.
chunk_size_idim = 1 # H5 chunking size of output data
limit_trains = 0 # Process only first N images, 0 - process all.
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl-cal001:8015#8025" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DBe. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
# Conditions for retrieving calibration constants.
bias_voltage = 200 # bias voltage
in_vacuum = False # detector operated in vacuum
integration_time = -1 # Detector integration time, Default value -1 to use the value from the slow data.
fix_temperature = -1 # fixed temperature value in Kelvin, Default value -1 to use the value from files.
gain_photon_energy = 8.048 # Photon energy used for gain calibration
photon_energy = 0. # Photon energy to calibrate in number of photons, 0 for calibration in keV
# Flags to select type of applied corrections.
pattern_classification = True # do clustering.
relative_gain = True # Apply relative gain correction.
absolute_gain = True # Apply absolute gain correction (implies relative gain).
common_mode = True # Apply common mode correction.
# Parameters affecting applied correction.
cm_min_frac = 0.25 # No CM correction is performed if, after masking, the fraction of good pixels falls below this value
cm_noise_sigma = 5. # CM correction noise standard deviation
split_evt_primary_threshold = 7. # primary threshold for split event correction
split_evt_secondary_threshold = 5. # secondary threshold for split event correction
split_evt_mip_threshold = 1000. # minimum ionizing particle threshold
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import tabulate
import warnings
from logging import warning
from sys import exit
import h5py
import pasha as psh
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex, Markdown, display
from extra_data import RunDirectory, H5File
from extra_geom import Epix100Geometry
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pathlib import Path
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpycaltools as xcal
from cal_tools.calcat_interface import EPIX100_CalibrationData, CalCatError
from cal_tools.epix100 import epix100lib
from cal_tools.files import DataFile
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment,
)
from cal_tools.step_timing import StepTimer
warnings.filterwarnings('ignore')
prettyPlotting = True
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
x = 708 # rows of the ePix100
y = 768 # columns of the ePix100
if absolute_gain:
relative_gain = True
plot_unit = 'ADU'
```
%% Cell type:code id: tags:
``` python
prop_str = in_folder[in_folder.find('/p')+1:in_folder.find('/p')+8]
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(parents=True, exist_ok=True)
run_folder = in_folder / f"r{run:04d}"
instrument_src = instrument_source_template.format(
karabo_id, receiver_template)
print(f"Correcting run: {run_folder}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Data corrected files are stored at: {out_folder}")
```
%% Cell type:code id: tags:
``` python
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Using {creation_time.isoformat()} as creation time")
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(run_folder, _use_voview=False)
seq_files = [Path(f.filename) for f in run_dc.select(f"*{karabo_id}*").files]
# If a set of sequences requested to correct,
# adapt seq_files list.
if sequences != [-1]:
seq_files = [f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)]
if not len(seq_files):
raise IndexError("No sequence files available for the selected sequences.")
print(f"Processing a total of {len(seq_files)} sequence files")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
step_timer.start()
sensorSize = [x, y]
# Sensor area will be analysed according to blocksize
blockSize = [sensorSize[0]//2, sensorSize[1]//2]
xcal.defaultBlockSize = blockSize
memoryCells = 1 # ePIX has no memory cells
run_parallel = False
# Read control data.
ctrl_data = epix100lib.epix100Ctrl(
run_dc=run_dc,
instrument_src=instrument_src,
ctrl_src=f"{karabo_id}/DET/CONTROL",
)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
integration_time_str_add = ""
else:
integration_time_str_add = "(manual input)"
if fix_temperature < 0:
temperature = ctrl_data.get_temprature()
temperature_k = temperature + 273.15
temp_str_add = ""
else:
temperature_k = fix_temperature
temperature = fix_temperature - 273.15
temp_str_add = "(manual input)"
print(f"Bias voltage is {bias_voltage} V")
print(f"Detector integration time is set to {integration_time} \u03BCs {integration_time_str_add}")
print(f"Mean temperature: {temperature:0.2f}°C / {temperature_k:0.2f} K {temp_str_add}")
print(f"Operated in vacuum: {in_vacuum}")
```
%% Cell type:code id: tags:
``` python
# Table of sequence files to process
table = [(k, f) for k, f in enumerate(seq_files)]
if len(table):
md = display(Latex(tabulate.tabulate(
table,
tablefmt='latex',
headers=["#", "file"]
)))
```
%% Cell type:markdown id: tags:
## Retrieving calibration constants
As a first step, dark maps have to be loaded.
%% Cell type:code id: tags:
``` python
epix_cal = EPIX100_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=temperature_k,
in_vacuum=in_vacuum,
source_energy=gain_photon_energy,
event_at=creation_time,
client=rest_cfg.calibration_client(),
)
const_metadata = epix_cal.metadata(calibrations=epix_cal.dark_calibrations)
if relative_gain:
try:
metadata = epix_cal.metadata(epix_cal.illuminated_calibrations)
for key, value in metadata.items():
const_metadata.setdefault(key, {}).update(value)
except CalCatError as e:
warning(f"CalCatError: {e}")
# Display retrieved calibration constants timestamps
epix_cal.display_markdown_retrieved_constants(metadata=const_metadata)
# Load the constant data from files
const_data = epix_cal.ndarray_map(metadata=const_metadata)[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = {"OffsetEPix100", "NoiseEPix100"} - set(const_data)
if missing_dark_constants:
raise ValueError(
f"Dark constants {missing_dark_constants} are not available to correct {karabo_da}."
"No correction is performed!")
if relative_gain and "RelativeGainEPix100" not in const_data.keys():
warning("RelativeGainEPix100 is not found in the calibration database.")
relative_gain = False
absolute_gain = False
```
%% Cell type:code id: tags:
``` python
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=const_metadata,
caldb_root=epix_cal.caldb_root,
)
```
%% Cell type:code id: tags:
``` python
# Initializing some parameters.
hscale = 1
stats = True
bins = np.arange(-50,1000)
hist = {'O': 0} # dictionary to store histograms
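# Keys added below as corrections are enabled: 'O' offset, 'CM' common mode,
# 'RG' relative gain, 'AG' absolute gain, 'CS' charge sharing, 'S' singles.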
```
%% Cell type:code id: tags:
``` python
if common_mode:
commonModeBlockSize = [x//2, y//8]
cmCorrectionB = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='block',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionR = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='row',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
cmCorrectionC = xcal.CommonModeCorrection(
shape=sensorSize,
blockSize=commonModeBlockSize,
orientation='col',
nCells=memoryCells,
noiseMap=const_data['NoiseEPix100'],
runParallel=run_parallel,
parallel=run_parallel,
stats=stats,
minFrac=cm_min_frac,
noiseSigma=cm_noise_sigma,
)
hist['CM'] = 0
```
%% Cell type:code id: tags:
``` python
if relative_gain:
# gain constant is given by the mode of the gain map
# because all bad pixels are masked using this value
_vals,_counts = np.unique(const_data["RelativeGainEPix100"], return_counts=True)
gain_cnst = _vals[np.argmax(_counts)]
gainCorrection = xcal.RelativeGainCorrection(
sensorSize,
gain_cnst/const_data["RelativeGainEPix100"][..., None],
nCells=memoryCells,
parallel=run_parallel,
blockSize=blockSize,
gains=None,
)
hist['RG'] = 0
if absolute_gain:
hscale = gain_cnst
plot_unit = 'keV'
if photon_energy > 0:
plot_unit = r'$\gamma$'
hscale /= photon_energy
hist['AG'] = 0
```
%% Cell type:code id: tags:
``` python
if pattern_classification:
patternClassifier = xcal.PatternClassifier(
[x, y],
const_data["NoiseEPix100"],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=0,
nCells=memoryCells,
allowElongated=False,
blockSize=[x, y],
parallel=run_parallel,
)
hist['CS'] = 0
hist['S'] = 0
```
%% Cell type:markdown id: tags:
## Applying corrections
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, tid, d):
d = d[..., np.newaxis].astype(np.float32)
d = np.compress(
np.any(d > 0, axis=(0, 1)), d, axis=2)
# Offset correction.
d -= const_data["OffsetEPix100"]
hist['O'] += np.histogram(d,bins=bins)[0]
# Common Mode correction.
if common_mode:
# Block CM
d = cmCorrectionB.correct(d)
# Row CM
d = cmCorrectionR.correct(d)
# COL CM
d = cmCorrectionC.correct(d)
hist['CM'] += np.histogram(d,bins=bins)[0]
# Relative gain correction.
if relative_gain:
d = gainCorrection.correct(d)
hist['RG'] += np.histogram(d,bins=bins)[0]
"""The gain correction is currently applying
an absolute correction (not a relative correction
as implied by the name);
it changes the scale (the unit of measurement)
of the data from ADU to either keV or n_of_photons.
But the pattern classification relies on comparing
data with the NoiseEPix100 map, which is still in ADU.
The best solution is to do a relative gain
correction first and apply the global absolute
gain to the data at the end, after clustering.
"""
if pattern_classification:
d_clu, patterns = patternClassifier.classify(d)
d_clu[d_clu < (split_evt_primary_threshold*const_data["NoiseEPix100"])] = 0
data_clu[index, ...] = np.squeeze(d_clu)
data_patterns[index, ...] = np.squeeze(patterns)
hist['CS'] += np.histogram(d_clu,bins=bins)[0]
d_sing = d_clu[patterns==100] # pattern 100 corresponds to single photons events
if len(d_sing):
hist['S'] += np.histogram(d_sing,bins=bins)[0]
# Absolute gain correction
# changes data from ADU to keV (or n. of photons)
if absolute_gain:
d = d * gain_cnst
if photon_energy > 0:
d /= photon_energy
hist['AG'] += np.histogram(d,bins=bins)[0]
if pattern_classification:
# Modify pattern classification.
d_clu = d_clu * gain_cnst
if photon_energy > 0:
d_clu /= photon_energy
data_clu[index, ...] = np.squeeze(d_clu)
data[index, ...] = np.squeeze(d)
```
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads
context = psh.context.ThreadContext(num_workers=10)
```
%% Cell type:code id: tags:
``` python
empty_seq = 0
for f in seq_files:
seq_dc = H5File(f)
# Save corrected data in an output file with name
# of corresponding raw sequence file.
out_file = out_folder / f.name.replace("RAW", "CORR")
# Data shape in seq_dc excluding trains with empty images.
ishape = seq_dc[instrument_src, "data.image.pixels"].shape
corr_ntrains = ishape[0]
all_train_ids = seq_dc.train_ids
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {f.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(f"{f.name} has {len(all_train_ids) - corr_ntrains} trains with missing data.")
# This parameter is only used for testing.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
oshape = (corr_ntrains, *ishape[1:])
data = context.alloc(shape=oshape, dtype=np.float32)
if pattern_classification:
data_clu = context.alloc(shape=oshape, dtype=np.float32)
data_patterns = context.alloc(shape=oshape, dtype=np.int32)
step_timer.start() # Correct data.
# Overwrite seq_dc after eliminating empty trains and/or limiting the number of images.
seq_dc = seq_dc.select(
instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
pixel_data = seq_dc[instrument_src, "data.image.pixels"]
context.map(correct_train, pixel_data)
step_timer.done_step(f'Correcting {corr_ntrains} trains.')
step_timer.start() # Write corrected data.
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src, "data.image.pixels"].data_counts(labelled=False)
# Write corrected data.
with DataFile(out_file, "w") as ofile:
dataset_chunk = ((chunk_size_idim,) + oshape[1:]) # e.g. (1, pixels_x, pixels_y)
seq_file = seq_dc.files[0] # FileAccess
# Create INDEX datasets.
ofile.create_index(seq_dc.train_ids, from_file=seq_dc.files[0])
# Create METADATA datasets
ofile.create_metadata(
like=seq_dc,
sequence=seq_file.sequence,
instrument_channels=(f'{instrument_src}/data',)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
image_raw_fields = [ # /data/image/
"binning", "bitsPerPixel", "dimTypes", "dims",
"encoding", "flipX", "flipY", "roiOffsets", "rotation",
]
for field in image_raw_fields:
field_arr = seq_dc[instrument_src, f"data.image.{field}"].ndarray()
outp_source.create_key(
f"data.image.{field}", data=field_arr,
chunks=(chunk_size_idim, *field_arr.shape[1:]))
# Add main corrected `data.image.pixels` dataset and store corrected data.
outp_source.create_key(
"data.image.pixels", data=data, chunks=dataset_chunk)
outp_source.create_key(
"data.trainId", data=seq_dc.train_ids, chunks=min(50, len(seq_dc.train_ids)))
if np.isin('data.pulseId', list(seq_dc[instrument_src].keys())): # some runs are missing 'data.pulseId'
outp_source.create_key(
"data.pulseId",
data=list(seq_dc[instrument_src]['data.pulseId'].ndarray()[:, 0]),
chunks=min(50, len(seq_dc.train_ids)),
)
if pattern_classification:
# Add main corrected `data.image.pixels` dataset and store corrected data.
outp_source.create_key(
"data.image.pixels_classified", data=data_clu, chunks=dataset_chunk)
outp_source.create_key(
"data.image.patterns", data=data_patterns, chunks=dataset_chunk)
step_timer.done_step('Storing data.')
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
exit(0)
```
%% Cell type:markdown id: tags:
## Plot Histograms
%% Cell type:code id: tags:
``` python
bins_ADU = bins[:-1]+np.diff(bins)[0]/2
bins_keV = bins_ADU*hscale
```
%% Cell type:code id: tags:
``` python
# Histogram in ADU
plt.figure(figsize=(12,8))
plt.plot(bins_ADU,hist['O'], label='Offset corr')
if common_mode:
plt.plot(bins_ADU,hist['CM'], label='CM corr')
if relative_gain:
plt.plot(bins_ADU,hist['RG'], label='Relative Gain corr')
if pattern_classification:
plt.plot(bins_ADU[bins_ADU>10],hist['CS'][bins_ADU>10], label='Charge Sharing corr')
if np.any(hist['S']):
plt.plot(bins_ADU,hist['S'], label='Singles')
xtick_step = 50
plt.xlim(bins[0], bins[-1]+1)
plt.xticks(np.arange(bins[0],bins[-1]+2,xtick_step))
plt.xlabel('ADU',fontsize=12)
plt.yscale('log')
plt.title(f'{karabo_id} | {prop_str}, r{run}', fontsize=14, fontweight='bold')
plt.legend(fontsize=12)
plt.grid(ls=':')
```
%% Cell type:code id: tags:
``` python
# Histogram in keV/number of photons
if absolute_gain:
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
plt.figure(figsize=(12,8))
if relative_gain:
plt.plot(bins_keV,hist['RG'], label='Absolute Gain corr', c=colors[2])
if pattern_classification:
plt.plot(bins_keV[bins_keV>.5],hist['CS'][bins_keV>.5], label='Charge Sharing corr', c=colors[3])
if np.any(hist['S']):
plt.plot(bins_keV[bins_keV>.5],hist['S'][bins_keV>.5], label='Singles', c=colors[4])
if photon_energy==0: # if keV instead of #photons
xtick_step = 5
plt.xlim(left=-2)
plt.xticks(np.arange(0,plt.gca().get_xlim()[1],xtick_step))
plt.xlabel(plot_unit,fontsize=12)
plt.yscale('log')
plt.title(f'{karabo_id} | {prop_str}, r{run}', fontsize=14, fontweight='bold')
plt.legend(fontsize=12)
plt.grid(ls=':')
```
%% Cell type:markdown id: tags:
## Mean Image of the corrected data
%% Cell type:code id: tags:
``` python
geom = Epix100Geometry.from_relative_positions(top=[386.5, 364.5, 0.], bottom=[386.5, -12.5, 0.])
if pattern_classification:
plt.subplots(1,2,figsize=(18,18))
ax = plt.subplot(1,2,1)
ax.set_title(f'Before CS correction',fontsize=12,fontweight='bold');
else:
plt.subplots(1,1,figsize=(9,9))
ax = plt.subplot(1,1,1)
ax.set_title(f'{karabo_id} | {prop_str}, r{run} | Average of {data.shape[0]} trains',fontsize=12,fontweight='bold');
# Average image before charge sharing correction
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data.mean(axis=0)
vmin = max(image.mean()-2*image.std(),0)
vmax = image.mean()+3*image.std()
geom.plot_data(image,
ax=ax,
colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
origin='upper',
vmin=vmin,
vmax=vmax)
# Average image after charge sharing correction
if pattern_classification:
ax = plt.subplot(1,2,2)
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data_clu.mean(axis=0)
geom.plot_data(image,
ax=ax,
colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
origin='upper',
vmin=vmin,
vmax=vmax)
ax.set_title(f'After CS correction',fontsize=12,fontweight='bold');
plt.suptitle(f'{karabo_id} | {prop_str}, r{run} | Average of {data.shape[0]} trains',fontsize=14,fontweight='bold',y=.72);
```
%% Cell type:markdown id: tags:
## Single Shot of the corrected data
%% Cell type:code id: tags:
``` python
train_idx = -1
if pattern_classification:
plt.subplots(1,2,figsize=(18,18))
ax = plt.subplot(1,2,1)
ax.set_title(f'Before CS correction',fontsize=12,fontweight='bold');
else:
plt.subplots(1,1,figsize=(9,9))
ax = plt.subplot(1,1,1)
ax.set_title(f'{karabo_id} | {prop_str}, r{run} | Single frame',fontsize=12,fontweight='bold');
# Single frame before charge sharing correction
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data[train_idx]
vmin = max(image.mean()-2*image.std(),0)
vmax = image.mean()+3*image.std()
geom.plot_data(image,
ax=ax,
colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
origin='upper',
vmin=vmin,
vmax=vmax)
# Single frame after charge sharing correction
if pattern_classification:
ax = plt.subplot(1,2,2)
divider = make_axes_locatable(ax)
cax = divider.append_axes('bottom', size='5%', pad=0.5)
image = data_clu[train_idx]
geom.plot_data(image,
ax=ax,
colorbar={'cax': cax, 'label': plot_unit, 'orientation': 'horizontal'},
origin='upper',
vmin=vmin,
vmax=vmax)
ax.set_title(f'After CS correction',fontsize=12,fontweight='bold');
plt.suptitle(f'{karabo_id} | {prop_str}, r{run} | Single frame',fontsize=14,fontweight='bold',y=.72);
```
......
%% Cell type:markdown id: tags:
# pnCCD Data Correction #
Authors: DET Group, Modified by Kiana Setoodehnia - Version 5.0
The following notebook provides offset, common mode, relative gain, split events and pattern classification corrections of images acquired with the pnCCD. This notebook *does not* yet correct for charge transfer inefficiency.
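As a quick reference, the sketch below illustrates how the corrected pnCCD datasets written by this notebook can be read back with `extra_data`; the file and source names are illustrative and follow the defaults used below.
``` python
# Minimal sketch of reading back corrected pnCCD data (illustrative file/source names).
from extra_data import H5File

corr_file = "CORR-R0347-PNCCD01-S00000.h5"  # example corrected sequence file
src = "SQS_NQS_PNCCD1MP/CAL/PNCCD_FMT-0:output"

with H5File(corr_file) as dc:
    keys = dc[src].keys()
    pixels = dc[src, "data.pixels"].ndarray()  # offset corrected images
    if "data.pixels_classified" in keys:       # present if pattern classification was enabled
        classified = dc[src, "data.pixels_classified"].ndarray()
        patterns = dc[src, "data.patterns"].ndarray()
```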
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SQS/202031/p900166/raw" # input folder
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove/pnccd_correct" # output folder
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
run = 347 # which run to read data from
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 1 # number of sequences running on the same slurm node.
karabo_da = 'PNCCD01' # data aggregators
karabo_id = "SQS_NQS_PNCCD1MP" # karabo prefix of PNCCD devices
receiver_id = "PNCCD_FMT-0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
instrument_source_template = '{}/CAL/{}:output' # template for data source name, will be filled with karabo_id and receiver_id.
# Parameters affecting data correction.
commonModeAxis = 0 # axis along which common mode will be calculated, 0 = row, and 1 = column
commonModeBlockSize = [512, 512] # size of the detector in pixels for common mode calculations
split_evt_primary_threshold = 4. # primary threshold for split event classification in terms of n sigma noise
split_evt_secondary_threshold = 3. # secondary threshold for split event classification in terms of n sigma noise
saturated_threshold = 32000. # full well capacity in ADU
# Conditions for retrieving calibration constants
fix_temperature_top = 0. # fix temperature for top sensor in K, set to 0. to use value from slow data.
fix_temperature_bot = 0. # fix temperature for bottom sensor in K, set to 0. to use value from slow data.
gain = -1 # the detector's gain setting. Set to -1 to use the value from the slow data.
bias_voltage = 0. # the detector's bias voltage. set to 0. to use value from slow data.
integration_time = 70 # detector's integration time
photon_energy = 1.6 # Al fluorescence in keV
# Parameters for the calibration database.
cal_db_interface = "tcp://max-exfl-cal001:8015" # calibration DB interface to use
cal_db_timeout = 300000 # timeout on caldb requests
creation_time = "" # The timestamp to use with Calibration DB. Required Format: "YYYY-MM-DD hh:mm:ss" e.g. 2019-07-04 11:02:41
remove_bias_voltage_if_zero = True # This flag enables removing bias voltage from the conditions if a 0 value is read from RAW data. This is useful when the corresponding constants for old RAW had no bias voltage because of a mistake in control data. e.g. p002857
# Booleans for selecting corrections to apply.
only_offset = False # Only, apply offset.
common_mode = True # Apply common mode correction
relgain = True # Apply relative gain correction
pattern_classification = True # classify split events
# parameters affecting stored output data.
chunk_size_idim = 1 # H5 chunking size of output data
limit_trains = 0 # this parameter is used for limiting number of images to correct from a sequence file.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
# Here the hierarchy and dependencies of the correction booleans are defined
corr_bools = {}
corr_bools["only_offset"] = only_offset
# Apply offset only.
if not only_offset:
corr_bools["relgain"] = relgain
corr_bools["common_mode"] = common_mode
corr_bools["pattern_class"] = pattern_classification
```
%% Cell type:code id: tags:
``` python
import os
import sys
import warnings
from logging import warning
from pathlib import Path
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
from IPython.display import Markdown, display
from extra_data import H5File, RunDirectory
from prettytable import PrettyTable
%matplotlib inline
import cal_tools.restful_config as rest_cfg
from XFELDetAna import xfelpyanatools as xana
from XFELDetAna import xfelpycaltools as xcal
from cal_tools import pnccdlib
from cal_tools.files import DataFile
from cal_tools.calcat_interface import CalCatError, PNCCD_CalibrationData
from cal_tools.tools import (
calcat_creation_time,
write_constants_fragment,
)
from cal_tools.step_timing import StepTimer
```
%% Cell type:code id: tags:
``` python
# Calibration Database Settings, and Some Initial Run Parameters & Paths:
display(Markdown('### Initial Settings and Paths'))
# Sensor size and block size definitions (important for common mode and other corrections):
pixels_x = 1024 # rows of pnCCD in pixels
pixels_y = 1024 # columns of pnCCD in pixels
in_folder = Path(in_folder)
sensorSize = [pixels_x, pixels_y]
# For xcal.HistogramCalculators.
blockSize = [pixels_x//2, pixels_y//2] # sensor area will be analysed according to blockSize.
print(f"pnCCD size is: {pixels_x}x{pixels_y} pixels.")
print(f'Calibration database interface selected: {cal_db_interface}')
# Paths to the data:
instrument_src = instrument_source_template.format(karabo_id, receiver_id)
print(f"Instrument H5File source: {instrument_src}\n")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run, creation_time)
print(f"Creation time: {creation_time}")
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
```
%% Cell type:code id: tags:
``` python
run_dc = RunDirectory(in_folder / f"r{run:04d}", _use_voview=False)
# Output Folder Creation:
os.makedirs(out_folder, exist_ok=True)
# extract control data
step_timer.start()
ctrl_data = pnccdlib.PnccdCtrl(run_dc, karabo_id)
if bias_voltage == 0.:
bias_voltage = ctrl_data.get_bias_voltage()
if gain == -1:
gain = ctrl_data.get_gain()
if fix_temperature_top == 0:
fix_temperature_top = ctrl_data.get_fix_temperature_top()
if fix_temperature_bot == 0:
fix_temperature_bot = ctrl_data.get_fix_temperature_bot()
step_timer.done_step("Reading control parameters.")
# Printing the Parameters Read from the Data File:
display(Markdown('### Detector Parameters'))
print(f"Bias voltage is {bias_voltage:0.1f} V.")
print(f"Detector gain is set to 1/{int(gain)}.")
print(f"Detector integration time is set to {integration_time} ms")
print(f"Top pnCCD sensor is at temperature of {fix_temperature_top:0.2f} K")
print(f"Bottom pnCCD sensor is at temperature of {fix_temperature_bot:0.2f} K")
```
%% Cell type:code id: tags:
``` python
seq_files = []
for f in run_dc.select(instrument_src).files:
fpath = Path(f.filename)
if fpath.match(f"*{karabo_da}*.h5"):
seq_files.append(fpath)
if sequences != [-1]:
seq_files = sorted([f for f in seq_files if any(f.match(f"*-S{s:05d}.h5") for s in sequences)])
print(f"Processing a total of {len(seq_files)} sequence files:")
print(*seq_files, sep='\n')
```
%% Cell type:code id: tags:
``` python
gain_k = [k for k, v in pnccdlib.VALID_GAINS.items() if v == gain][0]
if gain_k == 'a':
split_evt_mip_threshold = 1000. # MIP threshold in ADU for event classification (10 times average noise)
# Each xcal.HistogramCalculator requires a total number of bins and a binning range. We define these
# using a dictionary:
# For all xcal histograms:
Hist_Bin_Dict = {
"bins": 35000, # number of bins
"bin_range": [0, 35000]
}
# For the numpy histograms on the last cell of the notebook:
Event_Bin_Dict = {
"event_bins": 1000, # number of bins
"b_range": [0, 35000] # bin range
}
elif gain_k == 'b':
split_evt_mip_threshold = 270. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 10000,
"bin_range": [0, 10000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 10000]
}
elif gain_k == 'c':
split_evt_mip_threshold = 110. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 3000,
"bin_range": [0, 3000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 3000]
}
elif gain_k == 'd':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 1000,
"bin_range": [0, 1000]
}
Event_Bin_Dict = {
"event_bins": 1000,
"b_range": [0, 1000]
}
elif gain_k == 'e':
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 500,
"bin_range": [0, 500]
}
Event_Bin_Dict = {
"event_bins": 500,
"b_range": [0, 500]
}
else:
split_evt_mip_threshold = 90. # 10 times the average noise
Hist_Bin_Dict = {
"bins": 220,
"bin_range": [0, 220]
}
Event_Bin_Dict = {
"event_bins": 220,
"b_range": [0, 220]
}
bins = Hist_Bin_Dict["bins"]
bin_range = Hist_Bin_Dict["bin_range"]
event_bins = Event_Bin_Dict["event_bins"]
b_range = Event_Bin_Dict["b_range"]
```
%% Cell type:markdown id: tags:
As a first step, calibration constants have to be retrieved from the calibration database
%% Cell type:code id: tags:
``` python
display(Markdown("### Constants retrieval"))
step_timer.start()
# In the case of an older proposal (e.g., proposal 002857),
# it is possible that the bias voltage was 0
# resulting in the absence of bias voltage values in
# the previously injected dark constants. This situation can be
# attributed to a feature that is currently not available in iCalibrationDB.
if bias_voltage == 0 and remove_bias_voltage_if_zero:
bias_voltage = None
pnccd_cal = PNCCD_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
integration_time=integration_time,
sensor_temperature=fix_temperature_top,
gain_setting=gain,
event_at=creation_time,
source_energy=photon_energy,
client=rest_cfg.calibration_client(),
)
pnccd_metadata = pnccd_cal.metadata(calibrations=pnccd_cal.dark_calibrations)
if relgain:
try:
gain_metadata = pnccd_cal.metadata(calibrations=["RelativeGainCCD"])
for mod, md in gain_metadata.items():
pnccd_metadata[mod].update(md)
except CalCatError as e: # TODO: fix after getting new exceptions.
warning(f"{e} While asking for {pnccd_cal.illuminated_calibrations}")
warning("RelativeGainEPix100 is not retrieved from the calibration database. "
"Relative gain correction is disabled.")
corr_bools['relgain'] = False
# Display retrieved calibration constants timestamps
pnccd_cal.display_markdown_retrieved_constants(metadata=pnccd_metadata)
metadata = pnccd_metadata[karabo_da]
# Validate the constants availability and raise/warn correspondingly.
missing_dark_constants = set(
c for c in pnccd_cal.dark_calibrations if c not in metadata.keys())
if missing_dark_constants:
raise KeyError(
f"Dark constants {missing_dark_constants} are not available for correction.")
# Record constant details in YAML metadata
write_constants_fragment(
out_folder=(metadata_folder or out_folder),
det_metadata=pnccd_metadata,
caldb_root=pnccd_cal.caldb_root,
)
# load constants arrays after storing fragment YAML file
# and validating constants availability.
constants = pnccd_cal.ndarray_map(metadata=pnccd_metadata).get(karabo_da, {})
step_timer.done_step("Constants retrieval")
```
%% Cell type:code id: tags:
``` python
fig = xana.heatmapPlot(constants["OffsetCCD"][:,:,0], x_label='Columns', y_label='Rows', lut_label='Offset (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmax=16000,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Offset Map')
fig = xana.heatmapPlot(constants["NoiseCCD"][:,:,0], x_label='Columns', y_label='Rows',
lut_label='Corrected Noise (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Noise Map')
fig = xana.heatmapPlot(np.log2(constants["BadPixelsDarkCCD"][:,:,0]), x_label='Columns', y_label='Rows',
lut_label='Bad Pixel Value (ADU)',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Dark Bad Pixels Map')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(constants["RelativeGainCCD"], figsize=(8, 8), x_label='Columns', y_label='Rows',
lut_label='Relative Gain',
aspect=1, x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0.8, vmax=1.2,
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
panel_top_low_lim = 0.5, panel_top_high_lim = 1.5, panel_side_low_lim = 0.5,
panel_side_high_lim = 1.5,
title = f'Relative Gain Map for pnCCD (Gain = 1/{int(gain)})')
```
%% Cell type:code id: tags:
``` python
#************************ Calculators ************************#
if corr_bools.get('common_mode'):
# Common Mode Correction Calculator:
cmCorrection = xcal.CommonModeCorrection([pixels_x, pixels_y],
commonModeBlockSize,
commonModeAxis,
parallel=False, dType=np.float32, stride=1,
noiseMap=constants["NoiseCCD"].astype(np.float32), minFrac=0.25)
if corr_bools.get('pattern_class'):
# Pattern Classifier Calculator:
# Left Hemisphere:
patternClassifierLH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, :pixels_y//2],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=3, # track along y-axis, left to right (see
nCells=1, # split_event.py file in pydetlib/lib/src/
allowElongated=False, # XFELDetAna/algorithms)
blockSize=[pixels_x, pixels_y//2],
parallel=False)
# Right Hemisphere:
patternClassifierRH = xcal.PatternClassifier([pixels_x, pixels_y//2],
constants["NoiseCCD"][:, pixels_y//2:],
split_evt_primary_threshold,
split_evt_secondary_threshold,
split_evt_mip_threshold,
tagFirstSingles=4, # track along y-axis, right to left
nCells=1,
allowElongated=False,
blockSize=[pixels_x, pixels_y//2],
parallel=False)
patternClassifierLH._imagesPerChunk = 1
patternClassifierRH._imagesPerChunk = 1
patternClassifierLH._noisemap = constants["NoiseCCD"][:, :pixels_x//2]
patternClassifierRH._noisemap = constants["NoiseCCD"][:, pixels_x//2:]
# Setting bad pixels:
patternClassifierLH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, :pixels_x//2] != 0)
patternClassifierRH.setBadPixelMask(constants["BadPixelsDarkCCD"][:, pixels_x//2:] != 0)
```
%% Cell type:code id: tags:
``` python
#***************** Histogram Calculators ******************#
# Will contain uncorrected data:
histCalRaw = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain offset corrected data:
histCalOffsetCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('common_mode'):
# Will contain common mode corrected data:
histCalCommonModeCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('pattern_class'):
# Will contain split events pattern data:
histCalPcorr = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
# Will contain singles events data:
histCalPcorrS = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
if corr_bools.get('relgain'):
# Will contain gain corrected data:
histCalGainCor = xcal.HistogramCalculator(sensorSize,
bins=bins,
range=bin_range,
nCells=1,
parallel=False,
blockSize=blockSize)
```
%% Cell type:markdown id: tags:
## Applying corrections to the raw data
%% Cell type:code id: tags:
``` python
def offset_correction(wid, index, d):
"""offset correction.
Equating bad pixels' values to np.nan,
so that the pattern classifier ignores them:
"""
d = d.copy()
# TODO: To clear this up. Is it on purpose to save corrected data with nans?
d[bpix != 0] = np.nan
d -= offset # offset correction
# TODO: to clear this up. why save the badpixels map in the corrected data?
bpix_data[index, ...] = bpix
data[index, ...] = d
def common_mode(wid, index, d):
"""common-mode correction.
Discarding events caused by saturated pixels:
"""
d = np.squeeze(cmCorrection.correct(d, cellTable=np.zeros(pixels_y, np.int32)))
# we equate these values to np.nan so that the pattern classifier ignores them:
d[d >= saturated_threshold] = np.nan
data[index, ...] = d
def gain_correction(wid, index, d):
"""relative gain correction."""
d /= relativegain
data[index, ...] = d
def pattern_classification_correction(wid, index, d):
"""pattern classification correction.
data set to save split event corrected images
The calculation of the cluster map:]
Dividing the data into left and right hemispheres:
"""
# pattern classification on corrected data
dataLH, patternsLH = patternClassifierLH.classify(d[:, :pixels_x//2])
dataRH, patternsRH = patternClassifierRH.classify(d[:, pixels_x//2:])
d[:, :pixels_x//2] = np.squeeze(dataLH)
d[:, pixels_x//2:] = np.squeeze(dataRH)
patterns = np.zeros(d.shape, patternsLH.dtype)
patterns[:, :pixels_x//2] = np.squeeze(patternsLH)
patterns[:, pixels_x//2:] = np.squeeze(patternsRH)
d[d < split_evt_primary_threshold*noise] = 0
data[index, ...] = d
ptrn_data[index, ...] = patterns
d[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles
filtered_data[index, ...] = d
```
%% Cell type:code id: tags:
``` python
# 10 is a number chosen after testing 1 ... 71 parallel threads for a node with 72 cpus.
parallel_num_threads = 10
context = psh.context.ThreadContext(num_workers=parallel_num_threads)
data_path = "INSTRUMENT/"+instrument_src+"/data/"
offset = np.squeeze(constants["OffsetCCD"])
noise = np.squeeze(constants["NoiseCCD"])
bpix = np.squeeze(constants["BadPixelsDarkCCD"])
relativegain = constants.get("RelativeGainCCD")
```
%% Cell type:code id: tags:
``` python
def write_datasets(seq_dc, corr_arrays, out_file, instrument_src):
"""
Create datasets first, then add data.
This keeps the metadata available together at the start of the file,
so it is quick to see what the file contains.
"""
# Create CORR files and add corrected data sections.
image_counts = seq_dc[instrument_src, "data.image"].data_counts(labelled=False)
dataset_chunk = ((chunk_size_idim,) + corr_arrays["pixels"].shape[1:]) # e.g. (1, pixels_x, pixels_y)
with DataFile(out_file, 'w') as ofile:
seq_file = seq_dc.files[0]
# Create INDEX datasets.
ofile.create_index(seq_dc.train_ids, from_file=seq_file)
# Create METADATA datasets
ofile.create_metadata(
like=seq_dc,
sequence=seq_file.sequence,
instrument_channels=(f"{instrument_src}/data",)
)
# Create Instrument section to later add corrected datasets.
outp_source = ofile.create_instrument_source(instrument_src)
# Create count/first datasets at INDEX source.
outp_source.create_index(data=image_counts)
# Store uncorrected trainId in the corrected file.
outp_source.create_key(
f"data.trainId", data=seq_dc.train_ids,
chunks=min(50, len(seq_dc.train_ids))
)
# TODO: gain dataset is just the RelativeGain constant
# and it doesn't make sense to write it into corrected data.
comp_fields = ["gain", "patterns", "pixels_classified"]
# TODO: to clear this up: why save corrected data
# in data/pixels rather than data/image.
for field, data in corr_arrays.items():
if field in comp_fields: # Write compressed corrected data.
outp_source.create_compressed_key(f"data.{field}", data=data)
else:
outp_source.create_key(
f"data.{field}", data=data,
chunks=dataset_chunk
)
```
%% Cell type:code id: tags:
``` python
# Data corrections and event classifications happen here.
# Also, the corrected data are written to datasets:
empty_seq = 0
for seq_n, seq_f in enumerate(seq_files):
seq_dc = H5File(seq_f)
out_file = f"{out_folder}/{seq_f.name}".replace("RAW", "CORR")
step_timer.start()
img_dc = seq_dc[instrument_src, "data.image"]
dshape = seq_dc[instrument_src, "data.image"].shape
n_trains = dshape[0]
corr_ntrains = dshape[0] # number of available trains to correct.
all_train_ids = img_dc.train_ids # All trains including trains with no data.
# Raise a WARNING if this sequence has no trains to correct.
# Otherwise, print number of trains with no data.
if corr_ntrains == 0:
warning(f"No trains to correct for {seq_f.name}: "
"Skipping the processing of this file.")
empty_seq += 1
continue
elif len(all_train_ids) != corr_ntrains:
print(
f"{seq_f.name} has {len(all_train_ids) - corr_ntrains} "
"trains with missing data."
)
# If you want to analyze only a certain number of frames
# instead of all available good frames.
if limit_trains > 0:
print(f"\nCorrected trains are limited to: {limit_trains} trains")
corr_ntrains = min(corr_ntrains, limit_trains)
data_shape = (corr_ntrains, dshape[1], dshape[2])
print(f"Correcting file {seq_f} of {corr_ntrains} trains.")
# Overwrite seq_dc after eliminating empty trains and/or limiting the number of images.
seq_dc = seq_dc.select(
instrument_src, "*", require_all=True).select_trains(np.s_[:corr_ntrains])
raw_data = seq_dc[instrument_src, "data.image"].ndarray().astype(np.float32)
to_store_arrays = {"image": raw_data}
# TODO: move the parts for reading data to plot to later cells.
if seq_n == 0:
raw_plt = raw_data.copy() # plot first sequence only
step_timer.start()
# Allocating shared arrays for data arrays for each correction stage.
data = context.alloc(shape=data_shape, dtype=np.float32)
bpix_data = context.alloc(shape=data_shape, dtype=np.uint32)
histCalRaw.fill(raw_data) # filling histogram with raw uncorrected data
# Applying offset correction
context.map(offset_correction, raw_data)
histCalOffsetCor.fill(data) # filling histogram with offset corrected data
if seq_n == 0:
off_data = data.copy() # plot first sequence only
to_store_arrays["pixels"] = data.copy()
to_store_arrays["mask"] = bpix_data
step_timer.done_step(f'offset correction.')
if corr_bools.get('common_mode'):
step_timer.start()
# Applying common mode correction
context.map(common_mode, data)
if seq_n == 0:
cm_data = data.copy() # plot first sequence only
to_store_arrays["pixels_cm"] = data.copy()
histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data
step_timer.done_step(f'common-mode correction.')
if corr_bools.get('relgain'):
step_timer.start()
# Applying gain correction
context.map(gain_correction, data)
if seq_n == 0:
rg_data = data.copy() # plot first sequence only
# TODO: Why storing a repeated constant for each image in corrected files.
to_store_arrays["gain"] = np.repeat(relativegain[np.newaxis, ...], corr_ntrains, axis=0).astype(np.float32) # noqa
histCalGainCor.fill(data) # filling histogram with gain corrected data
step_timer.done_step(f'gain correction.')
if corr_bools.get('pattern_class'):
step_timer.start()
ptrn_data = context.alloc(shape=data_shape, dtype=np.int32)
filtered_data = context.alloc(shape=data_shape, dtype=np.int32)
# Applying pattern classification correction
# Even though data is indeed of dtype np.float32,
# not specifying it again here degrades the data quality.
context.map(pattern_classification_correction, data.astype(np.float32))
if seq_n == 0:
cls_data = data.copy() # plot first sequence only
# split event corrected images plotted for first sequence only
# (also these events are only singles events):
to_store_arrays["pixels_classified"] = data.copy()
to_store_arrays["patterns"] = ptrn_data
histCalPcorr.fill(data) # filling histogram with split events corrected data
# filling histogram with corr data after discarding doubles, triples, quadruple, clusters, and first singles
histCalPcorrS.fill(filtered_data)
step_timer.done_step(f'pattern classification correction.')
step_timer.start()
# Storing corrected data sources.
write_datasets(
seq_dc=seq_dc,
corr_arrays=to_store_arrays,
out_file=out_file,
instrument_src=instrument_src,
)
step_timer.done_step(f'Storing data.')
# Exit and raise warning if there are no data to correct for all sequences.
if empty_seq == len(seq_files):
warning("No valid trains for RAW data to correct.")
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
print("In addition to offset correction, the following corrections were performed:")
for k, v in corr_bools.items():
if v:
print(" -", k.upper())
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
# Histogramming the resulting spectra:
# The first _ refers to the bin edges and the second _ to the statistics; we ignore both.
# If you use histCalRaw.get(cumulatative=True) and so on, cumulatative=True turns the counts
# array (e.g. RawHistVals) into a 1D array instead of keeping the original shape:
RawHistVals, _, RawHistMids, _ = histCalRaw.get()
off_cor_HistVals, _, off_cor_HistMids, _ = histCalOffsetCor.get()
if corr_bools.get('common_mode'):
cm_cor_HistVals, _, cm_HistMids, _ = histCalCommonModeCor.get()
if corr_bools.get('relgain'):
gain_cor_HistVals, _, gain_cor_HistMids, _ = histCalGainCor.get()
if corr_bools.get('pattern_class'):
split_HistVals, _, split_HistMids, _ = histCalPcorr.get() # split events corrected
singles_HistVals, _, singles_HistMids, _ = histCalPcorrS.get() # last s in variable names: singles events
```
%% Cell type:code id: tags:
``` python
# Saving intermediate data to disk:
step_timer.start()
np.savez(os.path.join(out_folder, 'Raw_Events.npz'), RawHistMids, RawHistVals)
np.savez(os.path.join(out_folder, 'Offset_Corrected_Events.npz'), off_cor_HistMids, off_cor_HistVals)
if corr_bools.get('common_mode'):
np.savez(os.path.join(out_folder, 'Common_Mode_Corrected_Events.npz'), cm_HistMids, cm_cor_HistVals)
if corr_bools.get('relgain'):
np.savez(os.path.join(out_folder, 'Gain_Corrected_Events.npz'), gain_cor_HistMids, gain_cor_HistVals)
if corr_bools.get('pattern_class'):
np.savez(os.path.join(out_folder, 'Split_Events_Corrected_Events.npz'), split_HistMids, split_HistVals)
np.savez(os.path.join(out_folder, 'Singles_Events.npz'), singles_HistMids, singles_HistVals)
step_timer.done_step(f'Saving intermediate data to disk.')
print("Various spectra are saved to disk in the form of histograms. Please check {}".format(out_folder))
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Raw vs. Corrected Spectra'))
step_timer.start()
figure = [{'x': RawHistMids,
'y': RawHistVals,
'y_err': np.sqrt(RawHistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Uncorrected'
},
{'x': off_cor_HistMids,
'y': off_cor_HistVals,
'y_err': np.sqrt(off_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Offset Corrected'
}]
if corr_bools.get('common_mode'):
figure.append({'x': cm_HistMids,
'y': cm_cor_HistVals,
'y_err': np.sqrt(cm_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Common Mode Corrected'})
if corr_bools.get('relgain'):
xrange = bin_range
figure.append({'x': gain_cor_HistMids,
'y': gain_cor_HistVals,
'y_err': np.sqrt(gain_cor_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Gain Corrected'})
if corr_bools.get('pattern_class'):
figure.extend([{'x': split_HistMids,
'y': split_HistVals,
'y_err': np.sqrt(split_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Split Events Corrected'
},
{'x': singles_HistMids,
'y': singles_HistVals,
'y_err': np.sqrt(singles_HistVals[:]),
'drawstyle': 'steps-post',
'errorstyle': 'bars',
'errorcoarsing': 2,
'label': 'Singles Events'
}])
fig = xana.simplePlot(figure, aspect=1, x_label='ADU', y_label='Number of Occurrences', figsize='2col',
y_log=True, x_range=bin_range, title = '1 ADU per bin is used.',
legend='top-right-frame-1col')
step_timer.done_step('Plotting')
```
%% Cell type:code id: tags:
``` python
# This function plots pattern statistics:
def classification_plot(patternStats, hemisphere):
print("****************** {} HEMISPHERE ******************\n"
.format(hemisphere))
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(4, 4, 1)
sfields = ["singles", "first singles", "clusters"]
mfields = ["doubles", "triples", "quads"]
relativeOccurances = []
labels = []
for i, f in enumerate(sfields):
relativeOccurances.append(patternStats[f])
labels.append(f)
for i, f in enumerate(mfields):
for k in range(len(patternStats[f])):
relativeOccurances.append(patternStats[f][k])
labels.append("{}({})".format(f, k))
relativeOccurances = np.array(relativeOccurances, float)
relativeOccurances /= np.sum(relativeOccurances)
pie = ax.pie(relativeOccurances, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
smaps = ["singlemap", "firstsinglemap", "clustermap"]
for i, m in enumerate(smaps):
ax = fig.add_subplot(4, 4, 2+i)
pmap = ax.imshow(patternStats[m], interpolation="nearest", vmax=2*np.nanmedian(patternStats[m]))
ax.set_title(m)
cb = fig.colorbar(pmap)
mmaps = ["doublemap", "triplemap", "quadmap"]
k = 0
for i, m in enumerate(mmaps):
for j in range(4):
ax = fig.add_subplot(4, 4, 2+len(smaps)+k)
pmap = ax.imshow(patternStats[m][j], interpolation="nearest", vmax=2*np.median(patternStats[m][j]))
ax.set_title("{}({})".format(m,j))
cb = fig.colorbar(pmap)
k+=1
```
%% Cell type:code id: tags:
``` python
# The next two cells plot the classification results for left and right hemispheres, respectively:
display(Markdown('### Classification Results - Plots'))
if corr_bools.get('pattern_class'):
patternStatsLH = patternClassifierLH.getPatternStats()
classification_plot(patternStatsLH, 'Left')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
patternStatsRH = patternClassifierRH.getPatternStats()
classification_plot(patternStatsRH, 'Right')
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Tabulated Statistics'))
if corr_bools.get('pattern_class'):
step_timer.start()
t0 = PrettyTable()
t0.title = "Total Number of Counts after All Corrections"
t0.field_names = ["Hemisphere", "Singles", "First-Singles", "Clusters"]
t0.add_row(["LH", patternStatsLH['singles'], patternStatsLH['first singles'], patternStatsLH['clusters']])
t0.add_row(["RH", patternStatsRH['singles'], patternStatsRH['first singles'], patternStatsRH['clusters']])
print(t0)
print("Abbreviations: D (Doubles), T (Triples), Q (Quadruples), L (Left), R (Right), and H (Hemisphere).")
t1 = PrettyTable()
t1.field_names = ["Index", "D-LH", "D-RH", "T-LH", "T-RH", "Q-LH", "Q-RH"]
t1.add_row([0, patternStatsLH['doubles'][0], patternStatsRH['doubles'][0], patternStatsLH['triples'][0],
patternStatsRH['triples'][0], patternStatsLH['quads'][0], patternStatsRH['quads'][0]])
t1.add_row([1, patternStatsLH['doubles'][1], patternStatsRH['doubles'][1], patternStatsLH['triples'][1],
patternStatsRH['triples'][1], patternStatsLH['quads'][1], patternStatsRH['quads'][1]])
t1.add_row([2, patternStatsLH['doubles'][2], patternStatsRH['doubles'][2], patternStatsLH['triples'][2],
patternStatsRH['triples'][2], patternStatsLH['quads'][2], patternStatsRH['quads'][2]])
t1.add_row([3, patternStatsLH['doubles'][3], patternStatsRH['doubles'][3], patternStatsLH['triples'][3],
patternStatsRH['triples'][3], patternStatsLH['quads'][3], patternStatsRH['quads'][3]])
print(t1)
step_timer.done_step('Classification Results - Tabulated Statistics')
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
doublesLH = patternStatsLH['doubles'][0] + patternStatsLH['doubles'][1] + patternStatsLH['doubles'][2] + \
patternStatsLH['doubles'][3]
triplesLH = patternStatsLH['triples'][0] + patternStatsLH['triples'][1] + patternStatsLH['triples'][2] + \
patternStatsLH['triples'][3]
quadsLH = patternStatsLH['quads'][0] + patternStatsLH['quads'][1] + patternStatsLH['quads'][2] + \
patternStatsLH['quads'][3]
allsinglesLH = patternStatsLH['singles'] + patternStatsLH['first singles']
eventsLH = allsinglesLH + doublesLH + triplesLH + quadsLH
doublesRH = patternStatsRH['doubles'][0] + patternStatsRH['doubles'][1] + patternStatsRH['doubles'][2] + \
patternStatsRH['doubles'][3]
triplesRH = patternStatsRH['triples'][0] + patternStatsRH['triples'][1] + patternStatsRH['triples'][2] + \
patternStatsRH['triples'][3]
quadsRH = patternStatsRH['quads'][0] + patternStatsRH['quads'][1] + patternStatsRH['quads'][2] + \
patternStatsRH['quads'][3]
allsinglesRH = patternStatsRH['singles'] + patternStatsRH['first singles']
eventsRH = allsinglesRH + doublesRH + triplesRH + quadsRH
if eventsLH > 0.:
reloccurLH = np.array([allsinglesLH/eventsLH, doublesLH/eventsLH, triplesLH/eventsLH, quadsLH/eventsLH])
else:
reloccurLH = np.array([0]*4)
if eventsRH > 0.:
reloccurRH = np.array([allsinglesRH/eventsRH, doublesRH/eventsRH, triplesRH/eventsRH, quadsRH/eventsRH])
else:
reloccurRH = np.array([0]*4)
```
%% Cell type:code id: tags:
``` python
display(Markdown('### Classification Results - Pie Charts'))
if corr_bools.get('pattern_class'):
step_timer.start()
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot(1, 2, 1)
labels = ['Singles', 'Doubles', 'Triples', 'Quads']
pie = ax.pie(reloccurLH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in LH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
ax = fig.add_subplot(1, 2, 2)
pie = ax.pie(reloccurRH, labels=labels, autopct='%1.1f%%', shadow=True)
ax.set_title("Pattern Occurrence in RH")
# Set aspect ratio to be equal so that pie is drawn as a circle.
a = ax.axis('equal')
step_timer.done_step('Classification Results - Pie Charts')
```
%% Cell type:markdown id: tags:
### Various Images Averaged Over All Frames of Only the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
uncor_mean_im = np.nanmean(raw_data, axis=0)
offset_mean_im = np.nanmean(off_data, axis=0)
if corr_bools.get('common_mode'):
cm_mean_im = np.nanmean(cm_data, axis=0)
if corr_bools.get('relgain'):
gain_mean_im = np.nanmean(rg_data, axis=0)
if corr_bools.get('pattern_class'):
mean_im_cc = np.nanmean(cls_data, axis=0)
fig = xana.heatmapPlot(uncor_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image Averaged over Frames in the First Sequence')
fig = xana.heatmapPlot(offset_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_mean_im, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(gain_mean_im, x_label='Columns', y_label='Rows',
lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image Averaged over Frames in the First Sequence')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(mean_im_cc, x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x), vmin=0, vmax= 18000,
title = 'Image of Single Events Averaged over Frames in the First Sequence')
step_timer.done_step("Plotting")
```
%% Cell type:markdown id: tags:
### Images of the First Frame of the First Sequence ###
%% Cell type:code id: tags:
``` python
step_timer.start()
fig = xana.heatmapPlot(raw_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Uncorrected Image (First Frame of the First Sequence)')
fig = xana.heatmapPlot(off_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Offset Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('common_mode'):
fig = xana.heatmapPlot(cm_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Common Mode Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('relgain'):
fig = xana.heatmapPlot(rg_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)',
aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Gain Corrected Image (First Frame of the First Sequence)')
if corr_bools.get('pattern_class'):
fig = xana.heatmapPlot(cls_data[0, :, :], x_label='Columns', y_label='Rows', lut_label='Signal (ADU)', aspect=1,
x_range=(0, pixels_y), y_range=(0, pixels_x),
panel_x_label='Row Stat (ADU)', panel_y_label='Column Stat (ADU)',
title = 'Image of Single Events (First Frame of the First Sequence)')
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
# Resetting the histogram calculators:
histCalRaw.reset()
histCalOffsetCor.reset()
if corr_bools.get('common_mode'):
histCalCommonModeCor.reset()
if corr_bools.get('relgain'):
histCalGainCor.reset()
if corr_bools.get('pattern_class'):
histCalPcorr.reset()
histCalPcorrS.reset()
```
%% Cell type:markdown id: tags:
Next, the corrected event patterns are read from the patterns dataset created previously and are separated into 4 different categories (singles, doubles, triples and quadruples) using the pattern indices. However, this is done only for the first sequence, as an example.
Note that the number of bins and the bin range for the following histograms may differ from those presented above (depending on gain) to make the counts more noticeable and the peaks more defined.
If you are interested in plotting the events from all sequences or the spectra of half of the sensor, execute the spectra_pnCCD_NBC.ipynb notebook.
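As a complement to the cell below, a hypothetical helper like the following can count how many pixels fall into each pattern category directly from the `data.patterns` array, using the index ranges listed above:
``` python
import numpy as np

def count_patterns(patterns):
    """Count pixels per pattern category (hypothetical helper, not part of the pipeline)."""
    return {
        "singles": int(np.isin(patterns, np.arange(100, 102)).sum()),
        "doubles": int(np.isin(patterns, np.arange(200, 204)).sum()),
        "triples": int(np.isin(patterns, np.arange(300, 304)).sum()),
        "quads": int(np.isin(patterns, np.arange(400, 404)).sum()),
    }
```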
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
singles = []
doubles = []
triples = []
quads = []
with H5File(f"{out_folder}/{seq_files[0].name.replace('RAW', 'CORR')}") as dc: # noqa
data = dc[instrument_src, "data.pixels_classified"].ndarray()
patterns = dc[instrument_src, "data.patterns"].ndarray()
# events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),
# 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns,
# there are left, right, up, and down indices.
# Separating the events:
# Singles and First Singles:
for s in range(100, 102):
single = data.copy()
single[patterns != s] = np.nan
singles.append(single)
for d in range(200, 204):
double = data.copy()
double[patterns != d] = np.nan
doubles.append(double)
for t in range(300, 304):
triple = data.copy()
triple[patterns != t] = np.nan
triples.append(triple)
for q in range(400, 404):
quad = data.copy()
quad[patterns != q] = np.nan
quads.append(quad)
```
%% Cell type:code id: tags:
``` python
if corr_bools.get('pattern_class'):
step_timer.start()
hA = 0
h = 0
for single in singles:
hs, e = np.histogram(single.flatten(), bins=event_bins, range=b_range) # h: histogram counts, e: bin edges
h += hs
hA += hs # hA: counts all events (see below)
# bin edges array has one extra element => need to plot from 0 to the one before the last element to have the
# same size as h-array => in what follows, we use e[:-1] (-1 means one before the last element)
display(Markdown('### Histograms of Corrected Events for One Sequence Only'))
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111)
ax.step(e[:-1], h, color='blue', label='Events Involving Single Pixels Only')
ax.semilogy() # y-axis is log, x-axis is linear
ax.set_xlabel("Energy (ADU) [{} bins per {} ADU]".format(event_bins, b_range[1]-b_range[0]))
ax.set_ylabel("Corrected Events for One Sequence (counts)")
ax.set_xlim(b_range)
h = 0
for double in doubles:
hd, e = np.histogram(double.flatten(), bins=event_bins, range=b_range)
h += hd
hA += hd
ax.step(e[:-1], h, color='red', label='Events Splitting on Double Pixels')
h = 0
for triple in triples:
ht, e = np.histogram(triple.flatten(), bins=event_bins, range=b_range)
h += ht
hA += ht
ax.step(e[:-1], h, color='green', label='Events Splitting on Triple Pixels')
h = 0
for quad in quads:
hq, e = np.histogram(quad.flatten(), bins=event_bins, range=b_range)
h += hq
hA += hq
ax.step(e[:-1], h, color='purple', label='Events Splitting on Quadruple Pixels')
ax.step(e[:-1], hA, color='grey', label='All Valid Events')
l = ax.legend()
step_timer.done_step("Plotting")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
......