Skip to content
Snippets Groups Projects

Preview PDU name beside karabo-da at plots titles

Merged Karim Ahmed requested to merge feat/show_pdu_along_with_karabo_da into master
1 unresolved thread
1 file
+ 12
8
Compare changes
  • Side-by-side
  • Inline
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 112 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID, set to empty string if it is the same a karabo-id
# Parameters for calibration database.
use_dir_creation_date = True # use the creation data of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
overwrite = True # set to True if existing data should be overwritten
relative_gain = True # do relative gain correction
plt_images = 100 # Number of images to plot after applying selected corrections.
limit_images = 0 # ONLY FOR TESTING. process only first N images, Use 0 to process all.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
gain_mode = 0 # 0 for runs with dynamic gain setting, 1 for fixgain. It will be overwritten by value in file, if manual_slow_data is set to True.
mem_cells = 0 # leave memory cells equal 0, as it is saved in control information starting 2019.
bias_voltage = 180 # will be overwritten by value in file
# Parameters for plotting
skip_plots = False # exit after writing corrected files
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import warnings
from functools import partial
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
get_constant_from_db_and_time,
get_dir_creation_date,
get_pdu_from_db,
map_seq_files,
write_compressed_frames,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
run_dc = RunDirectory(in_folder / f'r{run:04d}')
instrument_src = instrument_source_template.format(karabo_id, receiver_template)
if out_folder.exists() and not overwrite:
raise AttributeError("Output path exists! Exiting")
else:
out_folder.mkdir(parents=True, exist_ok=True)
print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")
creation_time = None
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
print(f"Using {creation_time} as creation time")
if karabo_id_control == "":
karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
run_dc, karabo_id, karabo_da, sequences)
if not len(mapped_files):
raise IndexError(
"No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
for kda, sfiles in mapped_files.items():
for k, f in enumerate(sfiles):
if k == 0:
table.append((fi, kda, k, f))
else:
table.append((fi, "", k, f))
fi += 1
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)
try:
this_run_mcells, sc_start = ctrl_data.get_memory_cells()
if this_run_mcells == 1:
memory_cells = 1
print("Run is in single cell mode.\n"
f"Storage cell start: {sc_start:02d}")
else:
memory_cells = 16
print(f"Run is in burst mode.\n"
f"Storage cell start: {sc_start:02d}")
except KeyError as e:
print("WARNING: KeyError while reading number of memory cells.")
if mem_cells == 0:
memory_cells = 1
else:
memory_cells = mem_cells
print(f"WARNING: Set memory cells to {memory_cells}, as "
"it is not saved in control information.")
if not manual_slow_data:
integration_time = ctrl_data.get_integration_time()
bias_voltage = ctrl_data.get_bias_voltage()
gain_setting = ctrl_data.get_gain_setting()
gain_mode = ctrl_data.get_gain_mode()
print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})") # noqa
print(f"Gain mode is {gain_mode}")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells are {memory_cells}")
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
memory_cells=memory_cells,
bias_voltage=bias_voltage,
integration_time=integration_time,
gain_setting=gain_setting,
gain_mode=gain_mode,
)
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
def get_constants_for_module(karabo_da: str):
""" Get calibration constants for given module of Jungfrau
:return:
offset_map (offset map),
mask (mask of bad pixels),
gain_map (map of relative gain factors),
db_module (name of DB module),
when (dictionaty: constant - creation time)
"""
when = {}
retrieval_function = partial(
get_constant_from_db_and_time,
karabo_id=karabo_id,
karabo_da=karabo_da,
cal_db_interface=cal_db_interface,
creation_time=creation_time,
timeout=cal_db_timeout,
print_once=False,
)
offset_map, when["Offset"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.Offset(),
empty_constant=np.zeros((512, 1024, memory_cells, 3))
)
mask, when["BadPixelsDark"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.BadPixelsDark(),
empty_constant=np.zeros((512, 1024, memory_cells, 3), dtype=np.uint32),
)
mask_ff, when["BadPixelsFF"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.BadPixelsFF(),
empty_constant=None
)
gain_map, when["Gain"] = retrieval_function(
condition=condition,
constant=Constants.jungfrau.RelativeGain(),
empty_constant=None
)
# combine masks
if mask_ff is not None:
mask |= np.moveaxis(mask_ff, 0, 1)
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
mask = np.moveaxis(mask, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
mask = np.squeeze(mask)
if gain_map is not None:
if memory_cells > 1:
gain_map = np.moveaxis(gain_map, [0, 2], [2, 0])
# add extra empty cell constant
b = np.ones(((1,)+gain_map.shape[1:]))
gain_map = np.concatenate((gain_map, b), axis=0)
else:
gain_map = np.moveaxis(np.squeeze(gain_map), 1, 0)
return offset_map, mask, gain_map, karabo_da, when
with multiprocessing.Pool() as pool:
r = pool.map(get_constants_for_module, karabo_da)
# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
print(f'Constants for module {k_da}:')
for const in when:
print(f' {const} injected at {when[const]}')
if gain_map is None:
print("No gain map found")
relative_gain = False
constants[k_da] = (offset_map, mask, gain_map)
```
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
d = d.astype(np.float32) # [cells, x, y]
g = gain[index]
# Jungfrau gains 0[00], 1[01], 3[11]
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
# 255 is a cell value pointing to no cell image data (image of 0 pixels).
# Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
# This line is depending on not storing the modified memory cells in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
mask_cell = mask[m, ...]
else:
offset_map_cell = offset_map
mask_cell = mask
# Offset correction
offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
d -= offset
# Gain correction
if relative_gain:
if memory_cells > 1:
gain_map_cell = gain_map[m, ...]
else:
gain_map_cell = gain_map
cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
d /= cal
msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))
data_corr[index, ...] = d
mask_corr[index, ...] = msk
```
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}
# Loop over modules
for local_karabo_da, mapped_files_module in mapped_files.items():
instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
data_path = "INSTRUMENT/"+instrument_src_kda+"/data"
for sequence_file in mapped_files_module: # noqa
sequence_file = Path(sequence_file)
seq_dc = H5File(sequence_file)
# Save corrected data in an output file with name
# of corresponding raw sequence file.
ofile_name = sequence_file.name.replace("RAW", "CORR")
out_file = out_folder / ofile_name
# load shape of data for memory cells, and detector size (imgs, cells, x, y)
# dshape[0] = number of available images to correct.
dshape = seq_dc[instrument_src_kda, "data.adc"].shape
if dshape[0] == 0:
print(f"\t- WARNING: No image data for {ofile_name}: data shape is {dshape}")
continue
# load number of data available, including trains with empty data.
n_trains = len(seq_dc.train_ids)
n_imgs = dshape[0]
# For testing, only correct limit_images
if limit_images > 0:
n_imgs = min(n_imgs, limit_images)
print(f"\nNumber of images to correct: {n_imgs} for {ofile_name}")
if n_trains - dshape[0] != 0:
print(f"\t- WARNING: {sequence_file.name} has {n_trains - dshape[0]} "
"trains with empty data.")
# Just in case if n_imgs is less than the chosen plt_images.
plt_images = min(plt_images, n_imgs)
# load constants from the constants dictionary.
offset_map, mask, gain_map = constants[local_karabo_da]
# Allocate shared arrays.
data_corr = context.alloc(shape=dshape, dtype=np.float32)
mask_corr = context.alloc(shape=dshape, dtype=np.uint32)
step_timer.start()
seq_dc = seq_dc.select(
instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])
data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()
rim_data[local_karabo_da] = data[:plt_images, 0, ...].copy()
context.map(correct_train, data)
step_timer.done_step(f'Correction time.')
step_timer.start()
# Create CORR files and add corrected data sources.
# Exclude raw data images (data/adc)
with h5py.File(out_file, 'w') as ofile:
# Copy RAW non-calibrated sources.
with h5py.File(sequence_file, 'r') as sfile:
h5_copy_except.h5_copy_except_paths(
sfile, ofile, [f"{data_path}/adc"])
# Create datasets with the available corrected data
ddset = ofile.create_dataset(
f"{data_path}/adc",
data=data_corr,
chunks=((1,) + dshape[1:]), # 1 chunk == 1 image
dtype=np.float32,
)
write_compressed_frames(
mask_corr,
ofile,
dataset_path=f"{data_path}/mask",
comp_threads=n_cpus,
)
step_timer.done_step(f'Saving data time.')
# Prepare plotting arrays
step_timer.start()
# TODO: Print out which cell is being selected for plotting
fim_data[local_karabo_da] = data_corr[:plt_images, 0, ...].copy()
msk_data[local_karabo_da] = mask_corr[:plt_images, 0, ...].copy()
gim_data[local_karabo_da] = gain[:plt_images, 0, ...].copy()
step_timer.done_step(f'Preparing plotting data of {plt_images} images.')
```
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
if skip_plots:
print('Skipping plots')
import sys
sys.exit(0)
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
extent = [
np.min(edges[1]),
np.max(edges[1]),
np.min(edges[0]),
np.max(edges[0]),
]
im = ax.imshow(
data[::-1, :],
extent=extent,
aspect="auto",
norm=LogNorm(vmin=1, vmax=np.max(data))
)
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(title)
cb = fig.colorbar(im)
cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
db_modules = get_pdu_from_db(
karabo_id=karabo_id,
karabo_da=karabo_da,
constant=Constants.jungfrau.Offset(),
condition=condition,
cal_db_interface=cal_db_interface,
snapshot_at=creation_time)
```
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
h, ex, ey = np.histogram2d(
rim_data[mod].flatten(),
gim_data[mod].flatten(),
bins=[100, 4],
range=[[0, 10000], [0, 4]],
)
do_2d_plot(
h, (ex, ey),
"Signal (ADU)",
"Gain Bit Value (high gain=0[00], medium gain=1[01], low gain=3[11])",
f"Module {mod} ({pdu})")
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per pixel mean of the sequence file of RAW data
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.mean(rim_data[mod],axis=0),
vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})')
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per pixel mean of the sequence file of CORR data
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.mean(fim_data[mod],axis=0),
vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
fim_data[mod][0, ...],
vmin=min(0.75*np.median(fim_data[mod][0, ...]), -0.5),
vmax=max(2.*np.median(fim_data[mod][0, ...]), 100),
cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(211)
h = ax.hist(
fim_data[mod].flatten(),
bins=1000,
range=(-100, 1000),
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
ax = fig.add_subplot(212)
h = ax.hist(
fim_data[mod].flatten(),
bins=1000,
range=(-1000, 10000),
log=True,
)
l = ax.set_xlabel("Signal (keV)")
l = ax.set_ylabel("Counts")
_ = ax.set_title(f'Module {mod} ({pdu})')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per pixel maximum of the first train of the GAIN data
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.max(gim_data[mod], axis=0),
vmin=0, vmax=3, cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gains stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
table.append(
(item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
table, tablefmt='latex',
headers=["Bad pixel type", "Bit mask"])))
```
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
A single image bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for pdu, mod in zip(db_modules, karabo_da):
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
im = ax.imshow(
np.log2(msk_data[mod][0,...]),
vmin=0, vmax=32, cmap="jet")
ax.set_title(f'Module {mod} ({pdu})', size=18)
cb = fig.colorbar(im, ax=ax)
```
Loading