[Jungfrau][DARK][CORRECT] Feat/fixed gain jungfrau

Merged Karim Ahmed requested to merge feat/Fixed_gain_jungfrau into master
%% Cell type:markdown id: tags:
# Jungfrau Offline Correction #
Author: European XFEL Detector Group, Version: 2.0
Offline Calibration for the Jungfrau Detector
%% Cell type:code id: tags:
``` python
in_folder = "/gpfs/exfel/exp/SPB/202130/p900204/raw" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/ahmedk/test/remove" # the folder to output to, required
run = 112 # run to process, required
sequences = [-1] # sequences to correct, set to [-1] for all, range allowed
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
# Parameters used to access raw data.
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = ['JNGFR01', 'JNGFR02', 'JNGFR03', 'JNGFR04', 'JNGFR05', 'JNGFR06', 'JNGFR07', 'JNGFR08'] # data aggregators
receiver_template = "JNGFR{:02d}" # Detector receiver template for accessing raw data files. e.g. "JNGFR{:02d}"
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # template to use for file name
instrument_source_template = '{}/DET/{}:daqOutput' # template for source name (filled with karabo_id & receiver_id). e.g. 'SPB_IRDA_JF4M/DET/JNGFR01:daqOutput'
ctrl_source_template = '{}/DET/CONTROL' # template for control source name (filled with karabo_id_control)
karabo_id_control = "" # if control is on a different ID; set to an empty string if it is the same as karabo_id
karabo_da_control = "JNGFRCTRL00" # file inset for control data
# Parameters for calibration database.
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl016:8017#8025" # the database interface to use
cal_db_timeout = 180000 # timeout on caldb requests
# Parameters affecting corrected data.
overwrite = True # set to True if existing data should be overwritten
relative_gain = True # do relative gain correction
plt_images = 100 # Number of images to plot after applying selected corrections.
limit_images = 0 # ONLY FOR TESTING. Process only the first N images; use 0 to process all.
# Parameters for retrieving calibration constants
manual_slow_data = False # if true, use manually entered bias_voltage and integration_time values
integration_time = 4.96 # integration time in us, will be overwritten by value in file
gain_setting = 0 # 0 for dynamic gain, 1 for dynamic HG0, will be overwritten by value in file
mem_cells = 0 # leave memory cells equal to 0, as it is saved in control information starting from 2019.
bias_voltage = 180 # will be overwritten by value in file
# TODO: Remove
db_module = "" # ID of module in calibration database, this parameter is ignored in the notebook. TODO: remove from calibration_configurations.
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    from xfel_calibrate.calibrate import balance_sequences as bs
    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
import multiprocessing
import warnings
from functools import partial
from pathlib import Path
import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pasha as psh
import tabulate
from IPython.display import Latex, Markdown, display
from extra_data import H5File, RunDirectory
from matplotlib.colors import LogNorm
from cal_tools import h5_copy_except
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.enums import BadPixels
from cal_tools.step_timing import StepTimer
from cal_tools.tools import (
    get_constant_from_db_and_time,
    get_dir_creation_date,
    map_seq_files,
    write_compressed_frames,
)
from iCalibrationDB import Conditions, Constants
warnings.filterwarnings('ignore')
matplotlib.use('agg')
%matplotlib inline
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)

run_dc = RunDirectory(in_folder / f'r{run:04d}')
instrument_src = instrument_source_template.format(karabo_id, receiver_template)

if out_folder.exists() and not overwrite:
    raise AttributeError("Output path exists! Exiting")
else:
    out_folder.mkdir(parents=True, exist_ok=True)

print(f"Run is: {run}")
print(f"Instrument H5File source: {instrument_src}")
print(f"Process modules: {karabo_da}")

creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")

if karabo_id_control == "":
    karabo_id_control = karabo_id
```
%% Cell type:code id: tags:
``` python
# Read available sequence files to correct.
mapped_files, num_seq_files = map_seq_files(
    run_dc, karabo_id, karabo_da, sequences)

if not len(mapped_files):
    raise IndexError(
        "No sequence files available to correct for the selected sequences and karabo_da.")
```
%% Cell type:code id: tags:
``` python
print(f"Processing a total of {num_seq_files} sequence files")
table = []
fi = 0
if len(mapped_files) > 0:  # create table
    for kda, sfiles in mapped_files.items():
        for k, f in enumerate(sfiles):
            if k == 0:
                table.append((fi, kda, k, f))
            else:
                table.append((fi, "", k, f))
            fi += 1
md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["#", "module", "# module", "file"])))
```
%% Cell type:code id: tags:
``` python
ctrl_src = ctrl_source_template.format(karabo_id_control)
ctrl_data = JungfrauCtrl(run_dc, ctrl_src)

try:
    this_run_mcells, sc_start = ctrl_data.get_memory_cells()
    if this_run_mcells == 1:
        memory_cells = 1
        print("Run is in single cell mode.\n"
              f"Storage cell start: {sc_start:02d}")
    else:
        memory_cells = 16
        print("Run is in burst mode.\n"
              f"Storage cell start: {sc_start:02d}")
except KeyError as e:
    print("WARNING: KeyError while reading number of memory cells.")
    if mem_cells == 0:
        memory_cells = 1
    else:
        memory_cells = mem_cells
    print(f"WARNING: Set memory cells to {memory_cells}, as "
          "it is not saved in control information.")

if not manual_slow_data:
    integration_time = ctrl_data.get_integration_time()
    bias_voltage = ctrl_data.get_bias_voltage()
    gain_setting = ctrl_data.get_gain_setting()
    gain_mode = ctrl_data.get_gain_mode()

print(f"Integration time is {integration_time} us")
print(f"Gain setting is {gain_setting} (run settings: "
      f"{ctrl_data.run_settings.value if ctrl_data.run_settings else ctrl_data.run_settings})")  # noqa
print(f"Gain mode is {gain_mode}")
print(f"Bias voltage is {bias_voltage} V")
print(f"Number of memory cells is {memory_cells}")
```
%% Cell type:markdown id: tags:
### Retrieving calibration constants ###
%% Cell type:code id: tags:
``` python
condition = Conditions.Dark.jungfrau(
    memory_cells=memory_cells,
    bias_voltage=bias_voltage,
    integration_time=integration_time,
    gain_setting=gain_setting,
    gain_mode=gain_mode,
)


def get_constants_for_module(karabo_da: str):
    """Get calibration constants for the given Jungfrau module.

    :return:
        offset_map (offset map),
        mask (mask of bad pixels),
        gain_map (map of relative gain factors),
        karabo_da (data aggregator name of the module),
        when (dictionary: constant -> creation time)
    """
    when = {}
    retrieval_function = partial(
        get_constant_from_db_and_time,
        karabo_id=karabo_id,
        karabo_da=karabo_da,
        condition=condition,
        cal_db_interface=cal_db_interface,
        creation_time=creation_time,
        timeout=cal_db_timeout,
        print_once=False,
    )
    offset_map, when["Offset"] = retrieval_function(
        constant=Constants.jungfrau.Offset(),
        empty_constant=np.zeros((1024, 512, 1, 3))
    )
    mask, when["BadPixelsDark"] = retrieval_function(
        constant=Constants.jungfrau.BadPixelsDark(),
        empty_constant=np.zeros((1024, 512, 1, 3)),
    )
    mask_ff, when["BadPixelsFF"] = retrieval_function(
        constant=Constants.jungfrau.BadPixelsFF(),
        empty_constant=None
    )
    gain_map, when["Gain"] = retrieval_function(
        constant=Constants.jungfrau.RelativeGain(),
        empty_constant=None
    )

    # Combine masks.
    if mask_ff is not None:
        mask |= np.moveaxis(mask_ff, 0, 1)

    # Move from (x, y, cell, gain) to (cell, x, y, gain).
    offset_map = np.squeeze(offset_map)
    mask = np.squeeze(mask)
    if memory_cells > 1:
        offset_map = np.moveaxis(np.moveaxis(offset_map, 0, 2), 0, 2)
        mask = np.moveaxis(np.moveaxis(mask, 0, 2), 0, 2)

    if gain_map is not None:
        if memory_cells > 1:
            gain_map = np.moveaxis(np.moveaxis(gain_map, 0, 2), 0, 1)
        else:
            gain_map = np.squeeze(gain_map)
            gain_map = np.moveaxis(gain_map, 1, 0)

    return offset_map, mask, gain_map, karabo_da, when


with multiprocessing.Pool() as pool:
    r = pool.map(get_constants_for_module, karabo_da)

# Print timestamps for the retrieved constants.
constants = {}
for offset_map, mask, gain_map, k_da, when in r:
    print(f'Constants for module {k_da}:')
    for const in when:
        print(f'  {const} injected at {when[const]}')
    if gain_map is None:
        print("No gain map found")
        relative_gain = False
    constants[k_da] = (offset_map, mask, gain_map)
```
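%% Cell type:markdown id: tags:
The axis reordering above is easiest to check on a dummy array. The following is a minimal sketch (illustrative only, with a made-up 16-cell burst-mode shape) showing that the two `np.moveaxis` calls used in `get_constants_for_module` turn an `(x, y, cell, gain)` constant into `(cell, x, y, gain)`:
%% Cell type:code id: tags:
``` python
# Hypothetical burst-mode constant with axes (x, y, cell, gain).
dummy = np.zeros((1024, 512, 16, 3))

# Same two moveaxis calls as above: (x, y, cell, gain) -> (cell, x, y, gain).
reordered = np.moveaxis(np.moveaxis(dummy, 0, 2), 0, 2)
assert reordered.shape == (16, 1024, 512, 3)
```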
%% Cell type:code id: tags:
``` python
# Correct a chunk of images for offset and gain
def correct_train(wid, index, d):
    d = d.astype(np.float32)  # [2, x, y]
    g = gain[index]
    m = memcells[index]
    g[g==3] = 2

    if 0 <= index < plt_images:
        r_data[index, ...] = d[0, ...]

    # Select memory cells.
    # TODO: This needs to be revisited, as it results in copying data
    # to a new array on every train, even when there's the same pattern
    # of memory cells on every train.
    if memory_cells > 1:
        m[m>16] = 0  # TODO: this is wrong and needs to be updated with burst mode.
        # For an invalid image a memory cell of 255 is set.
        # These images don't need to be processed.
        offset_map_cell = offset_map[m, ...]
        mask_cell = mask[m, ...]
    else:
        offset_map_cell = offset_map
        mask_cell = mask

    # Offset correction
    offset = np.choose(g, np.moveaxis(offset_map_cell, -1, 0))
    d -= offset

    # Gain correction
    if relative_gain:
        if memory_cells > 1:
            gain_map_cell = gain_map[m, ...]
        else:
            gain_map_cell = gain_map
        cal = np.choose(g, np.moveaxis(gain_map_cell, -1, 0))
        d /= cal

    msk = np.choose(g, np.moveaxis(mask_cell, -1, 0))

    # Store a sample of the data for plotting.
    if 0 <= index < plt_images:
        if memory_cells == 1:
            g_data[index, ...] = g
        else:
            g_data[index, ...] = g[1, ...]

    data_corr[index, ...] = d
    mask_corr[index, ...] = msk
```
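%% Cell type:markdown id: tags:
The per-pixel constant selection in `correct_train` relies on `np.choose`: after mapping gain bit 3 to 2, the gain values 0, 1, 2 pick one of the three gain planes of a constant for every pixel. A small self-contained sketch with toy shapes and hypothetical values:
%% Cell type:code id: tags:
``` python
# Toy 2x2 "image" of gain bits (3 would already be mapped to 2, as above).
g_toy = np.array([[0, 1],
                  [2, 0]])

# Toy constant with axes (x, y, gain): one value per pixel and gain stage.
const_toy = np.stack([np.full((2, 2), 10.),   # gain 0
                      np.full((2, 2), 20.),   # gain 1
                      np.full((2, 2), 30.)],  # gain 2
                     axis=-1)

# Same pattern as in correct_train: per pixel, pick the plane named by the gain bit.
per_pixel = np.choose(g_toy, np.moveaxis(const_toy, -1, 0))
print(per_pixel)  # [[10. 20.], [30. 10.]]
```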
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
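%% Cell type:markdown id: tags:
A minimal sketch of the `pasha` pattern used below, assuming only the calls already present in this notebook (`context.alloc` and `context.map`): the kernel is called once per entry along the first axis with `(worker_id, index, value)` and writes its result into a shared output array allocated from the same context.
%% Cell type:code id: tags:
``` python
# Illustrative only; the actual correction uses `context` and `correct_train` below.
toy_context = psh.context.ProcessContext(num_workers=2)

# Toy input: 4 "trains" of 3x3 data, and a shared output array of the same shape.
toy_data = np.arange(4 * 9, dtype=np.float32).reshape(4, 3, 3)
toy_out = toy_context.alloc(shape=toy_data.shape, dtype=np.float32)

def toy_kernel(wid, index, d):
    # Trivial "correction": subtract a constant offset from this train.
    toy_out[index] = d - 1.0

toy_context.map(toy_kernel, toy_data)
print(toy_out[0, 0, 0])  # -1.0
```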
%% Cell type:markdown id: tags:
### Correcting RAW data ###
%% Cell type:code id: tags:
``` python
fim_data = {}
gim_data = {}
rim_data = {}
msk_data = {}

# Loop over modules
for local_karabo_da, mapped_files_module in mapped_files.items():
    instrument_src_kda = instrument_src.format(int(local_karabo_da[-2:]))
    data_path = "INSTRUMENT/" + instrument_src_kda + "/data"

    for sequence_file in mapped_files_module:  # noqa
        sequence_file = Path(sequence_file)
        seq_dc = H5File(sequence_file)

        # Save corrected data in an output file with the name
        # of the corresponding raw sequence file.
        ofile_name = sequence_file.name.replace("RAW", "CORR")
        out_file = out_folder / ofile_name

        # Load the data shape to get memory cells and detector size (imgs, cells, x, y).
        # dshape[0] = number of available images to correct.
        dshape = seq_dc[instrument_src_kda, "data.adc"].shape
        if dshape[0] == 0:
            print(f"\t- WARNING: No image data for {ofile_name}: data shape is {dshape}")
            continue

        sensor_size = dshape[1:]

        # Load the number of data entries available, including trains with empty data.
        n_trains = seq_dc.get_data_counts(instrument_src_kda, "data.adc").shape[0]
        n_imgs = dshape[0]

        # For testing, only correct limit_images
        if limit_images > 0:
            n_imgs = min(n_imgs, limit_images)
        print(f"\nNumber of images to correct: {n_imgs} for {ofile_name}")

        if n_trains - dshape[0] != 0:
            print(f"\t- WARNING: {sequence_file.name} has {n_trains - dshape[0]} "
                  "trains with empty data.")

        # Just in case n_imgs is less than the chosen plt_images.
        plt_images = min(plt_images, n_imgs)

        # Load constants from the constants dictionary.
        offset_map, mask, gain_map = constants[local_karabo_da]

        # Allocate shared arrays for plotting.
        g_data = context.alloc(
            shape=(plt_images, dshape[2], dshape[3]), dtype=np.uint8)
        r_data = context.alloc(
            shape=(plt_images, dshape[2], dshape[3]), dtype=np.float32)
        # Shared arrays for corrected data.
        data_corr = context.alloc(shape=dshape, dtype=np.float32)
        mask_corr = context.alloc(shape=dshape, dtype=np.uint32)

        step_timer.start()
        seq_dc = seq_dc.select(
            instrument_src_kda, "*", require_all=True).select_trains(np.s_[:n_imgs])

        data = seq_dc[instrument_src_kda, "data.adc"].ndarray()
        gain = seq_dc[instrument_src_kda, "data.gain"].ndarray()
        memcells = seq_dc[instrument_src_kda, "data.memoryCell"].ndarray()

        context.map(correct_train, data)
        step_timer.done_step('Correction time.')

        step_timer.start()
        # Create CORR files and add corrected data sources.
        # Exclude raw data images (data/adc).
        with h5py.File(out_file, 'w') as ofile:
            # Copy RAW non-calibrated sources.
            with h5py.File(sequence_file, 'r') as sfile:
                h5_copy_except.h5_copy_except_paths(
                    sfile, ofile, [f"{data_path}/adc"])

            # Create datasets with the available corrected data.
            ddset = ofile.create_dataset(
                f"{data_path}/adc",
                data=data_corr,
                chunks=((1,) + dshape[1:]),  # 1 chunk == 1 image
                dtype=np.float32,
            )

            write_compressed_frames(
                mask_corr,
                ofile,
                dataset_path=f"{data_path}/mask",
                comp_threads=n_cpus,
            )
        step_timer.done_step('Saving data time.')

        # Prepare plotting arrays.
        step_timer.start()
        if memory_cells == 1:
            fim_data[local_karabo_da] = data_corr[:plt_images, ...].copy()
            msk_data[local_karabo_da] = mask_corr[:plt_images, ...].copy()
        else:
            fim_data[local_karabo_da] = data_corr[:plt_images, 1, ...].copy()
            msk_data[local_karabo_da] = mask_corr[:plt_images, 1, ...].copy()
        gim_data[local_karabo_da] = g_data
        rim_data[local_karabo_da] = r_data
        step_timer.done_step(f'Preparing plotting data of {plt_images} images.')
```
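%% Cell type:markdown id: tags:
As a quick sanity check (a sketch, not part of the notebook's processing), a CORR file written above can be opened with the same `extra_data.H5File` interface used for the RAW input; the corrected frames stay under the original source and key names. The snippet below assumes `out_file` and `instrument_src_kda` still refer to the last sequence file processed in the loop:
%% Cell type:code id: tags:
``` python
# Illustrative only: inspect the last corrected file written above.
corr_dc = H5File(out_file)
corr_adc = corr_dc[instrument_src_kda, "data.adc"].ndarray()
print(corr_adc.shape, corr_adc.dtype)  # (n_imgs, cells, x, y), float32
```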
%% Cell type:markdown id: tags:
### Processing time summary ###
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    extent = [
        np.min(edges[1]),
        np.max(edges[1]),
        np.min(edges[0]),
        np.max(edges[0]),
    ]
    im = ax.imshow(
        data[::-1, :],
        extent=extent,
        aspect="auto",
        norm=LogNorm(vmin=1, vmax=np.max(data))
    )
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title(title)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    h, ex, ey = np.histogram2d(
        rim_data[mod].flatten(),
        gim_data[mod].flatten(),
        bins=[100, 4],
        range=[[0, 10000], [0, 4]],
    )
    do_2d_plot(
        h, (ex, ey),
        "Signal (ADU)",
        "Gain Bit Value",
        f"Module {mod}")
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
The per-pixel mean of the RAW data in the sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.mean(rim_data[mod], axis=0),
        vmin=min(0.75*np.median(rim_data[mod][rim_data[mod] > 0]), 2000),
        vmax=max(1.5*np.median(rim_data[mod][rim_data[mod] > 0]), 16000),
        cmap="jet")
    ax.set_title(f'Module {mod}')
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
The per-pixel mean of the CORR data in the sequence file
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.mean(fim_data[mod], axis=0),
        vmin=min(0.75*np.median(fim_data[mod][fim_data[mod] > 0]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][fim_data[mod] > 0]), 100),
        cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Train Preview ###
A single image from the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        fim_data[mod][0, ...],
        vmin=min(0.75*np.median(fim_data[mod][0, ...]), -0.5),
        vmax=max(2.*np.median(fim_data[mod][0, ...]), 100),
        cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Signal Distribution ##
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(211)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-100, 1000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')

    ax = fig.add_subplot(212)
    h = ax.hist(
        fim_data[mod].flatten(),
        bins=1000,
        range=(-1000, 10000),
        log=True,
    )
    l = ax.set_xlabel("Signal (keV)")
    l = ax.set_ylabel("Counts")
    _ = ax.set_title(f'Module {mod}')
```
%% Cell type:markdown id: tags:
### Maximum GAIN Preview ###
The per-pixel maximum of the GAIN data over the plotted images
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.max(gim_data[mod], axis=0),
        vmin=0, vmax=3, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as for all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
table = []
for item in BadPixels:
    table.append(
        (item.name, f"{item.value:016b}"))
md = display(Latex(tabulate.tabulate(
    table, tablefmt='latex',
    headers=["Bad pixel type", "Bit mask"])))
```
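%% Cell type:markdown id: tags:
To relate these bit masks to the corrected data, a short sketch (illustrative only, reusing the `msk_data` arrays prepared for plotting above) counts how many pixels of each bad pixel type are flagged in the first plotted image of each module:
%% Cell type:code id: tags:
``` python
# Illustrative only: per-type bad pixel counts for the first plotted image.
for mod in msk_data:
    print(f"Module {mod}:")
    for item in BadPixels:
        n_bad = np.count_nonzero(msk_data[mod][0, ...] & item.value)
        if n_bad > 0:
            print(f"  {item.name}: {n_bad} pixels")
```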
%% Cell type:markdown id: tags:
### Single Image Bad Pixels ###
The bad pixel map for the first image of the first train
%% Cell type:code id: tags:
``` python
for mod in rim_data:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    im = ax.imshow(
        np.log2(msk_data[mod][0, ...]),
        vmin=0, vmax=32, cmap="jet")
    ax.set_title(f'Module {mod}', size=18)
    cb = fig.colorbar(im, ax=ax)
```