Skip to content
Snippets Groups Projects
Commit ab6138dc authored by Karim Ahmed's avatar Karim Ahmed
Browse files

FF_ALL for tests

parent 50538d7d
No related branches found
No related tags found
1 merge request!841[JUNGFRAU][FF] Feat: new notebook for producing gain constants.
%% Cell type:markdown id: tags:
# Histogram of single photon spectra
Author: European XFEL Detector Group, Version: 1.0
Creates histograms from flat fields.
Histograms are on a pixel-by-pixel, memory cell by memory cell basis to save memory and space, histogram range are to be optimized around the desired peak.
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/HED/202231/p900297/raw'
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/FFDATA/HED/p900297/gain_fit'
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate
g0_runs = [26, ] # can be a list of runs
sensor_size = [512, 1024] # size of the array in the 'row' and 'col' dimensions
chunked_trains = 1000 # Number of trains per chunk.
gains = [0, 1, 2] # gain bit values
karabo_id = 'HED_IA1_JF500K1' # karabo prefix of Jungfrau devices
karabo_da = ["JNGFR01"]
da_template = 'JNGFR{:02d}'
creation_time = "" # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
bias_voltage = -1 # bias voltage - Set to -1 to derive from CONTROL file.
integration_time = -1 # integration time - Set to -1 to derive from CONTROL file.
gain_mode = -1 # number of memory cells - Set to -1 to derive from CONTROL file.
gain_setting = -1
memory_cells = -1 # number of memory cells - Set to -1 to derive from CONTROL file.
sc_start = -1 # storage cell start value - should be derived from CONTROL file
h_bins_s = 0.2 # bin width in ADC units of the histogram
h_range = (-5., 10.) # histogram range in ADC units
block_size = [128, 64] # dimension of the chunks in 'row' and 'col'
det_src_template = '{}/DET/{}:daqOutput'
control_src_template = '{}/DET/CONTROL'
extra_dims = ['cell', 'row', 'col'] # labels for the DataArray dims after the first
fit_path = '/gpfs/exfel/data/scratch/mramilli/DATA/MID/p900322/gain_fit' # fit files directory
gain_map_file = '/gainMaps_M109_Burst_Fix_20230523.h5' # path
_fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak
mode = 'Burst' # TODO: CONFIRM MY DECISION TO REMOVE MODE DECLARATION FROM ALL NOTEBOOKS.
adc_fit = True
is_strixel = False
# Condition limits
bias_voltage_lims = [0, 200]
integration_time_lims = [0.1, 1000]
g_map_old = '' # '/gpfs/exfel/data/user/mramilli/jungfrau/module_PSI_gainmaps/M302/gainMaps_M302_2022-02-23.h5' # old gain map file path to calculate gain ratios G0/G1 and G1/G2. Set to "" to get last gain constant from the database.
old_gain_dataset_name = 'gain_map_g0' # name of the data structure in the old gain map
correct_offset = True # correct the photon peak value with a pedestal fit position
db_output = False
send_bpix = False
gain_map_name = 'gain_map_g0'
g0_name_new = 'gainMap_fit' # name of the data structure in the fit files
E_ph = 8.048 # photon energy of the peak fitted
badpixel_threshold_sigma = 3. # number of std in gain distribution to mark bad pixels
_roi = [0, 1024, 0, 256] # ROI to consider to evaluate gain distribution (for bad pixels evaluation)
# CALCAT API parameters
cal_db_interface = "tcp://max-exfl-cal-001:8020" # the database interface to use
cal_db_timeout = 180000
```
%% Cell type:code id: tags:
``` python
import gc
import multiprocessing
import numpy as np
import time
import os
from h5py import File as h5file
from functools import partial
from logging import warning
import pasha as psh
import cal_tools.restful_config as rest_cfg
from XFELDetAna.detectors.jungfrau.util import count_n_files
from extra_data import open_run, H5File, RunDirectory
from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
from cal_tools.jungfrau import jungfrau_ff
from cal_tools.jungfraulib import JungfrauCtrl
from cal_tools.tools import calcat_creation_time
from cal_tools.step_timing import StepTimer
from pathlib import Path
```
%% Cell type:code id: tags:
``` python
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True, parents=True)
print(f'Creating directory {out_folder}')
```
%% Cell type:code id: tags:
``` python
det_src = det_src_template.format(karabo_id, karabo_da[0])
control_src = control_src_template.format(karabo_id, karabo_da[0])
da_name = karabo_da[0] # TODO fix this
```
%% Cell type:code id: tags:
``` python
begin_stuff = time.localtime()
first_run_folder = in_folder / f'r{g0_runs[0]:04d}'
ctrl_data = JungfrauCtrl(RunDirectory(first_run_folder), control_src)
if memory_cells < 0 and sc_start < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
_mode = 'Burst'
if memory_cells == 1:
_mode = 'Single'
fout_temp = f'R{g0_runs[0]:04d}_Gain_{_mode}_Spectra_{da_name}'
# Run's creation time:
creation_time = calcat_creation_time(in_folder, g0_runs[0], creation_time)
print(f"Creation time: {creation_time}")
print("Chunked trains: ", chunked_trains)
```
%% Cell type:markdown id: tags:
### Creating empty histogram
%% Cell type:code id: tags:
``` python
h_n_bins = int((h_range[1] - h_range[0])/h_bins_s)
h_bins = np.linspace(h_range[0], h_range[1], h_n_bins)
h_spectra = np.zeros((h_n_bins-1, memory_cells, sensor_size[0], sensor_size[1]), dtype=np.int32)
edges = None
```
%% Cell type:code id: tags:
``` python
# def read_conditions_and_retrieve_constants(
# run_dc,
# run_number,
# integration_time,
# bias_voltage,
# gain_mode,
# creation_time,
# ):
# """Retrieve offset and noise calibration constants
# for one jungfrau module.
# Args:
# run_dc(extra_data.DataCollection):
# run_number(int):
# integration_time(float)
# bias_voltage(float):
# gain_mode(int):
# creation_time():
# Returns:
# ndarray, ndarray: Offset and Noise calibration constants.
# """
run_folder = in_folder / f'r{g0_runs[0]:04d}' # first run for now
run_number = g0_runs[0]
## Retrieve DB constants
ctrl_data = JungfrauCtrl(RunDirectory(run_folder), control_src)
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
if bias_voltage < 0:
bias_voltage = ctrl_data.get_bias_voltage()
# gain_setting = ctrl_data.get_gain_setting() TODO: Where is the gain_setting?
if gain_mode < 0:
gain_mode = ctrl_data.get_gain_mode()
def read_detector_conditions(
run_dc,
run_number,
bias_voltage,
memory_cells,
integration_time,
gain_mode,
gain_setting,
):
"""Get parameter conditions for a run.
Args:
run_dc(extra_data.DataCollection):
run_number(int):
integration_time(float)
bias_voltage(float):
gain_mode(int):
gain_setting(int):
Returns:
float, int, float, int, int, int
"""
## Retrieve DB constants
ctrl_data = JungfrauCtrl(run_dc, control_src)
if memory_cells < 0:
memory_cells, sc_start = ctrl_data.get_memory_cells()
if integration_time < 0:
integration_time = ctrl_data.get_integration_time()
if bias_voltage < 0:
bias_voltage = ctrl_data.get_bias_voltage()
# gain_setting = ctrl_data.get_gain_setting() TODO: Where is the gain_setting?
if gain_mode < 0:
gain_mode = ctrl_data.get_gain_mode()
return (
bias_voltage,
memory_cells,
sc_start,
integration_time,
gain_mode,
gain_setting,
run_number,
)
condition_lists = {
"integration_time": [],
"memory_cells": [],
"sc_start": [],
"bias_voltage": [],
"gain_setting": [],
"gain_mode": [],
"runs": [],
}
for run in g0_runs:
voltage, cells, sc_start, integration, mode, setting, run_number = read_detector_conditions( # noqa
RunDirectory(in_folder / f"r{run:04d}"),
run,
bias_voltage=bias_voltage,
memory_cells=memory_cells,
integration_time=integration_time,
gain_mode=gain_mode,
gain_setting=gain_setting)
condition_lists["bias_voltage"].append(voltage)
condition_lists["memory_cells"].append(cells)
condition_lists["integration_time"].append(integration)
condition_lists["gain_mode"].append(mode)
condition_lists["gain_setting"].append(setting)
condition_lists["sc_start"].append(sc_start)
# Validate run conditions
runs_results = condition_lists["runs"]
for c, lst in condition_lists.items():
if c == "runs":
continue
if not all(val == lst[0] for val in lst):
warning(
f"{c} is not the same for all runs: {lst} "
f"for runs {runs_results}, respectively"
)
# TODO: clarify this way in getting the control data.
# We are using RUN. We can of course update and start using `as_single_value` method.
memory_cells = condition_lists["memory_cells"][0]
sc_start = condition_lists["sc_start"][0]
print(f"Memory cells: {memory_cells:d}")
print(f"Exposure Time: {integration_time:1.2f} us")
bias_voltage = condition_lists["bias_voltage"][0]
print(f"Bias Voltage: {bias_voltage:3.1f} V")
integration_time = condition_lists["integration_time"][0]
print(f"Exposure Time: {integration_time:1.2f} us")
gain_mode = condition_lists["gain_mode"][0]
print(f"Gain mode: {gain_mode}")
gain_setting = condition_lists["gain_setting"][0]
print(f"Gain setting: {gain_setting}")
# Run's creation time:
creation_time = calcat_creation_time(in_folder, run_number, creation_time)
print(f"Creation time: {creation_time}")
_mode = 'Burst'
if memory_cells == 1:
_mode = 'Single'
fout_temp = f'R{g0_runs[0]:04d}_Gain_{_mode}_Spectra_{da_name}'
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants
jf_cal = JUNGFRAU_CalibrationData(
detector_name=karabo_id,
sensor_bias_voltage=bias_voltage,
event_at=creation_time,
modules=karabo_da[0],
modules=karabo_da,
memory_cells=memory_cells,
integration_time=integration_time,
# No gain setting 1? I use 0 for now as I don't see gain_setting used anywhere for the 3 notebooks.
gain_setting=0,
gain_mode=gain_mode,
client=rest_cfg.calibration_client(),
)
jf_metadata = jf_cal.metadata(calibrations=["Offset10Hz", "Noise10Hz"])
jf_metadata = jf_cal.metadata(
calibrations=["Offset10Hz", "Noise10Hz"])
# TODO: display CCV timestamp
const_data = jf_cal.ndarray_map(metadata=jf_metadata)
constants_data = const_data[karabo_da[0]]
if "Offset10Hz" in constants_data:
offset_map = np.array(
np.transpose(np.swapaxes(constants_data["Offset10Hz"], 0, 1)),
dtype=np.float32
)
print(f'Retrieved Offset Map {offset_map.shape}')
else:
# Create offset empty constant.
offset_map = np.zeros((3, memory_cells, 512, 1024), dtype=np.float32)
print("Offset map not found")
if "Noise10Hz" in constants_data:
noise_map = np.array(
np.transpose(np.swapaxes(constants_data["Noise10Hz"], 0, 1)),
dtype=np.float32
)
print(f'Retrieved Noise Map {noise_map.shape}')
else:
noise_map = None
print("Noise map not found")
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
for mod in karabo_da:
constants_data = const_data[mod]
if "Offset10Hz" in constants_data:
constants_data["Offset10Hz"] = np.array(
np.transpose(np.swapaxes(constants_data["Offset10Hz"], 0, 1)),
dtype=np.float32
)
print(
f'Retrieved Offset Map {constants_data["Noise10Hz"].shape}')
else:
# Create offset empty constant.
constants_data["Offset10Hz"] = np.zeros(
(3, memory_cells, 512, 1024),
dtype=np.float32,
)
warning(f"Offset map is not found for {mod}")
if "Noise10Hz" in constants_data:
constants_data["Noise10Hz"] = np.array(
np.transpose(np.swapaxes(constants_data["Noise10Hz"], 0, 1)),
dtype=np.float32
)
print(
f'Retrieved Noise Map {constants_data["Noise10Hz"].shape} for {mod}')
else:
constants_data["Noise10Hz"] = None
print(f"Noise map is not found for {mod}")
# return offset, noise
if memory_cells > 1:
# move from x, y, cell, gain to cell, x, y, gain
offset_map = np.moveaxis(offset_map, [0, 1], [1, 2])
else:
offset_map = np.squeeze(offset_map)
```
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, d):
"""Offset correct per train."""
g = gain[index].values
# Jungfrau gains 0[00], 1[01], 3[11]
# Change low gain to 2 for indexing purposes.
g[g==3] = 2
# Select memory cells
if memory_cells > 1:
"""
Even though it is correct to assume that memory cells pattern
can be the same across all trains (for one correction run
taken with one acquisition), it is preferred to not assume
this to account for exceptions that can happen.
"""
m = memcells[index].copy()
# 255 is a cell value pointing to no cell image data (image of 0 pixels).
# Corresponding image will be corrected with constant of cell 0. To avoid values of 0.
# This line is depending on not storing the modified memory cells in the corrected data.
m[m==255] = 0
offset_map_cell = offset_map[m, ...] # [16 + empty cell, x, y]
else:
offset_map_cell = offset_map
# Offset correction
offset = np.choose(g, offset_map_cell)
d -= offset
data_corr[index, ...] = d
```
%% Cell type:markdown id: tags:
## Filling the histogram
* first it will retreive DB offsets for a given run
* on a file-by-file basis it will perform offset subtraction and fill the histogram
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
selection = {
det_src: {
'data.adc', 'data.gain', 'data.memoryCell', 'data.trainId',
},
control_src: {
'exposureTime.timestamp', 'exposureTime.value', 'vHighVoltage.value',
}
}
for g0_run in g0_runs:
raw_fname_tmp = f'/RAW-R{g0_run:04d}-{da_name}' + '-S{:05d}.h5'
run_folder = in_folder / f'r{g0_run:04d}'
run_dc = RunDirectory(run_folder)
run_dc.info()
run_dc = run_dc.select(selection)
# offset_map, noise_map = read_conditions_and_retrieve_constants(
# run_dc=run_dc,
# run_number=g0_run,
# integration_time=integration_time,
# bias_voltage=bias_voltage,
# gain_mode=gain_mode,
# creation_time=creation_time,
# )
offset_map, noise_map = retrieve_calibration_constants(
run_number=g0_run,
integration_time=integration_time,
bias_voltage=bias_voltage,
gain_mode=gain_mode,
creation_time=creation_time,
)
## Offset subtraction & Histogram filling
### performs offset subtraction and fills the histogram
### looping on individual files because single photon runs can be very large
for dc_chunk in RunDirectory(
run_folder, include=f"*{da_name}*"
).split_trains(trains_per_part=chunked_trains):
trains = dc_chunk.get_array(det_src, 'data.trainId')
cellid = dc_chunk.get_array(
det_src,
'data.memoryCell',
extra_dims=extra_dims[0:1]).astype(np.int16)
# cellid = remove_duplicates(cellid) # TODO: where is this function.
# TODO: What is this for?
if memory_cells == 1:
cellid -= sc_start
adc = dc_chunk.get_array(det_src, 'data.adc', extra_dims=extra_dims).astype(np.float32)
gain = dc_chunk.get_array(det_src, 'data.gain', extra_dims=extra_dims)
# gain = gain.where(gain < 2, other = 2).astype(np.int16)
step_timer.start()
# Allocate shared arrays for corrected data. Used in `correct_train()`
data_corr = context.alloc(shape=adc.shape, dtype=np.float32)
context.map(correct_train, adc)
step_timer.done_step("correct_train")
step_timer.start()
# Create histogram`
chunks = jungfrau_ff.chunk_Multi([adc,], block_size)
with multiprocessing.Pool() as pool:
partial_fill = partial(jungfrau_ff.fill_histogram, h_bins)
r_maps = pool.map(partial_fill, chunks)
for i, r in enumerate(r_maps):
h_chk, e_chk = r
if edges is None:
edges = np.array(e_chk)
n_blocks_col = int(h_spectra.shape[-1]/block_size[1])
irow = int(np.floor(i/n_blocks_col)) * block_size[0]
icol = i%n_blocks_col * block_size[1]
h_spectra[..., irow:irow+block_size[0], icol:icol+block_size[1]] += h_chk
step_timer.done_step("Create histogram")
x = (edges[1:] + edges[:-1])/2.
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:markdown id: tags:
### Saving histogram
%% Cell type:code id: tags:
``` python
fout_h_path = f'{out_folder}/{fout_temp}_Histo.h5'
hists = np.transpose(h_spectra) # this is to make it compatible with my other notebooks, can be removed
with h5file(fout_h_path, 'w') as fout_h:
print(f"Saving histograms at {fout_h_path}.")
dset_h = fout_h.create_dataset("histos", data=hists)
dset_e = fout_h.create_dataset("edges", data=edges)
# TODO: why save this and not retrieve it again later.
# One can also save the file path instead somewhere and open it in the other notebooks.
dset_noi = fout_h.create_dataset("noise_map", data=np.transpose(noise_map))
fout_h.attrs["integration_time"] = integration_time
fout_h.attrs["bias_voltage"] = bias_voltage
fout_h.attrs["creation_time"] = str(creation_time)
```
......
......@@ -206,8 +206,8 @@ notebooks = {
"FF_HISTS": {
"notebook":
"notebooks/Jungfrau/gainCal_JF_Create_Spectra_Histos.ipynb",
# "dep_notebooks": [
# "notebooks/Jungfrau/gainCal_JF_Fit_Spectra_Histos.ipynb"],
"dep_notebooks": [
"notebooks/Jungfrau/gainCal_JF_Fit_Spectra_Histos.ipynb"],
"concurrency": {"parameter": None,
"default concurrency": None,
"cluster cores": 4},
......@@ -221,6 +221,17 @@ notebooks = {
"default concurrency": None,
"cluster cores": 4},
},
"FF_ALL": {
"notebook":
"notebooks/Jungfrau/gainCal_JF_Create_Spectra_Histos.ipynb",
"dep_notebooks": [
"notebooks/Jungfrau/gainCal_JF_Fit_Spectra_Histos.ipynb",
"notebooks/Jungfrau/create_gain_map.ipynb",
"notebooks/Jungfrau/gainCal_JF_Fit_sendDB_NBC.ipynb",],
"concurrency": {"parameter": None,
"default concurrency": None,
"cluster cores": 4},
},
},
"GOTTHARD2": {
"CORRECT": {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment