Skip to content
Snippets Groups Projects
Commit 14bd0d49 authored by Philipp Schmidt's avatar Philipp Schmidt
Browse files

Improve plotting details after gain fix

parent 4c60cc43
No related branches found
No related tags found
1 merge request!672[LPD] New correction notebook
%% Cell type:markdown id: tags:
# LPD Offline Correction #
Author: European XFEL Data Analysis Group
%% Cell type:code id: tags:
``` python
# Input parameters
in_folder = "/gpfs/exfel/exp/FXE/202201/p003073/raw/" # the folder to read data from, required
out_folder = "/gpfs/exfel/data/scratch/schmidtp/random/LPD_test" # the folder to output to, required
metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate.
sequences = [0] # sequences to correct, set to empty for all, range allowed
modules = '' # modules to correct, set to -1 for all, range allowed, used only when karabo_da is empty
run = 10 # runs to process, required
karabo_id = "FXE_DET_LPD1M-1" # karabo karabo_id
karabo_da = '' # a list of data aggregators names, Default empty string for selecting all data aggregators
input_source = '{karabo_id}/DET/{module_index}CH0:xtdf' # Input fast data source.
output_source = '' # Output fast data source, empty to use same as input
# CalCat parameters
use_dir_creation_date = True # use the creation date of the directory for database time derivation
cal_db_root = '/gpfs/exfel/d/cal/caldb_store'
# Operating conditions
mem_cells = 512 # Memory cells.
bias_voltage = 250.0 # Detector bias voltage.
capacitor = '5pF' # Capacitor setting: 5pF or 50pF
photon_energy = 9.2 # Photon energy in kEv.
category = 0 # Whom to blame.
# Correction parameters.
offset_corr = True # Offset correction.
rel_gain = True # Gain correction based on RelativeGain constant.
ff_map = True # Gain correction based on FFMap constant.
gain_amp_map = True # Gain correction based on GainAmpMap constant.
# Output options
overwrite = True # set to True if existing data should be overwritten
sequences_per_node = 1 # sequence files to process per node
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
from xfel_calibrate.calibrate import balance_sequences as bs
return bs(in_folder, run, sequences, sequences_per_node, karabo_da)
```
%% Cell type:code id: tags:
``` python
from collections import OrderedDict
from pathlib import Path
from time import perf_counter
import gc
import re
import warnings
import numpy as np
import h5py
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
from calibration_client import CalibrationClient
from calibration_client.modules import CalibrationConstantVersion
import extra_data as xd
import extra_geom as xg
import pasha as psh
from extra_data.components import LPD1M
from cal_tools.enums import BadPixels
from cal_tools.lpdalgs import correct_lpd_frames
from cal_tools.tools import CalibrationMetadata, get_dir_creation_date, write_compressed_frames
from cal_tools.h5_copy_except import h5_copy_except_paths
from cal_tools.files import DataFile
from cal_tools.restful_config import restful_config
```
%% Cell type:markdown id: tags:
# Prepare environment
%% Cell type:code id: tags:
``` python
file_re = re.compile(r'^RAW-R(\d{4})-(\w+\d+)-S(\d{5})$') # This should probably move to cal_tools
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True)
output_source = output_source or input_source
cal_db_root = Path(cal_db_root)
metadata = CalibrationMetadata(metadata_folder or out_folder)
if use_dir_creation_date:
creation_time = get_dir_creation_date(in_folder, run)
else:
from datetime import datetime
creation_time = datetime.now()
print(f'Using {creation_time.isoformat()} as creation time')
# Pick all modules/aggregators or those selected.
if not karabo_da:
if not modules:
modules = list(range(16))
karabo_da = [f'LPD{i:02d}' for i in modules]
# Pick all sequences or those selected.
if not sequences:
do_sequence = lambda seq: True
else:
do_sequence = [int(x) for x in sequences].__contains__
# List of detector sources.
det_inp_sources = [input_source.format(karabo_id=karabo_id, module_index=int(da[-2:])) for da in karabo_da]
```
%% Cell type:markdown id: tags:
# Select data to process
%% Cell type:code id: tags:
``` python
# Open the entire input directory.
dc_all = xd.RunDirectory(Path(in_folder) / f'r{run:04d}') \
.select([(src, 'image.*') for src in det_inp_sources])
out_folder = Path(out_folder)
data_to_process = []
for file_access in dc_all.files:
match = file_re.match(Path(file_access.filename).stem)
if match[2] not in karabo_da or not do_sequence(int(match[3])):
continue
outp_path = out_folder / 'CORR-R{run:04d}-{aggregator}-S{seq:05d}.h5'.format(
run=int(match[1]), aggregator=match[2], seq=int(match[3]))
data_to_process.append((file_access.filename, match[2], outp_path))
print('Files to process:')
for data_descr in sorted(data_to_process, key=lambda x: f'{x[1]}{x[0]}'):
print('\t'.join(data_descr[1::-1]))
```
%% Cell type:markdown id: tags:
# Obtain and prepare calibration constants
%% Cell type:code id: tags:
``` python
# Connect to CalCat.
calcat_config = restful_config['calcat']
client = CalibrationClient(
client_id=calcat_config['user-id'],
client_secret=calcat_config['user-secret'],
user_email=calcat_config['user-email'],
base_api_url=calcat_config['base-api-url'],
token_url=calcat_config['token-url'],
refresh_url=calcat_config['refresh-url'],
auth_url=calcat_config['auth-url'],
scope='')
```
%% Cell type:code id: tags:
``` python
dark_calibrations = {
1: 'Offset', # np.float32
14: 'BadPixelsDark' # should be np.uint32, but is np.float64
}
dark_condition = [
dict(parameter_id=1, value=bias_voltage), # Sensor bias voltage
dict(parameter_id=7, value=mem_cells), # Memory cells
dict(parameter_id=15, value=capacitor), # Feedback capacitor
dict(parameter_id=13, value=256), # Pixels X
dict(parameter_id=14, value=256), # Pixels Y
]
illuminated_calibrations = {
20: 'BadPixelsFF', # np.uint32
42: 'GainAmpMap', # np.float32
43: 'FFMap', # np.float32
44: 'RelativeGain' # np.float32
}
illuminated_condition = dark_condition.copy()
illuminated_condition += [
dict(parameter_id=3, value=photon_energy), # Source energy
dict(parameter_id=25, value=category) # category
]
const_data = {}
const_load_mp = psh.ProcessContext(num_workers=24)
print('Querying calibration database', end='', flush=True)
start = perf_counter()
for calibrations, condition in [
(dark_calibrations, dark_condition),
(illuminated_calibrations, illuminated_condition)
]:
resp = CalibrationConstantVersion.get_closest_by_time_by_detector_conditions(
client, 'FXE_DET_LPD1M-1', list(calibrations.keys()),
{'parameters_conditions_attributes': condition},
karabo_da='', event_at=None, snapshot_at=None)
if not resp['success']:
raise RuntimeError(resp)
for ccv in resp['data']:
cc = ccv['calibration_constant']
da = ccv['physical_detector_unit']['karabo_da']
calibration_name = calibrations[cc['calibration_id']]
dtype = np.uint32 if calibration_name.startswith('BadPixels') else np.float32
const_data[(da, calibration_name)] = dict(
path=Path(ccv['path_to_file']) / ccv['file_name'],
dataset=ccv['data_set_name'],
data=const_load_mp.alloc(shape=(256, 256, mem_cells, 3), dtype=dtype)
)
print('.', end='', flush=True)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
def load_constant_dataset(wid, index, const_descr):
ccv_entry = const_data[const_descr]
with h5py.File(cal_db_root / ccv_entry['path'], 'r') as fp:
fp[ccv_entry['dataset'] + '/data'].read_direct(ccv_entry['data'])
print('.', end='', flush=True)
print('Loading calibration data', end='', flush=True)
start = perf_counter()
const_load_mp.map(load_constant_dataset, list(const_data.keys()))
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
```
%% Cell type:code id: tags:
``` python
ccv_offsets = {}
ccv_gains = {}
ccv_masks = {}
def prepare_constants(wid, index, aggregator):
consts = {calibration_name: entry['data']
for (aggregator_, calibration_name), entry
in const_data.items()
if aggregator == aggregator_}
if offset_corr and 'Offset' in consts:
ccv_offsets[aggregator] = consts['Offset'].astype(np.float32)
else:
ccv_offsets[aggregator] = np.zeros((256, 256, mem_cells, 3), dtype=np.float32)
ccv_gains[aggregator] = np.ones((256, 256, mem_cells, 3), dtype=np.float32)
if 'BadPixelsDark' in consts:
ccv_masks[aggregator] = consts['BadPixelsDark'].astype(np.uint32)
else:
ccv_masks[aggregator] = np.zeros((256, 256, mem_cells, 3), dtype=np.uint32)
if rel_gain and 'RelativeGain' in consts:
ccv_gains[aggregator] *= consts['RelativeGain']
if ff_map and 'FFMap' in consts:
ccv_gains[aggregator] *= consts['FFMap']
if 'BadPixelsFF' in consts:
np.bitwise_or(ccv_masks[aggregator], consts['BadPixelsFF'], out=ccv_masks[aggregator])
if gain_amp_map and 'GainAmpMap' in consts:
ccv_gains[aggregator] *= consts['GainAmpMap']
print('.', end='', flush=True)
print('Preparing constants', end='', flush=True)
start = perf_counter()
psh.ThreadContext(num_workers=len(karabo_da)).map(prepare_constants, karabo_da)
total_time = perf_counter() - start
print(f'{total_time:.1f}s')
const_data.clear() # Clear raw constants data now to save memory.
gc.collect();
```
%% Cell type:code id: tags:
``` python
def correct_file(wid, index, work):
filename, aggregator, outp_path = work
module_index = int(aggregator[-2:])
start = perf_counter()
dc = xd.H5File(filename).select('*', 'image.*', require_all=True)
inp_source = dc[input_source.format(karabo_id=karabo_id, module_index=module_index)]
open_time = perf_counter() - start
# Load raw data for this file.
start = perf_counter()
in_data = inp_source['image.data'].ndarray().squeeze()
in_cell = inp_source['image.cellId'].ndarray().squeeze()
in_pulse = inp_source['image.pulseId'].ndarray().squeeze()
read_time = perf_counter() - start
# Allocate output arrays.
out_pixels = np.zeros((in_data.shape[0], 256, 256), dtype=np.float32)
out_gain = np.zeros((in_data.shape[0], 256, 256), dtype=np.uint8)
out_mask = np.zeros((in_data.shape[0], 256, 256), dtype=np.uint32)
start = perf_counter()
correct_lpd_frames(in_data, in_cell,
out_pixels, out_gain, out_mask,
ccv_offsets[aggregator], ccv_gains[aggregator], ccv_masks[aggregator],
num_threads=num_threads_per_worker)
correct_time = perf_counter() - start
image_counts = inp_source['image.data'].data_counts(labelled=False)
start = perf_counter()
if (not outp_path.exists() or overwrite) and image_counts.sum() > 0:
fa = dc.files[0]
sel_trains = np.isin(fa.train_ids, dc.train_ids)
outp_source_name = output_source.format(karabo_id=karabo_id, module_index=module_index)
DataFile.instrument_source_pattern = re.compile(r'^[\w\/-]+:\w+$')
with DataFile(outp_path, 'w') as outp_file:
outp_file.create_index(
train_ids=dc.train_ids,
timestamp=fa.file['INDEX/timestamp'][sel_trains],
flag=fa.validity_flag[sel_trains])
outp_source = outp_file.create_instrument_source(outp_source_name)
outp_source.create_index(image=image_counts)
outp_source.create_key('image.data', data=out_pixels)
outp_source.create_key('image.cellId', data=in_cell)
outp_source.create_key('image.pulseId', data=in_pulse)
write_compressed_frames(
out_gain, outp_file, f'INSTRUMENT/{outp_source_name}/image/gain', comp_threads=8)
write_compressed_frames(
out_mask, outp_file, f'INSTRUMENT/{outp_source_name}/image/mask', comp_threads=8)
outp_file.create_metadata(like=dc)
write_time = perf_counter() - start
total_time = open_time + read_time + correct_time + write_time
frame_rate = in_data.shape[0] / total_time
print('{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{}\t{:.1f}'.format(
wid, aggregator, open_time, read_time, correct_time, write_time, total_time,
in_data.shape[0], frame_rate))
in_data = None
in_cell = None
in_pulse = None
out_pixels = None
out_gain = None
out_mask = None
gc.collect()
# 8/18, 479s
# 10/18, 464s
num_workers = 5
num_threads_per_worker = 24
print('worker\tDA\topen\tread\tcorrect\twrite\ttotal\tframes\trate')
start = perf_counter()
psh.ProcessContext(num_workers=num_workers).map(correct_file, data_to_process)
total_time = perf_counter() - start
print(f'Total time: {total_time:.1f}s')
```
%% Cell type:markdown id: tags:
# Data preview for first train
%% Cell type:code id: tags:
``` python
geom = xg.LPD_1MGeometry.from_quad_positions(
[(11.4, 299), (-11.5, 8), (254.5, -16), (278.5, 275)])
output_paths = [outp_path for _, _, outp_path in data_to_process if outp_path.exists()]
dc = xd.DataCollection.from_paths(output_paths).select_trains(np.s_[0])
LPD1M._source_re = re.compile(r'(?P<detname>.+_LPD1M.*)\/(?:DET|CORR)\/(?P<modno>\d+)CH')
det = LPD1M(dc, detector_name=karabo_id)
data = det.get_array('image.data')
```
%% Cell type:markdown id: tags:
### Intensity histogram across all cells
%% Cell type:code id: tags:
``` python
left_edge_ratio = 0.01
right_edge_ratio = 0.99
left_edge_ratio = 0.02
right_edge_ratio = 0.98
fig, ax = plt.subplots(num=1, clear=True, figsize=(15, 6))
values, bins, _ = ax.hist(np.ravel(data.data), bins=500, range=(-5000, 20000))
values, bins, _ = ax.hist(np.ravel(data.data), bins=500, range=(-5000, 10000))
def find_nearest_index(array, value):
return (np.abs(array - value)).argmin()
cum_values = np.cumsum(values)
vmin = bins[find_nearest_index(cum_values, cum_values[-1]*left_edge_ratio)]
vmax = bins[find_nearest_index(cum_values, cum_values[-1]*right_edge_ratio)]
max_value = values.max()
ax.vlines([vmin, vmax], 0, max_value, color='red', linewidth=5, alpha=0.2)
ax.text(vmin, max_value, f'{left_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax, max_value, f'{right_edge_ratio*100:.0f}%',
color='red', ha='center', va='bottom', size='large')
ax.text(vmax+(vmax-vmin)*0.01, max_value/2, 'Colormap interval',
color='red', rotation=90, ha='left', va='center', size='x-large')
ax.set_xlim(-5000, 20000)
ax.set_ylim(0, max_value*1.1)
pass
```
%% Cell type:markdown id: tags:
### First memory cell
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=2, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax);
geom.plot_data_fast(data[:, 0, 0], ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Train average
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=3, figsize=(15, 15), clear=True, nrows=1, ncols=1)
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax);
geom.plot_data_fast(data[:, 0].mean(axis=1), ax=ax, vmin=vmin, vmax=vmax)
pass
```
%% Cell type:markdown id: tags:
### Lowest gain stage per pixel
%% Cell type:code id: tags:
``` python
highest_gain_stage = det.get_array('image.gain', pulses=np.s_[:]).max(axis=(1, 2))
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(num=4, figsize=(15, 15), clear=True, nrows=1, ncols=1)
p = geom.plot_data_fast(highest_gain_stage, ax=ax, vmin=0, vmax=2);
cb = ax.images[0].colorbar
cb.set_ticks([0, 1, 2])
cb.set_ticklabels(['High gain', 'Medium gain', 'Low gain'])
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment