Skip to content
Snippets Groups Projects
Commit 4b0eb541 authored by Karim Ahmed's avatar Karim Ahmed
Browse files

Merge branch 'feat/dssc_deviation_memcells' into 'master'

[DSSC][Dark][Correct] Remove options check for num_cells and move it into dssclib and add deviation

See merge request !927
parents 98f01423 4bdd91bc
No related branches found
No related tags found
1 merge request!927[DSSC][Dark][Correct] Remove options check for num_cells and move it into dssclib and add deviation
%% Cell type:markdown id: tags:
# DSSC Characterize Dark Images #
Author: S. Hauf, Version: 0.1
The following code analyzes a set of dark images taken with the DSSC detector to deduce detector offsets and noise. Data for the detector are presented in one run, as the DSSC does not acquire multiple gain stages.
The notebook explicitly does what pyDetLib provides in its offset calculation method for streaming data.
%% Cell type:code id: tags:
``` python
# Notebook parameters of the dark characterization; the values below are defaults.
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SQS/202131/p900210/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/samartse/data/DSSC" # path to output to, required
metadata_folder = "" # Directory containing calibration_metadata.yml when run by xfel-calibrate
sequences = [0] # sequence files to evaluate.
modules = [-1] # modules to run for, -1 selects all 16 modules
run = 20 # run number in which data was recorded, required
karabo_id = "SQS_DET_DSSC1M-1" # Karabo ID of the detector
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to the image index
slow_data_pattern = 'RAW-R{}-DA{}-S00000.h5' # file name pattern handed to get_dssc_ctrl_data
use_dir_creation_date = True # use the dir creation date for determining the creation time
cal_db_interface = "tcp://max-exfl-cal001:8020" # the database interface to use
cal_db_timeout = 3000000 # timeout on caldb requests
local_output = True # output constants locally
db_output = False # output constants to database
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
bias_voltage = 100 # detector bias voltage
rawversion = 2 # RAW file format version
thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels
thresholds_offset_hard = [4, 125] # thresholds in absolute ADU terms for offset deduced bad pixels,
# minimal threshold at 4 is set at hardware level, DSSC full range 0-511
thresholds_noise_sigma = 3. # thresholds in terms of n sigma noise for noise deduced bad pixels
thresholds_noise_hard = [0.001, 3] # thresholds in absolute ADU terms for noise deduced bad pixels
offset_numpy_algorithm = "mean" # "mean" uses np.mean for the offset, any other value uses np.median
high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h
slow_data_aggregators = [1,1,1,1] # quadrant/aggregator
slow_data_path = 'SQS_NQS_DSSC/FPGA/PPT_Q' # slow data path handed to get_dssc_ctrl_data
operation_mode = '' # Detector operation mode, optional
```
%% Cell type:code id: tags:
``` python
import os
import warnings
# imports and things that do not usually need to be changed
from datetime import datetime
warnings.filterwarnings('ignore')
from collections import OrderedDict
import h5py
import matplotlib
from ipyparallel import Client
from IPython.display import Latex, Markdown, display
matplotlib.use('agg')
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tabulate
import yaml
from iCalibrationDB import Conditions, Constants, Detectors, Versions
from cal_tools.dssclib import get_dssc_ctrl_data, get_pulseid_checksum
from cal_tools.enums import BadPixels
from cal_tools.plotting import (
create_constant_overview,
plot_badpix_3d,
show_overview,
show_processed_modules,
)
from cal_tools.tools import (
get_dir_creation_date,
get_from_db,
get_notebook_name,
get_pdu_from_db,
get_random_db_interface,
get_report,
map_gain_stages,
parse_runs,
run_prop_seq_from_path,
save_const_to_h5,
send_to_db,
)
# View on the engines of the selected ipcluster profile, serializing with dill.
view = Client(profile=cluster_profile)[:]
view.use_dill()
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
# Fill detector id and receiver template into the paths; the receiver template
# still carries a "{}" placeholder that is formatted with the channel later on.
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
gain_names = ['High', 'Medium', 'Low']
# Derive karabo_da from modules when no aggregators were given, otherwise
# derive the module indices from the last two characters of each aggregator name.
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(16))
    karabo_da = ["DSSC{:02d}".format(i) for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]
max_cells = mem_cells
offset_runs = OrderedDict()
offset_runs["high"] = run
creation_time=None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time of constant.")
# Rebinds `run` with the value parsed from in_folder; offset_runs keeps the
# run number assigned above.
run, prop, seq = run_prop_seq_from_path(in_folder)
dinstance = "DSSC1M1"
print(f"Detector in use is {karabo_id}")
cal_db_interface = get_random_db_interface(cal_db_interface)
```
%% Cell type:code id: tags:
``` python
# Echo the effective parameters into the report.
run_list = list(offset_runs.values())
for text in (
    "Parameters are:",
    f"Proposal: {prop}",
    f"Memory cells: {mem_cells}/{max_cells}",
    "Runs: {}".format(run_list),
    f"Sequences: {sequences}",
    f"Using DB: {db_output}",
    f"Input: {in_folder}",
    f"Output: {out_folder}",
    f"Bias voltage: {bias_voltage}V",
):
    print(text)
# Location string and report reference, both handed on when constants are stored.
file_loc = f'proposal:{prop} runs:{run_list[0]}'
report = get_report(metadata_folder)
```
%% Cell type:markdown id: tags:
The following lines will create a queue of files which will then be executed module-parallel, distinguishing between different gains.
%% Cell type:code id: tags:
``` python
# set everything up filewise
os.makedirs(out_folder, exist_ok=True)
# map_gain_stages returns the per-gain file mapping together with the number
# of sequences and the total file size.
gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)
gain_mapped_files, total_sequences, total_file_size = gmf
print(f"Will process a total of {total_sequences} file.")
```
%% Cell type:markdown id: tags:
## Calculate Offsets, Noise and Thresholds ##
The calculation is performed per-pixel and per-memory-cell. Offsets are simply the mean (or, if `offset_numpy_algorithm` is set to anything other than "mean", the median) value for a set of dark data taken at a given gain, noise the standard deviation, and gain-bit values the medians of the gain array.
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):
    """Compute offset, noise and bad-pixel maps of one module from a dark file.

    Relies on the notebook-level names ``get_pulseid_checksum`` and
    ``offset_numpy_algorithm`` being available where the function runs.

    Args:
        cells: Number of memory cells; 0 infers it from the file through
            ``cal_tools.dssclib.get_num_cells``.
        bp_thresh: Tuple ``(thresholds_offset_hard, thresholds_offset_sigma,
            thresholds_noise_hard, thresholds_noise_sigma)``.
        rawversion: RAW file format version. Version 2 locates the images via
            ``count``/``first`` of the index section, any other value via
            ``status``/``first``/``last``.
        karabo_id: Not used inside the function; kept for the caller's
            positional interface.
        h5path: Image path template with one placeholder for the channel.
        h5path_idx: Index path template with one placeholder for the channel.
        inp: Tuple ``(filename, channel)``.

    Returns:
        ``(offset, noise, bp, cells, pulseid_checksum)``, or ``None`` when
        ``rawversion`` is not 2 and no index entry has a non-zero status.

    Raises:
        ValueError: If the number of cells had to be inferred and
            ``get_num_cells`` returned ``None``.
    """
    import h5py
    import numpy as np
    # The local get_num_cells helper that used to be defined here was shadowed
    # by this import before its first use, so only the import is kept.
    from cal_tools.dssclib import get_num_cells
    from cal_tools.enums import BadPixels

    filename, channel = inp
    h5path = h5path.format(channel)
    h5path_idx = h5path_idx.format(channel)

    if cells == 0:
        cells = get_num_cells(filename, h5path)
        if cells is None:
            raise ValueError(f"ERROR! Empty image data file for channel {channel}")
    print(f"Using {cells} memory cells")

    thresholds_offset_hard, thresholds_offset_sigma, thresholds_noise_hard, thresholds_noise_sigma = bp_thresh

    # The context manager closes the file on every path, including the early
    # return below, which previously leaked the handle.
    with h5py.File(filename, "r") as infile:
        if rawversion == 2:
            count = np.squeeze(infile[f"{h5path_idx}/count"])
            first = np.squeeze(infile[f"{h5path_idx}/first"])
            last_index = int(first[count != 0][-1]+count[count != 0][-1])
            first_index = int(first[count != 0][0])
        else:
            status = np.squeeze(infile[f"{h5path_idx}/status"])
            if np.count_nonzero(status != 0) == 0:
                return
            last = np.squeeze(infile[f"{h5path_idx}/last"])
            first = np.squeeze(infile[f"{h5path_idx}/first"])
            last_index = int(last[status != 0][-1]) + 1
            first_index = int(first[status != 0][0])
        im = np.array(infile[f"{h5path}/data"][first_index:last_index,...])
        cellIds = np.squeeze(infile[f"{h5path}/cellId"][first_index:last_index,...])

    pulseid_checksum = get_pulseid_checksum(filename, h5path, h5path_idx)

    # Drop axis 1 and move the image axis to the end, so that the last axis
    # can be indexed with the per-image cell ids below.
    im = im[:, 0, ...].astype(np.float32)
    im = np.rollaxis(im, 2)
    im = np.rollaxis(im, 2, 1)

    mcells = cells
    offset = np.zeros((im.shape[0], im.shape[1], mcells), dtype = np.float64)
    noise = np.zeros((im.shape[0], im.shape[1], mcells), dtype = np.float64)

    # Cells with ids >= mcells are ignored; cells never seen keep zeros.
    for cc in np.unique(cellIds[cellIds < mcells]):
        cellidx = cellIds == cc
        if offset_numpy_algorithm == "mean":
            offset[...,cc] = np.mean(im[..., cellidx], axis=2)
        else:
            offset[...,cc] = np.median(im[..., cellidx], axis=2)
        noise[...,cc] = np.std(im[..., cellidx], axis=2)

    # bad pixels
    bp = np.zeros(offset.shape, np.uint32)

    # offset related bad pixels
    offset_mn = np.nanmedian(offset, axis=(0,1))
    offset_std = np.nanstd(offset, axis=(0,1))
    bp[(offset < offset_mn-thresholds_offset_sigma*offset_std) |
       (offset > offset_mn+thresholds_offset_sigma*offset_std)] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[(offset < thresholds_offset_hard[0]) | (offset > thresholds_offset_hard[1])] |= BadPixels.OFFSET_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(offset)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    # noise related bad pixels
    noise_mn = np.nanmedian(noise, axis=(0,1))
    noise_std = np.nanstd(noise, axis=(0,1))
    bp[(noise < noise_mn-thresholds_noise_sigma*noise_std) |
       (noise > noise_mn+thresholds_noise_sigma*noise_std)] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[(noise < thresholds_noise_hard[0]) | (noise > thresholds_noise_hard[1])] |= BadPixels.NOISE_OUT_OF_THRESHOLD.value
    bp[~np.isfinite(noise)] |= BadPixels.OFFSET_NOISE_EVAL_ERROR.value

    return offset, noise, bp, cells, pulseid_checksum
# Per-module result containers, keyed by the "Q<quadrant>M<module>" name.
offset_g = OrderedDict()
noise_g = OrderedDict()
gain_g = OrderedDict()
badpix_g = OrderedDict()
gg = 0
start = datetime.now()
all_cells = []
checksums = {}
# Slow (control) data is required later for the operating conditions.
try:
    tGain, encodedGain, operatingFreq = get_dssc_ctrl_data(in_folder + "/r{:04d}/".format(offset_runs["high"]),
                                                           slow_data_pattern,
                                                           slow_data_aggregators,
                                                           offset_runs["high"], slow_data_path)
except IOError:
    print("ERROR: Couldn't access slow data to read tGain, encodedGain, and operatingFreq \n")
for gain, mapped_files in gain_mapped_files.items():
    inp = []
    dones = []
    # Take one file per module; modules without a queued file are skipped.
    for i in modules:
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if qm in mapped_files and not mapped_files[qm].empty():
            fname_in = mapped_files[qm].get()
            print("Process file: ", fname_in)
            dones.append(mapped_files[qm].empty())
        else:
            continue
        inp.append((fname_in, i))
    p = partial(characterize_module, max_cells,
                (thresholds_offset_hard, thresholds_offset_sigma,
                 thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)
    results = list(map(p, inp))
    # Pair every result with the module index it was computed for. Indexing
    # `modules` by the result position is wrong as soon as a module was
    # skipped above, because `inp` is then shorter than `modules`.
    for (_, i), r in zip(inp, results):
        qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
        if r is None:
            # characterize_module returns None when it finds no usable trains.
            print(f"No usable data for {qm}, module is skipped.")
            continue
        offset, noise, bp, thiscell, pulseid_checksum = r
        all_cells.append(thiscell)
        if qm not in offset_g:
            offset_g[qm] = np.zeros((offset.shape[0], offset.shape[1], offset.shape[2]))
            noise_g[qm] = np.zeros_like(offset_g[qm])
            badpix_g[qm] = np.zeros_like(offset_g[qm], np.uint32)
            checksums[qm] = pulseid_checksum
        offset_g[qm][...] = offset
        noise_g[qm][...] = noise
        badpix_g[qm][...] = bp
    gg +=1
if len(all_cells) > 0:
    max_cells = np.max(all_cells)
    print(f"Using {max_cells} memory cells")
else:
    raise ValueError("0 processed memory cells. No raw data available.")
```
%% Cell type:code id: tags:
``` python
# TODO: add db_module when received from myMDC
# Map each module name to its data aggregator and a (still empty) PDU name.
qm_dict = OrderedDict()
for i, k_da in zip(modules, karabo_da):
    quad_idx, mod_in_quad = divmod(i, 4)
    qm = f"Q{quad_idx + 1}M{mod_in_quad + 1}"
    qm_dict[qm] = dict(karabo_da=k_da, db_module="")
```
%% Cell type:code id: tags:
``` python
# Retrieve existing constants for comparison
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
clist = ["Offset", "Noise"]
old_const = {}
old_mdata = {}
print('Retrieve pre-existing constants for comparison.')
for qm in offset_g.keys():
    old_const[qm] = {}
    old_mdata[qm] = {}
    qm_db = qm_dict[qm]
    # Rebinds the notebook-level karabo_da to this module's single aggregator name.
    karabo_da = qm_db["karabo_da"]
    for const in clist:
        dconst =getattr(Constants.DSSC, const)()
        condition = Conditions.Dark.DSSC(memory_cells=max_cells,
                                         bias_voltage=bias_voltage,
                                         pulseid_checksum=checksums[qm],
                                         acquisition_rate=operatingFreq[qm],
                                         target_gain=tGain[qm],
                                         encoded_gain=encodedGain[qm])
        # This should be used in case of running notebook
        # by a different method other than myMDC which already
        # sends CalCat info.
        # TODO: Set db_module to "" by default in the first cell
        if not qm_db["db_module"]:
            qm_db["db_module"] = get_pdu_from_db(karabo_id, karabo_da, dconst,
                                                 condition, cal_db_interface,
                                                 snapshot_at=creation_time)[0]
        data, mdata = get_from_db(karabo_id, karabo_da,
                                  dconst,
                                  condition,
                                  None,
                                  cal_db_interface, creation_time=creation_time,
                                  verbosity=2, timeout=cal_db_timeout)
        old_const[qm][const] = data
        # Record where the old constant came from, or placeholders if none was found.
        if mdata is None or data is None:
            old_mdata[qm][const] = {
                "timestamp": "Not found",
                "filepath": None,
                "h5path": None
            }
        else:
            old_mdata[qm][const] = {
                "timestamp": mdata.calibration_constant_version.begin_at.isoformat(),
                "filepath": os.path.join(
                    mdata.calibration_constant_version.hdf5path,
                    mdata.calibration_constant_version.filename,
                ),
                "h5path": mdata.calibration_constant_version.h5path,
            }
    # One metadata file per module with its PDU and the old-constant provenance.
    with open(f"{out_folder}/module_metadata_{qm}.yml", "w") as fd:
        yaml.safe_dump(
            {"module": qm, "pdu": qm_db["db_module"], "old-constants": old_mdata[qm]},
            fd,
        )
```
%% Cell type:code id: tags:
``` python
# Collect the new constants per module; modules without results are reported.
res = OrderedDict()
for i in modules:
    qm = f"Q{i//4+1}M{i%4+1}"
    try:
        module_constants = {
            'Offset': offset_g[qm],
            'Noise': noise_g[qm],
        }
    except Exception as e:
        print(f"Error: No constants for {qm}: {e}")
    else:
        res[qm] = module_constants
```
%% Cell type:code id: tags:
``` python
# Push the same constant two different times.
# One with the generated pulseID check sum setting for the offline calibration.
# And another for the online calibration as it doesn't have this pulseID checksum, yet.
# NOTE(review): nesting reconstructed from a whitespace-stripped source; in
# particular the level of the final summary prints should be confirmed.
md = None
for dont_use_pulseIds in [True, False]:
    for qm in res.keys():
        karabo_da = qm_dict[qm]["karabo_da"]
        db_module = qm_dict[qm]["db_module"]
        for const in res[qm].keys():
            dconst = getattr(Constants.DSSC, const)()
            dconst.data = res[qm][const]
            # In the first pass all slow-data derived conditions are left unset.
            opfreq = None if dont_use_pulseIds else operatingFreq[qm]
            targetgain = None if dont_use_pulseIds else tGain[qm]
            encodedgain = None if dont_use_pulseIds else encodedGain[qm]
            pidsum = None if dont_use_pulseIds else checksums[qm]
            # set the operating condition
            condition = Conditions.Dark.DSSC(memory_cells=max_cells,
                                             bias_voltage=bias_voltage,
                                             pulseid_checksum=pidsum,
                                             acquisition_rate=opfreq,
                                             target_gain=targetgain,
                                             encoded_gain=encodedgain)
            # Widen the "Memory cells" parameter downwards by max_cells and
            # allow no upward deviation.
            for parm in condition.parameters:
                if parm.name == "Memory cells":
                    parm.lower_deviation = max_cells
                    parm.upper_deviation = 0
            if db_output:
                md = send_to_db(db_module, karabo_id, dconst, condition, file_loc, report,
                                cal_db_interface, creation_time=creation_time, timeout=cal_db_timeout)
            if local_output and dont_use_pulseIds: # Don't save constant locally two times.
                md = save_const_to_h5(db_module, karabo_id, dconst, condition,
                                      dconst.data, file_loc, report,
                                      creation_time, out_folder)
                print(f"Calibration constant {const} is stored locally.\n")
    if not dont_use_pulseIds:
        print("Constants parameter conditions are:\n")
        print(f"• memory_cells: {max_cells}\n• bias_voltage: {bias_voltage}\n"
              f"• pulseid_checksum: {pidsum}\n• acquisition_rate: {opfreq}\n"
              f"• target_gain: {targetgain}\n• encoded_gain: {encodedgain}\n"
              f"• creation_time: {creation_time}\n")
```
%% Cell type:code id: tags:
``` python
# Announce every processed module and plot the positions of all of them.
# NOTE(review): show_processed_modules is assumed to be called once after the
# loop; the whitespace-stripped source does not show its nesting - confirm.
mnames = []
for i in modules:
    qm = f"Q{i//4+1}M{i % 4+1}"
    display(Markdown(f'## Position of the module {qm} and its ASICs##'))
    mnames.append(qm)
show_processed_modules(dinstance=dinstance, constants=None, mnames=mnames, mode="position")
```
%% Cell type:markdown id: tags:
## Single-Cell Overviews ##
Single cell overviews allow identifying potential effects on all memory cells, e.g. on sensor level. Additionally, they should serve as a first sanity check on expected behaviour, e.g. if structuring on the ASIC level is visible in the offsets, but otherwise no immediate artifacts are visible.
%% Cell type:code id: tags:
``` python
# Memory cell and gain index handed to the overview plots.
cell = 9
gain = 0
# Rebinds the notebook-level out_folder to None before it is passed on.
out_folder = None
show_overview(res, cell, gain, out_folder=out_folder, infix="_{}".format(run))
```
%% Cell type:code id: tags:
``` python
# Bad pixel value -> (label, RGBA hex colour) used by the 3D bad pixel plots.
cols = {BadPixels.NOISE_OUT_OF_THRESHOLD.value: (BadPixels.NOISE_OUT_OF_THRESHOLD.name, '#FF000080'),
        BadPixels.OFFSET_NOISE_EVAL_ERROR.value: (BadPixels.OFFSET_NOISE_EVAL_ERROR.name, '#0000FF80'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value: (BadPixels.OFFSET_OUT_OF_THRESHOLD.name, '#00FF0080'),
        BadPixels.OFFSET_OUT_OF_THRESHOLD.value | BadPixels.NOISE_OUT_OF_THRESHOLD.value: ('MIXED', '#DD00DD80')}
# The 3D plots are only produced on request (see high_res_badpix_3d).
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
if high_res_badpix_3d:
    display(Markdown("""
## Global Bad Pixel Behaviour ##
The following plots show the results of bad pixel evaluation for all evaluated memory cells.
Cells are stacked in the Z-dimension, while pixels values in x/y are rebinned with a factor of 2.
This excludes single bad pixels present only in disconnected pixels.
Hence, any bad pixels spanning at least 4 pixels in the x/y-plane, or across at least two memory cells are indicated.
Colors encode the bad pixel type, or mixed type.
"""))
    # set rebin_fac to 1 for avoiding rebining and
    # losing real values of badpixels(High resolution).
    gain = 0
    for mod, data in badpix_g.items():
        plot_badpix_3d(data, cols, title=mod, rebin_fac=2)
        plt.show()
```
%% Cell type:markdown id: tags:
## Aggregate values, and per Cell behaviour ##
The following tables and plots give an overview of statistical aggregates for each constant, as well as per cell behavior.
%% Cell type:code id: tags:
``` python
# Aggregate statistics and per-cell behaviour of the offset constants.
create_constant_overview(offset_g, "Offset (ADU)", max_cells, entries=1)
```
%% Cell type:code id: tags:
``` python
# Aggregate statistics and per-cell behaviour of the noise constants.
create_constant_overview(noise_g, "Noise (ADU)", max_cells, 0, 100, entries=1)
```
%% Cell type:code id: tags:
``` python
# Turn the bad pixel bit masks into 0.0/1.0 flags so the overview shows the
# fraction of bad pixels. The builtin bool/float replace the np.bool/np.float
# aliases, which no longer exist in current NumPy releases.
bad_pixel_aggregate_g = OrderedDict()
for m, d in badpix_g.items():
    bad_pixel_aggregate_g[m] = d.astype(bool).astype(float)
create_constant_overview(bad_pixel_aggregate_g, "Bad pixel fraction", max_cells, entries=1)
```
%% Cell type:markdown id: tags:
## Summary tables ##
The following tables show summary information for the evaluated module. Values for currently evaluated constants are compared with values for pre-existing constants retrieved from the calibration database.
%% Cell type:code id: tags:
``` python
# List, per module, the creation timestamps of the old constants used for comparison.
time_summary = []
for qm, qm_data in old_mdata.items():
    time_summary.append(f"The following pre-existing constants are used for comparison for module {qm}:")
    for const, const_data in qm_data.items():
        time_summary.append(f"- {const} created at {const_data['timestamp']}")
display(Markdown("\n".join(time_summary)))
```
%% Cell type:code id: tags:
``` python
# Summary statistics of new vs. old constants, one table per constant type.
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
header = ['Parameter',
          "New constant", "Old constant ",
          "New constant", "Old constant ",
          "New constant", "Old constant "]
for const in ['Offset', 'Noise']:
    table = [['','High gain', 'High gain']]
    for qm in res.keys():
        data = np.copy(res[qm][const])
        if old_const[qm][const] is not None:
            dataold = np.copy(old_const[qm][const])
        f_list = [np.nanmedian, np.nanmean, np.nanstd, np.nanmin, np.nanmax]
        n_list = ['Median', 'Mean', 'Std', 'Min', 'Max']
        for i, f in enumerate(f_list):
            line = [n_list[i]]
            # NOTE(review): `gain` indexes the last axis of the constant, which
            # the characterization fills per memory cell - confirm intent.
            line.append('{:6.1f}'.format(f(data[...,gain])))
            if old_const[qm][const] is not None:
                line.append('{:6.1f}'.format(f(dataold[...,gain])))
            else:
                line.append('-')
            table.append(line)
    display(Markdown('### {} [ADU], good and bad pixels ###'.format(const)))
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=header)))
```
......
%% Cell type:markdown id: tags:
# DSSC Offline Correction #
Author: European XFEL Detector Group, Version: 1.0
Offline Calibration for the DSSC Detector
%% Cell type:code id: tags:
``` python
# Notebook parameters of the offline correction; the values below are defaults.
cluster_profile = "noDB" # The ipcluster profile to use
in_folder = "/gpfs/exfel/exp/SQS/202131/p900210/raw" # path to input data, required
out_folder = "/gpfs/exfel/data/scratch/samartse/data/DSSC" # path to output to, required
sequences = [-1] # sequence files to evaluate.
modules = [-1] # modules to correct, set to -1 for all, range allowed
run = 20 # run to process, required
karabo_id = "SQS_DET_DSSC1M-1" # Karabo ID of the detector
karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators
receiver_id = "{}CH0" # inset for receiver devices
path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data
h5path = 'INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images
h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to the image index
slow_data_pattern = 'RAW-R{}-DA{}-S00000.h5' # file name pattern handed to get_dssc_ctrl_data
use_dir_creation_date = True # use the creation date of the input dir for database queries
cal_db_interface = "tcp://max-exfl-cal001:8020#8025" # the database interface to use
cal_db_timeout = 300000 # in milliseconds
mem_cells = 0 # number of memory cells used, set to 0 to automatically infer
overwrite = True # set to True if existing data should be overwritten
max_pulses = 800 # maximum number of pulses per train
bias_voltage = 100 # detector bias voltage
sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.
mask_noisy_asic = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above
mask_cold_asic = 0.25 # mask cold ASICs if number of pixels with negligible standard deviation is larger than this fraction
noisy_pix_threshold = 1. # threshold above which a pixel is considered noisy.
geo_file = "/gpfs/exfel/data/scratch/xcal/dssc_geo_june19.h5" # detector geometry file
dinstance = "DSSC1M1"
slow_data_aggregators = [1,2,3,4] # quadrant/aggregator
slow_data_path = 'SQS_NQS_DSSC/FPGA/PPT_Q' # slow data path handed to get_dssc_ctrl_data
def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):
    """Forward all arguments unchanged to ``xfel_calibrate.calibrate.balance_sequences``.

    The import happens inside the function, so the function body carries its
    own dependency.
    """
    from xfel_calibrate import calibrate
    return calibrate.balance_sequences(
        in_folder, run, sequences, sequences_per_node, karabo_da
    )
```
%% Cell type:code id: tags:
``` python
# make sure a cluster is running with ipcluster start --n=32, give it a while to start
import os
import sys
from collections import OrderedDict
import h5py
import matplotlib
import numpy as np
matplotlib.use("agg")
import matplotlib.pyplot as plt
from ipyparallel import Client
from IPython.display import Latex, Markdown, display
print(f"Connecting to profile {cluster_profile}")
view = Client(profile=cluster_profile)[:]
view.use_dill()
from datetime import timedelta
from cal_tools.dssclib import get_dssc_ctrl_data, get_pulseid_checksum
from cal_tools.tools import (
get_constant_from_db,
get_dir_creation_date,
get_notebook_name,
map_modules_from_folder,
parse_runs,
run_prop_seq_from_path,
)
from dateutil import parser
from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions
```
%% Cell type:code id: tags:
``` python
# Resolve run-dependent settings before any file is touched.
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
creation_time = None
if use_dir_creation_date:
    creation_time = get_dir_creation_date(in_folder, run)
    print(f"Using {creation_time} as creation time")
# A leading -1 stands for "all sequences", which is passed on as None.
if sequences[0] == -1:
    sequences = None
# Fill detector id and receiver template into the paths; the receiver template
# still carries a "{}" placeholder that is formatted with the channel later on.
h5path = h5path.format(karabo_id, receiver_id)
h5path_idx = h5path_idx.format(karabo_id, receiver_id)
# Derive karabo_da from modules when no aggregators were given, otherwise
# derive the module indices from the last two characters of each aggregator name.
if karabo_da[0] == '-1':
    if modules[0] == -1:
        modules = list(range(16))
    karabo_da = ["DSSC{:02d}".format(i) for i in modules]
else:
    modules = [int(x[-2:]) for x in karabo_da]
print("Process modules: ",
      ', '.join([f"Q{x // 4 + 1}M{x % 4 + 1}" for x in modules]))
CHUNK_SIZE = 512
MAX_PAR = 32
# Strip a single trailing slash from the input folder.
if in_folder[-1] == "/":
    in_folder = in_folder[:-1]
print(f"Outputting to {out_folder}")
# Create the output folder, or refuse to reuse it unless overwriting is allowed.
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
elif not overwrite:
    raise AttributeError("Output path exists! Exiting")
import warnings
warnings.filterwarnings('ignore')
print(f"Detector in use is {karabo_id}")
```
%% Cell type:code id: tags:
``` python
# set everything up filewise
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf
# Never schedule more parallel tasks than there are sequence files.
MAX_PAR = min(MAX_PAR, total_sequences)
```
%% Cell type:markdown id: tags:
## Processed Files ##
%% Cell type:code id: tags:
``` python
import copy
import tabulate
from IPython.display import HTML, Latex, Markdown, display
# Print a table of all files that are going to be processed.
# NOTE(review): nesting reconstructed from a whitespace-stripped source - confirm.
print(f"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}")
table = []
# copy.copy is shallow: the queues drained below are the ones held by
# mapped_files, hence the re-mapping at the end of this cell.
mfc = copy.copy(mapped_files)
ti = 0
for k, files in mfc.items():
    i = 0
    while not files.empty():
        f = files.get()
        # The module name is only shown on the first row of each module.
        if i == 0:
            table.append((ti, k, i, f))
        else:
            table.append((ti, "", i, f))
        i += 1
        ti += 1
if len(table):
    md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=["#", "module", "# module", "file"])))
# restore the queue
mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)
mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf
```
%% Cell type:code id: tags:
``` python
import copy
from functools import partial
def correct_module(total_sequences, sequences_qm, karabo_id, dinstance, mask_noisy_asic,
                   mask_cold_asic, noisy_pix_threshold, chunksize, mem_cells, bias_voltage,
                   cal_db_timeout, creation_time, cal_db_interface, h5path, h5path_idx, inp):
    """Offset-correct one raw sequence file and write the corrected copy.

    Everything except the image data is copied to the output file; the image
    data gets the offset constant retrieved from the calibration database
    subtracted (if one is found). Signal-versus-pulse histograms are filled
    from the first non-empty train only.

    Args:
        total_sequences, sequences_qm: Not used inside the function; kept for
            the caller's positional interface.
        karabo_id: Detector id used for the database query.
        dinstance: Attribute name looked up on ``iCalibrationDB.Detectors``.
        mask_noisy_asic: Fraction of noisy pixels above which an ASIC is
            flagged; 0 disables the check.
        mask_cold_asic: Fraction above which an ASIC is flagged as cold;
            0 disables the check.
        noisy_pix_threshold: Standard deviation above which a pixel is noisy.
        chunksize: Chunk size of the output datasets along the image axis.
        mem_cells: Number of memory cells; 0 infers it from the file.
        bias_voltage: Bias voltage of the operating condition.
        cal_db_timeout: Timeout handed to the database query.
        creation_time: Creation time handed to the database query.
        cal_db_interface: Database interface handed to the database query.
        h5path: Image path template with one placeholder for the channel.
        h5path_idx: Index path template with one placeholder for the channel.
        inp: Tuple ``(filename, filename_out, channel, karabo_da, qm,
            conditions)``; ``conditions`` provides ``acquisition_rate``,
            ``target_gain`` and ``encoded_gain``.

    Returns:
        ``(hists_signal_low, hists_signal_high, low_edges, high_edges,
        pulse_edges, when, qm, err)``. ``err`` is ``None`` on success, the
        caught exception if processing failed, or a message string if no
        offset was found in the database.
    """
    import binascii
    import copy
    import struct
    from hashlib import blake2b

    import h5py
    import numpy as np
    from cal_tools.dssclib import (
        get_dssc_ctrl_data,
        get_num_cells,
        get_pulseid_checksum,
    )
    from cal_tools.enums import BadPixels
    from cal_tools.tools import get_constant_from_db_and_time
    from iCalibrationDB import (
        Conditions,
        ConstantMetaData,
        Constants,
        Detectors,
        Versions,
    )

    filename, filename_out, channel, karabo_da, qm, conditions = inp

    # DSSC correction requires path without the leading "/"
    if h5path[0] == '/':
        h5path = h5path[1:]
    if h5path_idx[0] == '/':
        h5path_idx = h5path_idx[1:]
    h5path = h5path.format(channel)
    h5path_idx = h5path_idx.format(channel)

    low_edges = None
    hists_signal_low = None
    high_edges = None
    hists_signal_high = None
    pulse_edges = None
    err = None
    offset_not_found = False

    # NOTE(review): this local helper shadows the get_num_cells imported from
    # cal_tools.dssclib above, so the local version is the one in effect.
    # Decide which of the two is intended and drop the other.
    def get_num_cells(fname, h5path):
        with h5py.File(fname, "r") as f:
            cells = f[f"{h5path}/cellId"][()]
            maxcell = np.max(cells)
            options = [100, 200, 400, 500, 600, 700, 800]
            dists = np.array([(o-maxcell) for o in options])
            dists[dists<0] = 10000 # assure to always go higher
            return options[np.argmin(dists)]

    if mem_cells == 0:
        mem_cells = get_num_cells(filename, h5path)

    pulseid_checksum = get_pulseid_checksum(filename, h5path, h5path_idx)
    print(f"Memcells: {mem_cells}")

    condition = Conditions.Dark.DSSC(bias_voltage=bias_voltage, memory_cells=mem_cells,
                                     pulseid_checksum=pulseid_checksum,
                                     acquisition_rate=conditions['acquisition_rate'],
                                     target_gain=conditions['target_gain'],
                                     encoded_gain=conditions['encoded_gain'])
    detinst = getattr(Detectors, dinstance)
    device = getattr(detinst, qm)

    with h5py.File(filename, "r") as infile:
        y = infile[f"{h5path}/data"].shape[2]
        x = infile[f"{h5path}/data"].shape[3]

    offset, when = get_constant_from_db_and_time(karabo_id, karabo_da,
                                                 Constants.DSSC.Offset(),
                                                 condition,
                                                 None,
                                                 cal_db_interface,
                                                 creation_time=creation_time,
                                                 timeout=cal_db_timeout)
    if offset is not None:
        # Reorder the constant so that its first axis can be indexed with cellId.
        offset = np.moveaxis(np.moveaxis(offset[...], 2, 0), 2, 1)
    else:
        offset_not_found = True
        print("No offset found in the database")

    def copy_and_sanitize_non_cal_data(infile, outfile):
        # these are touched in the correct function, do not copy them here
        dont_copy = ["data"]
        dont_copy = [h5path + "/{}".format(do)
                     for do in dont_copy]

        # a visitor to copy everything else
        def visitor(k, item):
            if k not in dont_copy:
                if isinstance(item, h5py.Group):
                    outfile.create_group(k)
                elif isinstance(item, h5py.Dataset):
                    group = str(k).split("/")
                    group = "/".join(group[:-1])
                    infile.copy(k, outfile[group])

        infile.visititems(visitor)

    try:
        with h5py.File(filename, "r") as infile:
            with h5py.File(filename_out, "w") as outfile:
                copy_and_sanitize_non_cal_data(infile, outfile)

                # get indices of last images in each train
                # (builtin int replaces the np.int alias, which no longer
                # exists in current NumPy releases)
                first_arr = np.squeeze(infile[f"{h5path_idx}/first"]).astype(int)
                last_arr = np.concatenate((first_arr[1:], np.array([-1,]))).astype(int)
                assert first_arr.size == last_arr.size

                oshape = list(infile[f"{h5path}/data"].shape)
                if len(oshape) == 4:
                    oshape = [oshape[0],]+oshape[2:]
                chunks = (chunksize, oshape[1], oshape[2])
                ddset = outfile.create_dataset(f"{h5path}/data",
                                               oshape, chunks=chunks,
                                               dtype=np.float32,
                                               fletcher32=True)
                mdset = outfile.create_dataset(f"{h5path}/mask",
                                               oshape, chunks=chunks,
                                               dtype=np.uint32,
                                               compression="gzip",
                                               compression_opts=1,
                                               shuffle=True,
                                               fletcher32=True)

                for train in range(first_arr.size):
                    first = first_arr[train]
                    last = last_arr[train]
                    if first == last:
                        continue
                    data = np.squeeze(infile[f"{h5path}/data"][first:last, ...].astype(np.float32))
                    cellId = np.squeeze(infile[f"{h5path}/cellId"][first:last, ...])
                    pulseId = np.squeeze(infile[f"{h5path}/pulseId"][first:last, ...])

                    if not offset_not_found:
                        data[...] -= offset[cellId,...]

                    # Histograms are filled once, from the first train with data.
                    if hists_signal_low is None:
                        pulseId = np.repeat(pulseId[:, None], data.shape[1], axis=1)
                        pulseId = np.repeat(pulseId[:,:,None], data.shape[2], axis=2)
                        bins = (55, int(pulseId.max()))
                        rnge = [[-5, 50], [0, int(pulseId.max())]]
                        hists_signal_low, low_edges, pulse_edges = np.histogram2d(data.flatten(),
                                                                                  pulseId.flatten(),
                                                                                  bins=bins,
                                                                                  range=rnge)
                        rnge = [[-5, 300], [0, pulseId.max()]]
                        hists_signal_high, high_edges, _ = np.histogram2d(data.flatten(),
                                                                          pulseId.flatten(),
                                                                          bins=bins,
                                                                          range=rnge)
                    ddset[first:last, ...] = data

                # find static and noisy values in dark images
                # NOTE(review): `last` is whatever the final loop iteration
                # left behind, and bpix is never written to the mask dataset
                # - confirm the intent of this section.
                data = infile[f"{h5path}/data"][last, ...].astype(np.float32)
                bpix = np.zeros(oshape[1:], np.uint32)
                dark_std = np.std(data, axis=0)
                bpix[dark_std > noisy_pix_threshold] = BadPixels.NOISE_OUT_OF_THRESHOLD.value

                for i in range(8):
                    for j in range(2):
                        count_noise = np.count_nonzero(bpix[i*64:(i+1)*64, j*64:(j+1)*64])
                        asic_std = np.std(data[:, i*64:(i+1)*64, j*64:(j+1)*64])
                        if mask_noisy_asic:
                            if count_noise/(64*64) > mask_noisy_asic:
                                bpix[i*64:(i+1)*64, j*64:(j+1)*64] = BadPixels.NOISY_ADC.value
                        if mask_cold_asic:
                            count_cold = np.count_nonzero(asic_std < 0.5)
                            if count_cold/(64*64) > mask_cold_asic:
                                bpix[i*64:(i+1)*64, j*64:(j+1)*64] = BadPixels.ASIC_STD_BELOW_NOISE.value
    except Exception as e:
        # Best effort: report the failure to the caller instead of raising.
        print(e)
        err = e

    if err is None and offset_not_found:
        err = "Offset not found in database!. No offset correction applied."

    return (hists_signal_low, hists_signal_high, low_edges, high_edges, pulse_edges, when, qm, err)
# Driver for the correction step: take input files per module from
# `mapped_files`, collect them into `inp`, run `correct_module` on the batch
# through `view.map_sync` and accumulate the histograms it returns.
# NOTE(review): the indentation of this cell was lost in this rendering, so the
# nesting of the statements below has to be checked against the notebook.
done = False
# Maps module index -> (input file, output file); read by the preview cells.
first_files = {}
# Argument tuples waiting to be handed to correct_module.
inp = []
# Counter used together with MAX_PAR to decide when a batch is dispatched.
left = total_sequences
# Start as scalar 0 so the first `+=` adopts the shape of the first histogram.
hists_signal_low = 0
hists_signal_high = 0
low_edges, high_edges, pulse_edges = None, None, None
# The three returned objects are indexed by module name (qm) further down.
tGain, encodedGain, operatingFreq = get_dssc_ctrl_data(in_folder\
+ "/r{:04d}/".format(run),\
slow_data_pattern,slow_data_aggregators, run, slow_data_path)
# Parallel lists: one entry per result returned by correct_module.
whens = []
qms = []
Errors = []
while not done:
# One flag per module that delivered a file in this pass.
dones = []
for i, k_da in zip(modules, karabo_da):
qm = "Q{}M{}".format(i//4 +1, i % 4 + 1)
if qm in mapped_files:
# mapped_files[qm] is used like a queue here (.empty() / .get()).
if not mapped_files[qm].empty():
fname_in = str(mapped_files[qm].get())
# True once this module has no more files queued.
dones.append(mapped_files[qm].empty())
else:
print(f"{qm} file is missing")
continue
else:
print(f"Skipping {qm}")
continue
# Output file: same base name in out_folder with "RAW" replaced by "CORR".
fout = os.path.abspath("{}/{}".format(out_folder, (os.path.split(fname_in)[-1]).replace("RAW", "CORR")))
# Overwritten on every pass, so the most recently queued file is kept.
first_files[i] = (fname_in, fout)
# Per-module operating conditions passed along with the file names.
conditions = {}
conditions['acquisition_rate'] = operatingFreq[qm]
conditions['target_gain'] = tGain[qm]
conditions['encoded_gain'] = encodedGain[qm]
inp.append((fname_in, fout, i, k_da, qm, conditions))
# Dispatch once enough tasks are queued (at most MAX_PAR are required).
if len(inp) >= min(MAX_PAR, left):
print(f"Running {len(inp)} tasks parallel")
# Bind the run-wide arguments; map_sync supplies one `inp` tuple per call.
p = partial(correct_module, total_sequences, sequences_qm,
karabo_id, dinstance, mask_noisy_asic, mask_cold_asic,
noisy_pix_threshold, chunk_size_idim, mem_cells,
bias_voltage, cal_db_timeout, creation_time, cal_db_interface,
h5path, h5path_idx)
r = view.map_sync(p, inp)
#r = list(map(p, inp))
inp = []
# NOTE(review): decremented by MAX_PAR rather than by the number of tasks
# actually dispatched -- confirm this is intended.
left -= MAX_PAR
for rr in r:
if rr is not None:
# The bin edges are overwritten by every result; the last one wins.
hl, hh, low_edges, high_edges, pulse_edges, when, qm, err = rr
whens.append(when)
qms.append(qm)
Errors.append(err)
if hl is not None: # any one being None will also make the others None
hists_signal_low += hl.astype(np.float64)
hists_signal_high += hh.astype(np.float64)
# all([]) is True, so the loop also ends when no module delivered a file.
done = all(dones)
# Report, per processed file, when the offset constant was injected.
#
# qms, whens and Errors are parallel lists (one entry per correct_module
# result), so all three have to be reordered together. Sorting only qms and
# whens would attach an error to the wrong module. The sort key is the module
# name alone: sorting (qm, when) tuples would fall back to comparing the
# `when` values for repeated modules, which fails for mixed types.
order = sorted(range(len(qms)), key=qms.__getitem__)
whens = [whens[idx] for idx in order]
Errors = [Errors[idx] for idx in order]
qms = [qms[idx] for idx in order]
for i, qm in enumerate(qms):
    try:
        when = whens[i].isoformat()
    except AttributeError:
        # No isoformat() on this value (not a datetime-like object):
        # print it unchanged.
        when = whens[i]
    if Errors[i] is not None:
        # Avoid writing wrong injection date if cons. not found.
        if "not found" in str(Errors[i]):
            print(f"ERROR! {qm}: {Errors[i]}")
        else:
            print(f"Offset for {qm} was injected on {when}, ERROR!: {Errors[i]}")
    else:
        print(f"Offset for {qm} was injected on {when}")
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.ticker import FormatStrFormatter, LinearLocator
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
def do_3d_plot(data, edges, x_axis, y_axis):
    """Draw a 2D histogram as a 3D surface with a "Counts" z axis.

    :param data: 2D array of counts; it is transposed before plotting.
    :param edges: two bin-edge arrays; the last edge of each is dropped and
        the remaining values are used as the X and Y grid coordinates.
    :param x_axis: label of the x axis.
    :param y_axis: label of the y axis.
    """
    fig = plt.figure(figsize=(10, 10))
    # Figure.gca() stopped accepting keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6); add_subplot() creates the same 3D axes
    # on this fresh figure with old and new releases alike.
    ax = fig.add_subplot(111, projection='3d')

    # Make data.
    X, Y = np.meshgrid(edges[0][:-1], edges[1][:-1])
    Z = data.T

    # Plot the surface.
    ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                    linewidth=0, antialiased=False)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_zlabel("Counts")
```
%% Cell type:code id: tags:
``` python
def do_2d_plot(data, edges, y_axis, x_axis):
    """Show a 2D histogram as an image with a logarithmic colour scale.

    :param data: 2D array of counts; it is drawn with its first axis reversed.
    :param edges: two bin-edge arrays; ``edges[1]`` spans the horizontal image
        axis and ``edges[0]`` the vertical one.
    :param y_axis: label of the vertical axis.
    :param x_axis: label of the horizontal axis.
    """
    from matplotlib.colors import LogNorm

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    horizontal, vertical = edges[1], edges[0]
    extent = [
        np.min(horizontal), np.max(horizontal),
        np.min(vertical), np.max(vertical),
    ]
    # Colour scale runs from 1 count up to the largest bin content.
    colour_norm = LogNorm(vmin=1, vmax=np.max(data))
    im = ax.imshow(data[::-1, :], extent=extent, aspect="auto",
                   norm=colour_norm)

    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    cb = fig.colorbar(im)
    cb.set_label("Counts")
```
%% Cell type:markdown id: tags:
## Mean Intensity per Pulse ##
The following plots show the mean signal for each pulse in a detailed and expanded intensity region.
%% Cell type:code id: tags:
``` python
# Draw the surface and the image view for the low-range histogram first,
# then for the high-range one.
for signal_hist, signal_edges in (
    (hists_signal_low, low_edges),
    (hists_signal_high, high_edges),
):
    do_3d_plot(signal_hist, [signal_edges, pulse_edges], "Signal (ADU)", "Pulse id")
    do_2d_plot(signal_hist, [signal_edges, pulse_edges], "Signal (ADU)", "Pulse id")
```
%% Cell type:code id: tags:
``` python
# Collect, per module, the first images of the raw file and the matching
# entries of the corrected file for the preview plots below.
corrected = []
raw = []
mask = []
pulse_ids = []
train_ids = []
for channel, ff in first_files.items():
    try:
        raw_file, corr_file = ff
        data_path = h5path.format(channel)
        index_path = h5path_idx.format(channel)

        # `with` closes the file even if reading fails, and does not touch an
        # unbound name when the file cannot be opened at all.
        with h5py.File(raw_file, "r") as infile:
            first_idx = int(np.array(infile[f"{index_path}/first"])[0])
            raw_dset = infile[f"{data_path}/data"]
            # Use at most the first 128 images for plotting (fewer if the
            # file holds fewer).
            plt_im = min(raw_dset.shape[0], 128)
            last_idx = first_idx + plt_im
            # Read only the previewed slice instead of the whole dataset.
            raw.append((channel, raw_dset[first_idx:last_idx, 0, ...]))

        with h5py.File(corr_file, "r") as infile:
            corrected.append((channel, np.array(infile[f"{data_path}/data"][first_idx:last_idx, ...])))
            mask.append((channel, np.array(infile[f"{data_path}/mask"][first_idx:last_idx, ...])))
            pulse_ids.append((channel, np.squeeze(infile[f"{data_path}/pulseId"][first_idx:last_idx, ...])))
            train_ids.append((channel, np.squeeze(infile[f"{data_path}/trainId"][first_idx:last_idx, ...])))
    except Exception as e:
        # Best effort: a module that cannot be read is reported and skipped.
        print(e)
```
%% Cell type:code id: tags:
``` python
def combine_stack(d, sdim):
    """Place per-module image stacks into one common 1300 x 1300 canvas.

    Tile and module positions are read from the geometry file ``geo_file``.

    :param d: sequence of ``(channel, stack)`` pairs. Each stack is indexed as
        (image, row, column) and is split at column 256 into two tiles, each
        written into a 128 x 256 window of the canvas.
    :param sdim: number of images in every stack (first canvas dimension).
    :return: float32 array of shape ``(sdim, 1300, 1300)``; zero wherever no
        tile was placed.
    """
    combined = np.zeros((sdim, 1300, 1300), np.float32)
    # Per-quadrant (y, x) offsets; they are divided by the pixel pitch below.
    quad_pos = [
        (0, 145),
        (130, 140),
        (125, 15),
        (0, 15),
    ]
    # Pixel pitch in x and y, in the units of the geometry file positions.
    px = 0.236
    py = 0.204
    # Constant vertical offset applied to every tile.
    y_margin = combined.shape[1] // 16

    with h5py.File(geo_file, "r") as gf:
        for i, (ch, stack) in enumerate(d):
            quad = ch // 4
            mi = 3 - (ch % 4)
            mp = gf["Q{}/M{}/Position".format(quad + 1, mi % 4 + 1)][()]
            t1 = gf["Q{}/M{}/T01/Position".format(quad + 1, ch % 4 + 1)][()]
            t2 = gf["Q{}/M{}/T02/Position".format(quad + 1, ch % 4 + 1)][()]

            if quad < 2:
                # First two quadrants: swap the tiles and flip the rows.
                t1, t2 = t2, t1
                td = stack[:, ::-1, :]
            else:
                # Remaining quadrants: flip the columns.
                td = stack[:, :, ::-1]

            t1d = td[:, :, :256]
            t2d = td[:, :, 256:]

            # NOTE(review): the quadrant offset is looked up by the position
            # of the entry in `d`, not by its channel number; the two differ
            # when modules are missing -- confirm which one is intended.
            quad_y, quad_x = quad_pos[i // 4]
            x_shift = int(quad_x / px)
            y_shift = int(quad_y / py) + y_margin

            x0t1 = int((t1[0] + mp[0]) / px) + x_shift
            y0t1 = int((t1[1] + mp[1]) / py) + y_shift
            x0t2 = int((t2[0] + mp[0]) / px) + x_shift
            y0t2 = int((t2[1] + mp[1]) / py) + y_shift

            combined[:, y0t1:y0t1 + 128, x0t1:x0t1 + 256] = t1d
            combined[:, y0t2:y0t2 + 128, x0t2:x0t2 + 256] = t2d

    return combined
```
%% Cell type:code id: tags:
``` python
# All three previews cover the same number of images.
n_preview = last_idx - first_idx
combined = combine_stack(corrected, n_preview)
combined_raw = combine_stack(raw, n_preview)
combined_mask = combine_stack(mask, n_preview)
```
%% Cell type:markdown id: tags:
### Mean RAW Preview ###
%% Cell type:code id: tags:
``` python
# Caption stating how many images went into the RAW mean preview.
display(Markdown(f"The per pixel mean of the first {plt_im} images of the RAW data"))
```
%% Cell type:code id: tags:
``` python
%matplotlib inline
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
im = ax.imshow(np.mean(combined_raw[:,...],axis=0),
vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), -5),
vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 50), cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Single Shot Preview ###
A single shot image from cell 2 of the first train
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Image at index 2 of the combined corrected stack.
dim = combined[2, ...]
# Upper colour limit: 1.5x the median of the positive pixels, at least 50.
upper_limit = max(1.5 * np.median(dim[dim > 0]), 50)
im = ax.imshow(dim, vmin=0, vmax=upper_limit,
               cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Distribution of the single-shot pixel values between 0 and 100.
shot_values = dim.flatten()
h = ax.hist(shot_values, bins=100, range=(0, 100))
```
%% Cell type:markdown id: tags:
### Mean CORRECTED Preview ###
%% Cell type:code id: tags:
``` python
# Caption stating how many images went into the CORRECTED mean preview.
display(Markdown(f"The per pixel mean of the first {plt_im} images of the CORRECTED data"))
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Per-pixel mean over the previewed images.
mean_image = np.mean(combined, axis=0)
# Upper colour limit: 1.5x the median of the positive pixels, at least 10.
upper_limit = max(1.5 * np.median(combined[combined > 0]), 10)
im = ax.imshow(mean_image, vmin=0, vmax=upper_limit,
               cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Max CORRECTED Preview ###
The per-pixel maximum of the first images (up to 128) of the CORRECTED data
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Per-pixel maximum over the previewed images.
max_image = np.max(combined, axis=0)
# Upper colour limit: 100x the median of the positive pixels, at least 20.
upper_limit = max(100 * np.median(combined[combined > 0]), 20)
im = ax.imshow(max_image, vmin=0, vmax=upper_limit,
               cmap="jet", interpolation="nearest")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Clamp non-positive values to zero in place before histogramming;
# `combined` stays modified for any later use.
non_positive = combined <= 0
combined[non_positive] = 0
h = ax.hist(combined.flatten(), bins=100, range=(-5, 100), log=True)
```
%% Cell type:markdown id: tags:
## Bad Pixels ##
The mask contains dedicated entries for all pixels and memory cells as well as all three gain stages. Each mask entry is encoded in 32 bits as:
%% Cell type:code id: tags:
``` python
import tabulate
from cal_tools.enums import BadPixels
from IPython.display import HTML, Latex, Markdown, display
# One row per BadPixels member: its name and its value as a zero-padded
# (minimum 16 digits) binary string.
table = [(item.name, f"{item.value:016b}") for item in BadPixels]
latex_table = tabulate.tabulate(
    table, tablefmt='latex', headers=["Bad pixel type", "Bit mask"]
)
md = display(Latex(latex_table))
```
%% Cell type:markdown id: tags:
### Full Train Bad Pixels ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# Largest mask value per pixel over the previewed images, shown on a log2
# scale.
max_mask = np.max(combined_mask, axis=0)
im = ax.imshow(np.log2(max_mask), vmin=0, vmax=32, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
%% Cell type:markdown id: tags:
### Full Train Bad Pixels - Only Dark Char. Related ###
%% Cell type:code id: tags:
``` python
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# True wherever the NOISY_ADC bit is set in the mask of an image.
noisy_bit = BadPixels.NOISY_ADC.value
noisy_flagged = (combined_mask.astype(np.uint32) & noisy_bit) != 0
# A pixel is shown as flagged if it is flagged in any previewed image.
im = ax.imshow(np.max(noisy_flagged, axis=0), vmin=0, vmax=1, cmap="jet")
cb = fig.colorbar(im, ax=ax)
```
......
......@@ -9,6 +9,16 @@ import h5py
import numpy as np
def get_num_cells(fname, h5path):
    """Return the number of memory cells recorded in a file.

    :param fname: path of the HDF5 file to read.
    :param h5path: HDF5 group path that contains the ``cellId`` dataset.
    :return: the largest cell ID plus one, or ``None`` if the dataset holds
        no cell IDs.
    """
    with h5py.File(fname, "r") as f:
        cells = f[f"{h5path}/cellId"][()]

    # Comparing a NumPy array with ``[]`` does not test for emptiness (and
    # raises for non-empty arrays on recent NumPy); check the size instead.
    if np.size(cells) == 0:
        return None

    return np.max(cells) + 1
def get_pulseid_checksum(fname, h5path, h5path_idx):
"""generates hash value from pulse pattern (veto defined)."""
with h5py.File(fname, "r") as infile:
......
......@@ -584,11 +584,7 @@ automated_test_config = {
"run": "9028", # Original run: "1723",
"karabo-id": "SCS_DET_DSSC1M-1",
"slow-data-path": "SCS_CDIDET_DSSC/FPGA/PPT_Q",
"slow-data-aggregators":
- 1
- 2
- 3
- 4
"slow-data-aggregators": [1, 2, 3, 4]
},
"reference-folder": "{}/{}/{}",
},
......@@ -602,6 +598,7 @@ automated_test_config = {
"run": "9028", # Original run: "1723",
"karabo-id": "SCS_DET_DSSC1M-1",
"slow-data-path": "SCS_CDIDET_DSSC/FPGA/PPT_Q",
"slow-data-aggregators": [1, 2, 3, 4]
},
"reference-folder": "{}/{}/{}",
},
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment