Skip to content
Snippets Groups Projects
Commit 4db2ca0a authored by Steffen Hauf's avatar Steffen Hauf
Browse files

Feat/lpd corr lib

See merge request detectors/pycalibration!18
parents 0988897b 71ed595b
No related branches found
No related tags found
1 merge request!18Feat/lpd corr lib
import numpy as np
import copy
import h5py
from cal_tools.enums import BadPixels
class LpdCorrections:
"""
The LpdCorrections class perfroms LPD offline correction
The following example shows a typical use case::
infile = h5py.File(filename, "r", driver="core")
outfile = h5py.File(filename_out, "w")
lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range,
bins_signal_high_range)
try:
lpd_corr.initialize(offset, rel_gain, rel_gain_offset, mask, noise, flatfield)
except IOError:
return
for irange in lpd_corr.get_iteration_range():
lpd_corr.correct_lpd(irange)
hists, edges = lpd_corr.get_histograms()
"""
def __init__(self, infile, outfile, max_cells, channel, max_pulses,
bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
raw_fmt_version = 2, chunk_size = 512,
h5_data_path = "INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/",
h5_index_path = "INDEX/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/"):
"""
Initialize an LpdCorrections Class
:param infile: to be corrected h5py input file
:param outfile: writeable h5py output file
:param max_cell: maximum number of memory cells to handle, e.g. if
calibration constants only exist for a subset of cells
:param channel: module/channel to correct
:param max_pulses: maximum pulse id to consider for preview histograms
:param bins_gain_vs_signal: number of bins for gain vs signal histogram
:param bins_signal_low_range: number of bins for the low signal range histogram
:param bins_signal_high_range: number of bins for the high signal range histogram
:param raw_fmt_version: raw file format version to use
:param chunk_size: images per chunk to compute upon
:param h5_data_path: path in HDF5 file which is prefixed to the image/data section
:param h5_index_path: path in HDF5 file which is prefixed to the index section
"""
self.lpd_base = h5_data_path.format(channel)
self.idx_base = h5_index_path.format(channel)
self.infile = infile
self.outfile = outfile
self.channel = channel
self.index_v = raw_fmt_version
self.chunksize = chunk_size
self.initialized = False
self.max_pulses = max_pulses
self.max_cells = max_cells
self.hists_signal_low = 0
self.hists_signal_high = 0
self.hists_gain_vs_signal = 0
self.bins_gain_vs_signal = bins_gain_vs_signal
self.bins_signal_low_range = bins_signal_low_range
self.bins_signal_high_range = bins_signal_high_range
self.cidx = 0
def get_iteration_range(self):
"""Returns a range expression over which to iterate in chunks
"""
return np.array_split(self.firange, self.firange.size//self.chunksize)
def initialize(self, offset, rel_gain, rel_gain_offset, mask, noise, flatfield, swap_axis=True):
""" Initialize the calibration constants and the output data file.
Any data that is not touched by the corrections is copied into the output
file at this point. Also data validity tests are performed. This functions
must be called before `correct_lpd` is executed.
:param offset: offset map to use for corrections
:param rel_gain: relative gain map to use for corrections
:param rel_gain_offset: relative gain offset to use for corrections
:param mask: bad pixel mask to use for corrections
:param noise: noise map to use for corrections
:param flatfield: flatfield map to use for corrections
:param swap_axis: set to True if using data from the calibration database.
"""
if swap_axis:
offset = np.ascontiguousarray(np.moveaxis(np.moveaxis(offset, 2, 0), 2, 1))
rel_gain = np.ascontiguousarray(np.moveaxis(np.moveaxis(rel_gain, 2, 0), 2, 1))
mask = np.ascontiguousarray(np.moveaxis(np.moveaxis(mask, 2, 0), 2, 1))
rel_gain_offset = np.ascontiguousarray(np.moveaxis(np.moveaxis(rel_gain_offset, 2, 0), 2, 1))
noise = np.ascontiguousarray(np.moveaxis(np.moveaxis(noise, 2, 0), 2, 1))
flatfield = np.ascontiguousarray(np.moveaxis(flatfield, 1, 0))
self.offset = offset
self.rel_gain = rel_gain
self.rel_gain_b = rel_gain_offset
self.mask = mask
self.noise = noise
self.flatfield = flatfield
self.median_noise = np.nanmedian(noise[0,...])
self.max_cells = np.min([offset.shape[0], rel_gain.shape[0], mask.shape[0],
rel_gain_offset.shape[0], self.max_cells])
self.get_valid_image_idx()
self.gen_valid_range()
self.copy_and_sanitize_non_cal_data()
self.create_output_datasets()
self.initialized = True
def split_gain(self, d):
""" Split gain information off 16-bit LPD data
Gain information can be found in bits 12 and 13 (0-based)
"""
msk = np.zeros(d.shape, np.uint16)
msk[...] = 0b0000111111111111
data = np.bitwise_and(d, msk)
gain = np.right_shift(d, 12)
msk[...] = 0b0000000000000011
gain = np.bitwise_and(gain, msk)
return data, gain
def correct_lpd(self, irange):
""" Correct Raw LPD data for offset and relative gain effects
:param irange: list of image indices to work on, should be contiguous
Will raise an RuntimeError if `initialize()` has not been called first.
"""
if not self.initialized:
raise RuntimeError("Must call initialize() first!")
lpd_base = self.lpd_base
cidx = self.cidx
im = np.array(self.infile[lpd_base+"image/data"][irange,...])
trainId = np.squeeze(self.infile[lpd_base+"/image/trainId"][irange, ...])
pulseId = np.squeeze(self.infile[lpd_base+"image/pulseId"][irange, ...])
status = np.squeeze(self.infile[lpd_base+"/image/status"][irange, ...])
cells = np.squeeze(np.array(self.infile[lpd_base+"/image/cellId"][irange, ...]))
length = np.squeeze(np.array(self.infile[lpd_base+"/image/length"][irange, ...]))
im, gain = self.split_gain(im[:,0,...])
im = im.astype(np.float32)
im[gain > 2] = np.nan
if cidx == 0:
H, xe, ye = np.histogram2d(im.flatten(), gain.flatten(),
bins=self.bins_gain_vs_signal,
range=[[0, 4096], [0, 4]])
self.hists_gain_vs_signal += H
self.signal_edges = (xe, ye)
gain[gain > 2] = 0
om = self.offset[cells,...]
rc = self.rel_gain[cells,...]
rbc = self.rel_gain_b[cells,...]
og = np.choose(gain, (om[...,0], om[...,1], om[...,2]))
rg = np.choose(gain, (rc[...,0], rc[...,1], rc[...,2]))
rgb = np.choose(gain, (rbc[...,0], rbc[...,1], rbc[...,2]))
mskg = self.mask[cells,...]
msk = np.choose(gain, (mskg[...,0], mskg[...,1], mskg[...,2]))
im -= og
# hacky way of smoothening transition region between med and low
cfac = 0.314*np.exp(-im*0.001)
im = (im-rgb)/rg
im /= self.flatfield[None,:,:]
# hacky way of smoothening transition region between med and low
im[gain == 2] -= im[gain == 2]*cfac[gain == 2]
nidx = int(cidx+irange.size)
bidx = ~np.isfinite(im)
im[bidx] = 0
msk[bidx] = BadPixels.VALUE_IS_NAN.value
bidx = (im < -1e7) | (im > 1e7)
im[bidx] = 0
msk[bidx] = BadPixels.VALUE_OUT_OF_RANGE.value
if cidx == 0:
copim = copy.copy(im)
copim[copim < self.median_noise] = np.nan
H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1,2)), pulseId,
bins=(self.bins_signal_low_range, self.max_pulses),
range=[[-50, 1000],[0, self.max_pulses+1]])
self.hists_signal_low += H
self.low_edges = (xe, ye)
H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1,2)), pulseId,
bins=(self.bins_signal_high_range, self.max_pulses),
range=[[0, 200000],[0, self.max_pulses+1]])
self.hists_signal_high += H
self.high_edges = (xe, ye)
self.ddset[cidx:nidx,...] = im
self.gdset[cidx:nidx,...] = gain
self.mdset[cidx:nidx,...] = msk
self.outfile[lpd_base+"image/cellId"][cidx:nidx] = cells
self.outfile[lpd_base+"image/trainId"][cidx:nidx] = trainId
self.outfile[lpd_base+"image/pulseId"][cidx:nidx] = pulseId
self.outfile[lpd_base+"image/status"][cidx:nidx] = status
self.outfile[lpd_base+"image/length"][cidx:nidx] = length
self.cidx = nidx
def get_valid_image_idx(self):
""" Return the indices of valid data
"""
lpd_base = self.idx_base
if self.index_v == 2:
count = np.squeeze(self.infile[lpd_base+"image/count"])
first = np.squeeze(self.infile[lpd_base+"image/first"])
if np.count_nonzero(count != 0) == 0:
raise IOError("File has no valid counts")
valid = count != 0
idxtrains = np.squeeze(self.infile["/INDEX/trainId"])
medianTrain = np.nanmedian(idxtrains)
valid &= (idxtrains > medianTrain - 1e4) & (idxtrains < medianTrain + 1e4)
last_index = int(first[valid][-1]+count[valid][-1])
first_index = int(first[valid][0])
elif self.index_v == 1:
status = np.squeeze(self.infile[lpd_base+"image/status"])
if np.count_nonzero(status != 0) == 0:
raise IOError("File {} has no valid counts".format(infile))
last = np.squeeze(self.infile[lpd_base+"image/last"])
valid = status != 0
last_index = int(last[status != 0][-1])
first_index = int(last[status != 0][0])
else:
raise AttributeError("Not a known raw format version: {}".format(self.index_v))
self.valid = valid
self.first_index = first_index
self.last_index = last_index
self.idxtrains = idxtrains
def gen_valid_range(self):
""" Generate an index range to pass to `correctLPD`.
"""
first_index = self.first_index
last_index = self.last_index
max_cells = self.max_cells
lpd_base = self.lpd_base
allcells = np.squeeze(np.array(self.infile[lpd_base+"image/cellId"][first_index:last_index, ...]))
single_image = np.array(np.array(self.infile[lpd_base+"image/data"][first_index, ...]))
can_calibrate = allcells < max_cells
if np.count_nonzero(can_calibrate) == 0:
return
allcells = allcells[can_calibrate]
firange = np.arange(first_index, last_index)
firange = firange[can_calibrate]
self.oshape = (firange.size, single_image.shape[1], single_image.shape[2])
self.firange = firange
self.single_image = single_image
def copy_and_sanitize_non_cal_data(self):
""" Copy and sanitize data in `infile` that is not touched by `correctLPD`
"""
lpd_base = self.lpd_base
idx_base = self.idx_base
first_index = self.first_index
last_index = self.last_index
valid = self.valid
idxtrains = self.idxtrains
firange = self.firange
alltrains = np.squeeze(self.infile[lpd_base+"image/trainId"][first_index:last_index,...])
dont_copy = ["data", "cellId", "trainId", "pulseId", "status", "length"]
dont_copy = [lpd_base+"image/{}".format(do)
for do in dont_copy]
dont_copy += [idx_base+"{}/first".format(do)
for do in ["detector", "header", "image", "trailer"]]
dont_copy += [idx_base+"{}/count".format(do)
for do in ["detector", "header", "image", "trailer"]]
def visitor(k, item):
if k not in dont_copy:
if isinstance(item, h5py.Group):
self.outfile.create_group(k)
elif isinstance(item, h5py.Dataset):
group = str(k).split("/")
group = "/".join(group[:-1])
self.infile.copy(k, self.outfile[group])
self.infile.visititems(visitor)
# sanitize indices
for do in ["detector", "header", "image", "trailer"]:
existing = np.squeeze(self.infile[idx_base+"{}/first".format(do)])
updated = existing-np.min(existing[valid])
self.outfile[idx_base+"{}/first".format(do)] = updated
existing = np.squeeze(self.infile[idx_base+"{}/count".format(do)])
new_counts, _ = np.histogram(alltrains[firange], bins=np.count_nonzero(valid)+1,
range=(np.min(idxtrains[valid]), np.max(idxtrains[valid]+1)))
updated = existing
updated[valid] = new_counts[:-1]
self.outfile[idx_base+"{}/count".format(do)] = updated
def create_output_datasets(self):
""" Initialize output data sets
"""
lpdbase = self.lpd_base
chunksize = self.chunksize
firange = self.firange
oshape = self.oshape
self.ddset = self.outfile.create_dataset(lpdbase+"image/data",
oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.float32)
self.gdset = self.outfile.create_dataset(lpdbase+"image/gain",
oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.uint8)
self.mdset = self.outfile.create_dataset(lpdbase+"image/mask",
oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.uint32)
self.outfile[lpdbase+"image/cellId"] = np.zeros(firange.size, np.uint16)
self.outfile[lpdbase+"image/trainId"] = np.zeros(firange.size, np.uint64)
self.outfile[lpdbase+"image/pulseId"] = np.zeros(firange.size, np.uint64)
self.outfile[lpdbase+"image/status"] = np.zeros(firange.size, np.uint16)
self.outfile[lpdbase+"image/length"] = np.zeros(firange.size, np.uint32)
self.outfile.flush()
def get_histograms(self):
""" Return preview histograms computed from the first chunk
"""
return ((self.hists_signal_low, self.hists_signal_high, self.hists_gain_vs_signal),
(self.low_edges, self.high_edges, self.signal_edges))
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment