Feat/lpd corr lib

See merge request detectors/pycalibration!18

Feat/lpd corr lib
See merge request detectors/pycalibration!18
4db2ca0a · Steffen Hauf · 0988897b · 71ed595b · 4db2ca0a · 4db2ca0a
Commit 4db2ca0a authored 6 years ago by Steffen Hauf
--- a/cal_tools/cal_tools/lpdlib.py
+++ b/cal_tools/cal_tools/lpdlib.py
+import numpy as np
+import copy
+import h5py
+from cal_tools.enums import BadPixels
+class LpdCorrections:
+    """
+    The LpdCorrections class perfroms LPD offline correction
+    The following example shows a typical use case::
+        infile = h5py.File(filename, "r", driver="core")
+        outfile = h5py.File(filename_out, "w")
+        lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,
+                                  bins_gain_vs_signal, bins_signal_low_range,
+                                  bins_signal_high_range)
+        try:
+            lpd_corr.initialize(offset, rel_gain, rel_gain_offset, mask, noise, flatfield)
+        except IOError:
+            return
+        for irange in lpd_corr.get_iteration_range():
+            lpd_corr.correct_lpd(irange)
+        hists, edges = lpd_corr.get_histograms()
+    """  
+    def __init__(self, infile, outfile, max_cells, channel, max_pulses,
+                 bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,
+                 raw_fmt_version = 2, chunk_size = 512,
+                 h5_data_path = "INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/",
+                 h5_index_path = "INDEX/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/"):
+        """
+        Initialize an LpdCorrections Class
+        :param infile: to be corrected h5py input file
+        :param outfile: writeable h5py output file
+        :param max_cell: maximum number of memory cells to handle, e.g. if
+                         calibration constants only exist for a subset of cells
+        :param channel: module/channel to correct
+        :param max_pulses: maximum pulse id to consider for preview histograms
+        :param bins_gain_vs_signal: number of bins for gain vs signal histogram
+        :param bins_signal_low_range: number of bins for the low signal range histogram
+        :param bins_signal_high_range: number of bins for the high signal range histogram
+        :param raw_fmt_version: raw file format version to use
+        :param chunk_size: images per chunk to compute upon
+        :param h5_data_path: path in HDF5 file which is prefixed to the image/data section
+        :param h5_index_path: path in HDF5 file which is prefixed to the index section
+        """
+        self.lpd_base = h5_data_path.format(channel)
+        self.idx_base = h5_index_path.format(channel)
+        self.infile = infile
+        self.outfile = outfile
+        self.channel = channel
+        self.index_v = raw_fmt_version
+        self.chunksize = chunk_size
+        self.initialized = False
+        self.max_pulses = max_pulses
+        self.max_cells = max_cells
+        self.hists_signal_low = 0
+        self.hists_signal_high = 0
+        self.hists_gain_vs_signal = 0
+        self.bins_gain_vs_signal = bins_gain_vs_signal
+        self.bins_signal_low_range = bins_signal_low_range
+        self.bins_signal_high_range = bins_signal_high_range
+        self.cidx = 0                
+    def get_iteration_range(self):
+        """Returns a range expression over which to iterate in chunks       
+        """
+        return np.array_split(self.firange, self.firange.size//self.chunksize)
+    def initialize(self,  offset, rel_gain, rel_gain_offset, mask, noise, flatfield, swap_axis=True):
+        """ Initialize the calibration constants and the output data file.
+        Any data that is not touched by the corrections is copied into the output
+        file at this point. Also data validity tests are performed. This functions
+        must be called before `correct_lpd` is executed.
+        :param offset: offset map to use for corrections
+        :param rel_gain: relative gain map to use for corrections
+        :param rel_gain_offset: relative gain offset to use for corrections
+        :param mask: bad pixel mask to use for corrections
+        :param noise: noise map to use for corrections
+        :param flatfield: flatfield map to use for corrections
+        :param swap_axis: set to True if using data from the calibration database.        
+        """
+        if swap_axis:
+            offset = np.ascontiguousarray(np.moveaxis(np.moveaxis(offset, 2, 0), 2, 1))
+            rel_gain = np.ascontiguousarray(np.moveaxis(np.moveaxis(rel_gain, 2, 0), 2, 1))
+            mask = np.ascontiguousarray(np.moveaxis(np.moveaxis(mask, 2, 0), 2, 1))
+            rel_gain_offset = np.ascontiguousarray(np.moveaxis(np.moveaxis(rel_gain_offset, 2, 0), 2, 1))
+            noise = np.ascontiguousarray(np.moveaxis(np.moveaxis(noise, 2, 0), 2, 1))
+            flatfield  = np.ascontiguousarray(np.moveaxis(flatfield, 1, 0))
+        self.offset = offset
+        self.rel_gain = rel_gain
+        self.rel_gain_b = rel_gain_offset
+        self.mask = mask
+        self.noise = noise
+        self.flatfield = flatfield
+        self.median_noise = np.nanmedian(noise[0,...])        
+        self.max_cells = np.min([offset.shape[0], rel_gain.shape[0], mask.shape[0],
+                                 rel_gain_offset.shape[0], self.max_cells])
+        self.get_valid_image_idx()
+        self.gen_valid_range()        
+        self.copy_and_sanitize_non_cal_data()
+        self.create_output_datasets()
+        self.initialized = True
+    def split_gain(self, d):
+        """ Split gain information off 16-bit LPD data
+        Gain information can be found in bits 12 and 13 (0-based)
+        """
+        msk = np.zeros(d.shape, np.uint16)
+        msk[...] = 0b0000111111111111
+        data = np.bitwise_and(d, msk)
+        gain = np.right_shift(d, 12)
+        msk[...] = 0b0000000000000011
+        gain = np.bitwise_and(gain, msk)
+        return data, gain
+    def correct_lpd(self, irange):
+        """ Correct Raw LPD data for offset and relative gain effects
+        :param irange: list of image indices to work on, should be contiguous
+        Will raise an RuntimeError if `initialize()` has not been called first.
+        """
+        if not self.initialized:
+            raise RuntimeError("Must call initialize() first!")
+        lpd_base = self.lpd_base
+        cidx = self.cidx
+        im =  np.array(self.infile[lpd_base+"image/data"][irange,...])
+        trainId = np.squeeze(self.infile[lpd_base+"/image/trainId"][irange, ...])
+        pulseId = np.squeeze(self.infile[lpd_base+"image/pulseId"][irange, ...])
+        status = np.squeeze(self.infile[lpd_base+"/image/status"][irange, ...])
+        cells = np.squeeze(np.array(self.infile[lpd_base+"/image/cellId"][irange, ...]))
+        length = np.squeeze(np.array(self.infile[lpd_base+"/image/length"][irange, ...]))
+        im, gain = self.split_gain(im[:,0,...])
+        im = im.astype(np.float32)
+        im[gain > 2] = np.nan
+        if cidx == 0:
+            H, xe, ye =  np.histogram2d(im.flatten(), gain.flatten(),
+                                        bins=self.bins_gain_vs_signal,
+                                        range=[[0, 4096], [0, 4]])
+            self.hists_gain_vs_signal += H
+            self.signal_edges = (xe, ye)
+        gain[gain > 2] = 0
+        om = self.offset[cells,...]
+        rc = self.rel_gain[cells,...]
+        rbc = self.rel_gain_b[cells,...]
+        og = np.choose(gain, (om[...,0], om[...,1], om[...,2]))
+        rg = np.choose(gain, (rc[...,0], rc[...,1], rc[...,2]))        
+        rgb = np.choose(gain, (rbc[...,0], rbc[...,1], rbc[...,2]))
+        mskg = self.mask[cells,...]
+        msk = np.choose(gain, (mskg[...,0], mskg[...,1], mskg[...,2]))
+        im -= og
+        # hacky way of smoothening transition region between med and low
+        cfac = 0.314*np.exp(-im*0.001)  
+        im = (im-rgb)/rg
+        im /= self.flatfield[None,:,:]
+        # hacky way of smoothening transition region between med and low
+        im[gain == 2] -= im[gain == 2]*cfac[gain == 2] 
+        nidx = int(cidx+irange.size)
+        bidx = ~np.isfinite(im)
+        im[bidx] = 0
+        msk[bidx] = BadPixels.VALUE_IS_NAN.value
+        bidx = (im < -1e7) | (im > 1e7)
+        im[bidx] = 0
+        msk[bidx] = BadPixels.VALUE_OUT_OF_RANGE.value
+        if cidx == 0:
+            copim = copy.copy(im)
+            copim[copim < self.median_noise] = np.nan
+            H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1,2)), pulseId,
+                                       bins=(self.bins_signal_low_range, self.max_pulses),
+                                       range=[[-50, 1000],[0, self.max_pulses+1]])
+            self.hists_signal_low += H
+            self.low_edges = (xe, ye)
+            H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1,2)), pulseId,
+                                       bins=(self.bins_signal_high_range, self.max_pulses),
+                                       range=[[0, 200000],[0, self.max_pulses+1]])
+            self.hists_signal_high += H
+            self.high_edges = (xe, ye)
+        self.ddset[cidx:nidx,...] = im
+        self.gdset[cidx:nidx,...] = gain
+        self.mdset[cidx:nidx,...] = msk
+        self.outfile[lpd_base+"image/cellId"][cidx:nidx] = cells
+        self.outfile[lpd_base+"image/trainId"][cidx:nidx] = trainId
+        self.outfile[lpd_base+"image/pulseId"][cidx:nidx] = pulseId
+        self.outfile[lpd_base+"image/status"][cidx:nidx] = status
+        self.outfile[lpd_base+"image/length"][cidx:nidx] = length
+        self.cidx = nidx        
+    def get_valid_image_idx(self):
+        """ Return the indices of valid data
+        """
+        lpd_base = self.idx_base
+        if self.index_v == 2:
+            count = np.squeeze(self.infile[lpd_base+"image/count"])
+            first = np.squeeze(self.infile[lpd_base+"image/first"])
+            if np.count_nonzero(count != 0) == 0:
+                raise IOError("File has no valid counts")
+            valid = count != 0
+            idxtrains = np.squeeze(self.infile["/INDEX/trainId"])
+            medianTrain = np.nanmedian(idxtrains)
+            valid &= (idxtrains > medianTrain - 1e4) & (idxtrains < medianTrain + 1e4)
+            last_index = int(first[valid][-1]+count[valid][-1])
+            first_index = int(first[valid][0])
+        elif self.index_v == 1:
+            status = np.squeeze(self.infile[lpd_base+"image/status"])
+            if np.count_nonzero(status != 0) == 0:
+                raise IOError("File {} has no valid counts".format(infile))
+            last = np.squeeze(self.infile[lpd_base+"image/last"])
+            valid = status != 0
+            last_index = int(last[status != 0][-1])
+            first_index = int(last[status != 0][0])
+        else:
+            raise AttributeError("Not a known raw format version: {}".format(self.index_v))
+        self.valid = valid
+        self.first_index = first_index
+        self.last_index = last_index
+        self.idxtrains = idxtrains
+    def gen_valid_range(self):
+        """ Generate an index range to pass to `correctLPD`.
+        """
+        first_index = self.first_index
+        last_index = self.last_index
+        max_cells = self.max_cells
+        lpd_base = self.lpd_base
+        allcells = np.squeeze(np.array(self.infile[lpd_base+"image/cellId"][first_index:last_index, ...]))
+        single_image = np.array(np.array(self.infile[lpd_base+"image/data"][first_index, ...]))
+        can_calibrate = allcells < max_cells
+        if np.count_nonzero(can_calibrate) == 0:
+            return
+        allcells = allcells[can_calibrate]
+        firange = np.arange(first_index, last_index)
+        firange = firange[can_calibrate]
+        self.oshape = (firange.size, single_image.shape[1], single_image.shape[2])        
+        self.firange = firange
+        self.single_image = single_image
+    def copy_and_sanitize_non_cal_data(self):
+        """ Copy and sanitize data in `infile` that is not touched by `correctLPD`
+        """
+        lpd_base = self.lpd_base
+        idx_base = self.idx_base
+        first_index = self.first_index
+        last_index = self.last_index
+        valid = self.valid
+        idxtrains = self.idxtrains
+        firange = self.firange
+        alltrains = np.squeeze(self.infile[lpd_base+"image/trainId"][first_index:last_index,...])        
+        dont_copy = ["data", "cellId", "trainId", "pulseId", "status", "length"]
+        dont_copy = [lpd_base+"image/{}".format(do)
+                    for do in dont_copy]
+        dont_copy += [idx_base+"{}/first".format(do)
+                     for do in ["detector", "header", "image", "trailer"]]
+        dont_copy += [idx_base+"{}/count".format(do)
+                     for do in ["detector", "header", "image", "trailer"]]
+        def visitor(k, item):
+            if k not in dont_copy:
+                if isinstance(item, h5py.Group):
+                    self.outfile.create_group(k)
+                elif isinstance(item, h5py.Dataset):
+                    group = str(k).split("/")
+                    group = "/".join(group[:-1])
+                    self.infile.copy(k, self.outfile[group])
+        self.infile.visititems(visitor)
+        # sanitize indices
+        for do in ["detector", "header", "image", "trailer"]:
+            existing = np.squeeze(self.infile[idx_base+"{}/first".format(do)])
+            updated = existing-np.min(existing[valid])
+            self.outfile[idx_base+"{}/first".format(do)] = updated
+            existing = np.squeeze(self.infile[idx_base+"{}/count".format(do)])
+            new_counts, _ = np.histogram(alltrains[firange], bins=np.count_nonzero(valid)+1,
+                                        range=(np.min(idxtrains[valid]), np.max(idxtrains[valid]+1)))
+            updated = existing
+            updated[valid] = new_counts[:-1]
+            self.outfile[idx_base+"{}/count".format(do)] = updated
+    def create_output_datasets(self):
+        """ Initialize output data sets
+        """
+        lpdbase = self.lpd_base
+        chunksize = self.chunksize
+        firange = self.firange
+        oshape = self.oshape
+        self.ddset = self.outfile.create_dataset(lpdbase+"image/data",
+                                                 oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.float32)
+        self.gdset = self.outfile.create_dataset(lpdbase+"image/gain",
+                                                 oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.uint8)
+        self.mdset = self.outfile.create_dataset(lpdbase+"image/mask",
+                                                 oshape, chunks=(chunksize, oshape[1], oshape[2]), dtype=np.uint32)
+        self.outfile[lpdbase+"image/cellId"] = np.zeros(firange.size, np.uint16)
+        self.outfile[lpdbase+"image/trainId"] = np.zeros(firange.size, np.uint64)
+        self.outfile[lpdbase+"image/pulseId"] = np.zeros(firange.size, np.uint64)
+        self.outfile[lpdbase+"image/status"] = np.zeros(firange.size, np.uint16)        
+        self.outfile[lpdbase+"image/length"] = np.zeros(firange.size, np.uint32)
+        self.outfile.flush()
+    def get_histograms(self):
+        """ Return preview histograms computed from the first chunk
+        """
+        return ((self.hists_signal_low, self.hists_signal_high, self.hists_gain_vs_signal),
+                (self.low_edges, self.high_edges, self.signal_edges))
\ No newline at end of file
--- a/notebooks/LPD/LPD_Correct_and_Verify.ipynb
+++ b/notebooks/LPD/LPD_Correct_and_Verify.ipynb