diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py
index 24dc320f743eb589059b1c825bb2ab1e4651f4e8..7d44cba5b5a966f078fcb1651e0ff6ad403ca457 100644
--- a/cal_tools/cal_tools/agipdlib.py
+++ b/cal_tools/cal_tools/agipdlib.py
@@ -194,944 +194,6 @@ class AgipdCorrections:
         partially from DB and file.
         Constants not to be set by the initial call should be set to `None`.
         """
-        # swap the axes such that memory cell dimension is first - the usual
-        # way of operation, and the one expected by the rest of this class.
-        if swap_axis:
-            if offset is not None:
-                mvd = np.moveaxis(np.moveaxis(offset, 2, 0), 2, 1)
-                offset = np.ascontiguousarray(mvd)
-            if rel_gain is not None:
-                mvd = np.moveaxis(np.moveaxis(rel_gain, 2, 0), 2, 1)
-                rel_gain = np.ascontiguousarray(mvd)
-            if mask is not None:
-                mvd = np.moveaxis(np.moveaxis(mask, 2, 0), 2, 1)
-                mask = np.ascontiguousarray(mvd)
-            if noise is not None:
-                mvd = np.moveaxis(np.moveaxis(noise, 2, 0), 2, 1)
-                noise = np.ascontiguousarray(mvd)
-            if base_offset is not None:
-                fallback_bo = np.nanmedian(np.array(
-                    [b for b in base_offset[:-1] if
-                     isinstance(b, np.ndarray)]), axis=0)
-                for i in range(len(base_offset)):
-                    if base_offset[i] is not None:
-                        if not isinstance(base_offset[i], np.ndarray):
-                            base_offset[i] = fallback_bo
-                        mvd = np.moveaxis(base_offset[i], 2, 0)
-                        mvd = np.moveaxis(mvd, 2, 1)
-                        base_offset[i] = np.ascontiguousarray(mvd)
-            if thresholds is not None:
-                mvd = np.moveaxis(np.moveaxis(thresholds, 2, 0), 2, 1)
-                thresholds = np.ascontiguousarray(mvd)
-
-        if base_offset is not None:
-            self.base_offset = base_offset
-        if offset is not None:
-            self.offset = offset
-
-        if rel_gain is not None:
-            self.rel_gain = 1. / rel_gain
-
-        # if a mask already exists we OR the mask to maintain what we
-        # already have
-        if mask is not None:
-            if not hasattr(self, "mask"):
-                self.mask = mask
-            else:
-                self.mask |= mask[:self.mask.shape[0], ...]
-
-        if noise is not None:
-            self.noise = noise
-
-        if thresholds is not None:
-            self.thresholds = thresholds
-
-        if self.base_offset is not None and self.offset is not None:
-
-            # calculate a delta offset from base offsets and normal offsets
-            dohigh = np.zeros(self.offset.shape[:-1], np.float32)
-            for i in range(8):
-                for j in range(2):
-                    asico = self.offset[:, i * 64:(i + 1) * 64,
-                            j * 64:(j + 1) * 64, 0]
-                    asic_m = np.nanmedian(asico, axis=(1, 2))
-                    global_m = np.nanmedian(self.offset[:, ..., 0],
-                                            axis=(1, 2))
-                    doa = (asic_m - global_m)[:, None, None]
-                    dohigh[:, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = doa
-
-            self.dohigh = dohigh
-
-            dopc = self.base_offset[1] - self.base_offset[0]
-            dooff = self.offset[..., 1] - self.offset[..., 0]
-            doff = dopc - dooff
-            for i in range(8):
-                for j in range(2):
-                    ado = doff[:, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64]
-                    med = np.nanmedian(ado, axis=(1, 2))[:, None, None]
-                    doff[:, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = med
-
-            # This is deprecated part of code, which is not compatible with
-            # existing PC constants.
-            # self.offset[..., 1] += doff
-        if self.adjust_mg_baseline and slopesPC is not None:
-            x = np.linspace(0, 1000, 1000)
-            m_h = np.moveaxis(
-                np.moveaxis(slopesPC[..., :self.max_cells, 0], 0, 2), 0, 1)
-            b_h = np.moveaxis(
-                np.moveaxis(slopesPC[..., :self.max_cells, 1], 0, 2), 0, 1)
-            m_m = np.moveaxis(
-                np.moveaxis(slopesPC[..., :self.max_cells, 3], 0, 2), 0, 1)
-            b_m = np.moveaxis(
-                np.moveaxis(slopesPC[..., :self.max_cells, 4], 0, 2), 0, 1)
-            b_ho = self.offset[..., 0]
-            b_mo = self.offset[..., 1]
-            mz = -(b_m - b_mo) / m_m
-            cx = np.zeros(mz.shape)
-            for i in range(cx.shape[0]):
-                for j in range(cx.shape[1]):
-                    for k in range(cx.shape[2]):
-                        cx[i, j, k] = x[np.argmin(np.abs(mz[i, j, k] - x))]
-            doz = (m_h * cx + b_h - m_m * cx - b_m)
-            self.mg_bl_adjust = doz
-
-        # this is the first call, usually from the database
-        # we also generate file structures here, as the second call is optional
-        if not self.initialized:
-            self.median_noise = np.nanmedian(self.noise[0, ...])
-            cops = [self.offset.shape[0], self.mask.shape[0], self.max_cells]
-            self.max_cells = np.min(cops)
-            if not self.karabo_data_mode:
-                self.gen_valid_range()
-                self.copy_and_sanitize_non_cal_data()
-                self.create_output_datasets()
-            self.initialized = True
-            print("Offset medians are {}".format(
-                np.nanmedian(self.offset, axis=(0, 1, 2))))
-            if hasattr(self, "rel_gain"):
-                print("Gain medians are {}".format(
-                    np.nanmedian(self.rel_gain, axis=(0, 1, 2))))
-            print("Threshold medians are {}".format(
-                np.nanmedian(self.thresholds, axis=(0, 1, 2))))
-        else:
-            print("After reading from file: ")
-            print("Offset medians are {}".format(
-                np.nanmedian(self.offset, axis=(0, 1, 2))))
-            print("Gain medians are {}".format(
-                np.nanmedian(self.rel_gain, axis=(0, 1, 2))))
-            print("Threshold medians are {}".format(
-                np.nanmedian(self.thresholds, axis=(0, 1, 2))))
-
-    def split_gain(self, d):
-        """ Split gain information off AGIPD Data
-        """
-        return d[:, 0, ...], d[:, 1, ...]
-
-    def split_gain_il(self, d):
-        """ Split gain information off AGIPD IL Data
-        """
-        return d[0::2, 0, ...], d[1::2, 0, ...]
-
-    def baseline_correct_via_noise(self, d, noise, g):
-        """ Correct baseline shifts by evaluating position of the noise peak
-
-        :param: d: the data to correct, should be a single image
-        :param noise: the mean noise for the cell id of `d`
-        :param g: gain array matching `d` array
-
-        Correction is done by identifying the left-most (significant) peak
-        in the histogram of `d` and shifting it to be centered on 0.
-        This is done via a continous wavelet transform (CWT), using a Ricker
-        wavelet.
-
-        Only high gain data is evaulated, and data larger than 50 ADU,
-        equivalent of roughly a 9 keV photon is ignored.
-
-        Two passes are executed: the first shift is accurate to 10 ADU and
-        will catch baseline shifts smaller then -2000 ADU. CWT is evaluated
-        for peaks of widths one, three and five time the noise.
-        The baseline is then shifted by the position of the summed CWT maximum.
-
-        In a second pass histogram is evaluated within a range of
-        +- 5*noise of the initial shift value, for peak of width noise.
-
-        Baseline shifts larger than the maximum threshold or positive shifts
-        larger 10 are discarded, and the original data is returned, otherwise
-        the shift corrected data is returned.
-
-        """
-        # we work on a copy to be able to filter values by setting them to
-        # np.nan
-        dd = copy.copy(d)
-        dd[g != 0] = np.nan  # only high gain data
-        dd[dd > 50] = np.nan  # only noise data
-        h, e = np.histogram(dd, bins=210, range=(-2000, 100))
-        c = (e[1:] + e[:-1]) / 2
-
-        try:
-            cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. * noise])
-        except:
-            return d
-        cwtall = np.sum(cwtmatr, axis=0)
-        marg = np.argmax(cwtall)
-        pc = c[marg]
-
-        high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise)
-        dd[~high_res_range] = np.nan
-        h, e = np.histogram(dd, bins=200,
-                            range=(pc - 5 * noise, pc + 5 * noise))
-        c = (e[1:] + e[:-1]) / 2
-        try:
-            cwtmatr = cwt(h, ricker, [noise, ])
-        except:
-            return d
-        marg = np.argmax(cwtmatr)
-        pc = c[marg]
-        # too large shift to be easily decernable via noise
-        if pc > 10 or pc < -self.baseline_corr_noise_threshold:
-            return d
-        return d - pc
-
-    def correct_baseline_via_hist(self, d, pcm, g):
-        """ Correct baseline shifts by matching edges of high and medium
-        gain histogram
-
-        :param d: single image to correct
-        :param pcm: relative gain slope for medium gain of each pixel in `d`
-        :param g: gain values for `d`
-
-        As a preparation the median value of medium gain pixels is shifted in
-        increments of 50 ADU as long as it is negative.
-
-        Correction is then performed by evaluating histograms for high gain
-        and medium gain pixels in `d`. The right-most significant bin of the
-        high gain histogram is then shifted such that it coincides with the
-        left-most significant bin of the medium gain histogram. Significant
-        here means that bin counts are at least 10% of the average bin count
-        of the histogram for all bins larger 10 counts. This initial evaluation
-        uses histograms in range between -10000 and +10000 ADU with a
-        resolution of 100 ADU per bin. The shift required to match both
-        histogram borders is an initial estimate for the baseline shift.
-
-        Finally, the mean bin count of the five outermost bins of the high and
-        medium gain histograms is compared. The baseline is shifted by
-        increments of 1 ADU until they are within 20% of each other.
-
-        From this point on additional shifts are performed while the
-        deviation stays within 30% of each other. The final shift is then
-        evaluated as the point of minimal deviation of these values.
-
-        A maximum iteration count of 200 is imposed on the procedure. If no
-        convergence is reached within this limit, the original data is
-        returned, otherwise the baseline corrected data is returned.
-        """
-        dd = copy.copy(d)
-        # shift while the medium gain produces negative values on average
-        pc = 0
-        it = 0
-        max_it = 100
-        while np.nanmedian(dd[g == 1] - pc) < 0:
-            pc -= 50
-            if it > max_it:
-                return d
-            it += 1
-
-        def min_hist_distance(pc, bins=100, ran=(-10000, 10000), dec=20,
-                              minbin=10):
-            hh, e = np.histogram(dd[g == 0] - pc, bins=bins, range=ran)
-            hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=bins,
-                                 range=ran)
-
-            # right most significant value of high gain histogram
-            hhm = np.nanmean(hh[hh > minbin])
-            rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1])
-
-            # left most significant value of high gain histogram
-            hmm = np.nanmean(hm[hm > minbin])
-            lmm = np.argmax((hm > 0.1 * hmm))
-            med_pcm = np.nanmedian(pcm[g == 1])
-            eh = e[rmh]
-            el = e[lmm]
-            pc += (el - eh) / (med_pcm - 1)
-            return pc
-
-        # set a pc one halfstep higher than initial guesstimate
-        pc += 50
-        pc = min_hist_distance(pc)
-
-        # now start rolling back until we have similar signal levels
-        def comp_signal_level(pc, minbin=1):
-            hh, e = np.histogram(dd[g == 0] - pc, bins=100, range=(5000, 7000))
-            hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=100,
-                                 range=(5000, 7000))
-
-            # right most significant value of high gain histogram
-            hhm = np.nanmean(hh[hh > minbin])
-            rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1])
-
-            # left most significant value of high gain histogram
-            hmm = np.nanmean(hm[hm > minbin])
-            lmm = np.argmax((hm > 0.1 * hmm))
-
-            reg = 5
-            ret = np.abs(np.sum(hh[rmh - reg:rmh]) - np.sum(hm[lmm:lmm + reg]))
-            ret /= np.sum(hh[rmh - reg:rmh])
-            return ret
-
-        it = 0
-        max_it = 200
-        opc = pc
-        m_it_break = False
-        while comp_signal_level(pc) > 0.2:
-            pc += 1
-            if it > max_it:
-                pc = opc
-                m_it_break = True
-                break
-            it += 1
-
-        it = 0
-        if not m_it_break:
-            pcs = [pc]
-            slevs = []
-            slev = comp_signal_level(pc)
-            slevs.append(slev)
-            while slev < 0.3:
-                pc += 1
-                slev = comp_signal_level(pc)
-                slevs.append(slev)
-                pcs.append(pc)
-                it += 1
-
-            pc = pcs[np.argmin(slevs)]
-
-        return d - pc
-
-    def correct_baseline_via_hist_asic(self, d, g):
-        """ Correct diagonal falloff on ASICs by matching corner histograms
-
-        :param d: single image data
-        :param g: gain values for `d`
-
-        Corrections are performed for the top and bottom row of ASICs
-        seperately.
-
-        In easch row, starting from the beam-hole closest ASIC, the histogram
-        of a square region of size 8x8 pixels is evaluted for its maximum
-        bin count location (only medium gain values), and compared to a
-        histogram produced from a neighbouring region on the next ASIC.
-        Double sized pixels are avoided.
-
-        The reasoning is that the histograms should not dramatically differ
-        for continuously distributed photon events. Each ASIC is then shifted
-        such that histograms match and the corrected data is returned.
-
-        Warning: this feature is very experimental!
-        """
-
-        rsize = 8
-
-        def hist_ends(dm, bins=5000, ran=(-5000, 5000),
-                      minbin=4 / (16 / rsize)):
-
-            hm, e = np.histogram(dm, bins=bins, range=ran)
-
-            # left most significant value of medium gain histogram
-            # hmm = np.nanmean(hm[hm > minbin])
-            lmm = np.argmax((hm > minbin))
-
-            return e[lmm], np.sum(hm)
-
-        for i in range(1, 8):
-
-            # upper asics
-            casic = np.concatenate(
-                (d[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(),
-                 d[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten()))
-
-            casic_g = np.concatenate(
-                (g[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(),
-                 g[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten()))
-
-            idxm = casic_g == 1
-            cme, cms = hist_ends(casic[idxm])
-
-            asic = d[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten()
-            asic_g = g[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten()
-
-            idxm = asic_g == 1
-            me, ms = hist_ends(asic[idxm])
-
-            if cms > rsize * rsize / 10 and ms > rsize * rsize / 10:
-                pc = cme - me
-            else:
-                pc = 0
-
-            if np.abs(pc) > 500:
-                # somthing when wrong
-                pc = 0
-            d[i * 64:(i + 1) * 64, 64:] += pc
-
-        for i in range(0, 7):
-            # lower asics
-            casic = np.concatenate((d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
-                                    64:64 + rsize].flatten(),
-                                    d[(i + 1) * 64 + 1:(i + 1) * 64 + rsize,
-                                    64 - rsize:64].flatten()))
-
-            casic_g = np.concatenate((g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
-                                      64:64 + rsize].flatten(),
-                                      g[(i + 1) * 64 + 1:(i + 1) * 64 + rsize,
-                                      64 - rsize:64].flatten()))
-
-            idxm = casic_g == 1
-            cme, cms = hist_ends(casic[idxm])
-
-            asic = d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
-                   64 - rsize:64].flatten()
-            asic_g = g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
-                     64 - rsize:64].flatten()
-
-            idxm = asic_g == 1
-            me, ms = hist_ends(asic[idxm])
-
-            if cms > rsize * rsize / 10 and ms > rsize * rsize / 10:
-                pc = cme - me
-            else:
-                pc = 0
-
-            if np.abs(pc) > 500:
-                # somthing went wrong
-                pc = 0
-            d[i * 64:(i + 1) * 64, :64] += pc
-
-        return d
-
-    def match_asic_borders(self, d, chunk=8):
-        """ Match border pixels of the two ASIC rows to the same median value
-
-        :param d: single image data
-        :param chunk: number of pixels on each ASIC boundary to generate
-            mean values for
-
-        Each ASIC has `n=64/chunk` mean values calculated along its two border
-        pixel rows. The deviation of chunk pairs is then evaluated and the
-        upper/lower ASICs are shifted by the mean deviation for the
-        right/left hemisphere of the detector.
-
-        The corrected data is returned.
-        """
-        for i in range(8):
-
-            these_asics = d[:, i * 64:(i + 1) * 64, :]
-            diffs = np.zeros((d.shape[0], 8))
-            for k in range(64 // chunk):
-                ldat = these_asics[:, k * chunk:(k + 1) * chunk, 62:64]
-                lowerasic = np.nanmedian(ldat, axis=(1, 2))
-                udat = these_asics[:, k * chunk:(k + 1) * chunk, 64:66]
-                upperasic = np.nanmedian(udat, axis=(1, 2))
-                low_up = lowerasic - upperasic
-                up_low = upperasic - lowerasic
-                diff = low_up if self.channel < 8 else up_low
-                diffs[:, k] = diff
-
-            mn = np.nanmean(diffs, axis=1)[:, None, None] / 2
-            if self.channel < 8:
-                d[:, i * 64:(i + 1) * 64, 64:] += mn
-                d[:, i * 64:(i + 1) * 64, :64] -= mn
-            else:
-                d[:, i * 64:(i + 1) * 64, :64] += mn
-                d[:, i * 64:(i + 1) * 64, 64:] -= mn
-        return d
-
-    def melt_snowy_pixels(self, raw, im, gain, rgain, resolution=None):
-        """ Identify (and optionally interpolate) 'snowy' pixels
-
-        :param raw: raw data
-        :param im: offset and relative gain corrected data:
-        :param gain: gain values for `raw`
-        :param rgain: raw gain data, scaled by the threshold for
-            high-to-medium gain switching
-        :param resolution: resolution strategy, should be of enum-type
-            `SnowResolution`
-
-        Snowy pixels are pixels which are already identified as pixels in
-        the medium gain stage by their gain values, but which have
-        transitional image values between the largest high gain value and
-        the smalles medium gain value. As these values may be encountered again
-        in the context of medium gain, they are ambiguous and cannot
-        readily be identified by simple thresholding alone.
-
-        It is attempted to identify these snowy pixels by using a Gaussian
-        mixture clustering algorithm acting on multispectral input.
-        Positions in the cluster are identified as follows:
-
-            x: abs(p_r-p_bg)*p_rg**2
-            y: p_r
-
-        where p_r is a given pixel raw value, p_bg is the mean value of the
-        8 direct neighbor pixels, and p_rg is the raw gain value of the pixel.
-
-        Note that these cluster params are purely empirically derived,
-        and do not have any connection to ASIC design etc.
-
-        Only pixels in medium gain are evaluated, and evaluation is
-        image-wise, but subdivided further into ASICs.
-
-        The so-created graph [[x_0, x_1, ..., x_n], [y_0, y_1, ..., y_n]] is
-        scaled using a `sklearn.StandardScaler` and input into a two-component
-        `GaussianMixture` clustering algorithm. This results in a set of
-        labels, identifying pixels as likely snowy, or not. However,
-        for a given image we do not know which label is which from the
-        output. Hence, labels are differentiated under the assumption that
-        snowy pixel occur to be out-of-context, i.e. their surrounding pixels
-        are more likely at lower signal values, than if the pixel is in a region
-        were gain switching led to a large value.
-
-        Depending on the resolution strategy so-identified pixels are either
-        set to `np.nan` or to the interpolated value of the direct (high-gain)
-        neighbors.
-
-        The corrected data is returned alongside an updated gain mask and bad
-        pixel
-        mask.
-        """
-        snow_mask = np.zeros(im.shape, np.uint32)
-        for k in range(im.shape[0]):
-            for i in range(8):
-                for j in range(2):
-
-                    fidx = gain[k, i * 64:(i + 1) * 64,
-                           j * 64:(j + 1) * 64] == 1
-                    fidx_h = gain[k, i * 64:(i + 1) * 64,
-                             j * 64:(j + 1) * 64] == 0
-
-                    # need at least two pixels in medium gain to be able to
-                    # cluster in two groups
-                    if np.count_nonzero(fidx) < 2:
-                        continue
-
-                    # data for a given asic
-                    asic = raw[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64]
-                    asic_g = gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64]
-                    asic_r = rgain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64]
-
-                    asic_h = copy.copy(asic)
-                    asic_h[~fidx_h] = np.nan
-
-                    # calculate the background of by averaging the 8 direct
-                    # neighbours of each pixel
-                    rl = np.roll(asic, 1, axis=0)
-                    rr = np.roll(asic, -1, axis=0)
-                    ru = np.roll(asic, 1, axis=1)
-                    rd = np.roll(asic, -1, axis=1)
-                    rlu = np.roll(rl, 1, axis=1)
-                    rld = np.roll(rl, -1, axis=1)
-                    rru = np.roll(rr, 1, axis=1)
-                    rrd = np.roll(rr, -1, axis=1)
-                    bg = (rl + rr + ru + rd + rlu + rld + rru + rrd) / 8
-
-                    # calculate the background of by averaging the 8 direct
-                    # neighbours of each pixel
-                    # here only for high gain pixels
-                    rl = np.roll(asic_h, 1, axis=0)
-                    rr = np.roll(asic_h, -1, axis=0)
-                    ru = np.roll(asic_h, 1, axis=1)
-                    rd = np.roll(asic_h, -1, axis=1)
-                    rlu = np.roll(asic_h, 1, axis=1)
-                    rld = np.roll(asic_h, -1, axis=1)
-                    rru = np.roll(asic_h, 1, axis=1)
-                    rrd = np.roll(asic_h, -1, axis=1)
-                    bg_h = np.nanmean(
-                        np.array([rl, rr, ru, rd, rlu, rld, rru, rrd]), axis=0)
-
-                    # construct a graph
-                    graph = np.array(
-                        [np.abs(asic[fidx] - bg[fidx]) * asic_r[fidx] ** 2,
-                         asic[fidx]]).T
-                    # scale it
-                    graph = StandardScaler().fit_transform(graph)
-                    # perform clustering
-                    spc = GaussianMixture(n_components=2, random_state=0)
-                    spc.fit(graph)
-                    # and get labels
-                    labels = spc.predict(graph)
-
-                    asic = im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64]
-                    asic_msk = snow_mask[k, i * 64:(i + 1) * 64,
-                               j * 64:(j + 1) * 64]
-                    mim = asic[fidx]
-                    asicb = bg_h
-                    mimb = asicb[fidx]
-                    mimg = asic_g[fidx]
-                    mmsk = asic_msk[fidx]
-
-                    # identify which labels are which
-                    mn1 = np.nanmean(bg[fidx][labels == 0])
-                    mn2 = np.nanmean(bg[fidx][labels == 1])
-                    implabel = 1
-                    if mn1 > mn2:
-                        implabel = 0
-
-                    # if we've misidentified, then we will have to many
-                    # snowy pixels
-                    # happens if the ASIC is generally on a high signal level
-                    if np.count_nonzero([labels == implabel]) > 64 * 64 / 3:
-                        continue
-
-                    # or a large portion of misidenfied label covers the ASIC
-                    if np.count_nonzero([labels != implabel]) > 0.8 * 64 * 64:
-                        continue
-
-                    # set to NaN if requested
-                    if resolution is SnowResolution.NONE:
-                        mim[labels == implabel] = np.nan
-                    # or use the interpolated value
-                    elif resolution is SnowResolution.INTERPOLATE:
-                        mim[labels == implabel] = mimb[labels == implabel]
-                        mimg[labels == implabel] = 0
-                    # identify these pixels in a bad pixel mask
-                    mmsk[
-                        labels == implabel] = BadPixels.TRANSITION_REGION.value
-
-                    # now set back to data
-                    asic[fidx] = mim
-                    asic_g[fidx] = mimg
-                    asic_msk[fidx] = mmsk
-                    im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic
-                    gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic_g
-                    snow_mask[k, i * 64:(i + 1) * 64,
-                              j * 64:(j + 1) * 64] = asic_msk
-        return im, gain, snow_mask
-
-    def make_noisy_adc_mask(self, bmask):
-        """ Mask entire ADC if they are noise above a relative threshold
-        """
-        msk = np.count_nonzero(
-            ((bmask & BadPixels.NOISE_OUT_OF_THRESHOLD.value != 0)
-             | (bmask & BadPixels.OFFSET_NOISE_EVAL_ERROR.value != 0)).astype(
-                np.int), axis=0)
-
-        fmask = np.zeros_like(msk).astype(np.uint32)
-        adc_i = bmask.shape[1] // 8
-        adc_j = bmask.shape[2] // 8
-        for i in range(adc_i):
-            for j in range(adc_j):
-                adc_cnt = np.count_nonzero(
-                    msk[i * adc_i:(i + 1) * adc_i, j * adc_j:(j + 1) * adc_j] >
-                    bmask.shape[0] // 32)
-                if adc_cnt / (adc_i * adc_j) >= self.mask_noisy_adc:
-                    fmask[i * adc_i:(i + 1) * adc_i,
-                          j * adc_j:(j + 1) * adc_j] = BadPixels.NOISY_ADC.value
-        return fmask
-
-    def correct_agipd(self, irange):
-        """ Correct Raw AGIPD data for offset and relative gain effects
-
-        :param irange: list of image indices to work on, should be contiguous,
-            or karabo_data structure to work on
-
-        Will raise an RuntimeError if `initialize()` has not been called first.
-        """
-        if not self.initialized:
-            raise RuntimeError("Must call initialize() first!")
-
-        if not self.karabo_data_mode:
-            agipd_base = self.agipd_base
-            cidx = self.cidx
-            im = np.array(self.infile[agipd_base + "image/data"][irange, ...])
-            trainId = np.squeeze(
-                self.infile[agipd_base + "/image/trainId"][irange, ...])
-            pulseId = np.squeeze(
-                self.infile[agipd_base + "image/pulseId"][irange, ...])
-            status = np.squeeze(
-                self.infile[agipd_base + "/image/status"][irange, ...])
-            cellid = np.squeeze(
-                np.array(
-                    self.infile[agipd_base + "/image/cellId"][irange, ...]))
-            length = np.squeeze(
-                np.array(
-                    self.infile[agipd_base + "/image/length"][irange, ...]))
-
-            # this far end of the image index range we are working on
-            nidx = int(cidx + irange.size)
-        else:
-            cidx = 1  # do not produce any histograms
-            im = irange['image.data']
-            trainId = np.squeeze(irange['image.trainId'])
-            status = np.squeeze(irange['image.status'])
-            pulseId = np.squeeze(irange['image.pulseId'])
-            cellid = np.squeeze(irange['image.cellId'])
-            length = np.squeeze(irange['image.length'])
-
-        # split off image and gain into two separate arrays
-        im, ga = self.gsfun(im)
-
-        # we will work on float data from now on
-        im = im.astype(np.float32)
-
-        # some correction require us to maintain a copy of the raw data
-        raw = copy.copy(im)
-
-        # handle very old interlaced data
-        if self.il_mode:
-            trainId = trainId[0::2]
-            pulseId = pulseId[0::2]
-            status = status[0::2]
-            cellid = cellid[0::2]
-            length = length[0::2]
-
-        # first evaluate the gain into 0, 1, 2 --> high, medium, low
-        gain = np.zeros(ga.shape, np.uint8)
-        gain[...] = 0
-        t0 = self.thresholds[..., 0]
-        t1 = self.thresholds[..., 1]
-        # exceeding first threshold means data is medium or low gain
-        gain[ga > t0[cellid, ...]] = 1
-        # exceeding also second threshold means data is low gain
-        gain[(ga > t1[cellid, ...]) & (
-                t1[cellid, ...] > 1.05 * t0[cellid, ...])] = 2
-
-        # force into high or medium gain if requested
-        if self.force_mg_if_below is not None and self.force_mg_if_below > 0:
-            gain[(gain == 2) & ((im - self.offset[
-                cellid, ..., 1]) < self.force_mg_if_below)] = 1
-
-        if self.force_hg_if_below is not None and self.force_hg_if_below > 0:
-            gain[(gain > 0) & ((im - self.offset[
-                cellid, ..., 0]) < self.force_hg_if_below)] = 0
-
-        for i in range(3):
-            self.gain_stats[i] = np.sum(gain == i)
-
-        # check if any data has zero standard deviation, and mark this in
-        # the bad pixel maks
-        # this can be done on otherwise not corrected data.
-        if self.sig_zero_mask is None:
-            self.sig_zero_mask = np.zeros(
-                (self.max_cells, im.shape[1], im.shape[2]), np.uint32)
-            for c in range(self.max_cells):
-                std = np.nanstd(im[cellid == c, ...], axis=0)
-                self.sig_zero_mask[
-                    c, std == 0] = BadPixels.DATA_STD_IS_ZERO.value
-
-        # for feedback we produced histograms for the first chunk
-        if cidx == 0:
-            H, xe, ye = np.histogram2d(im.flatten(), ga.flatten(),
-                                       bins=self.bins_gain_vs_signal,
-                                       range=[[4000, 8192], [4000, 8192]])
-            self.hists_gain_vs_signal += H
-            self.signal_edges = (xe, ye)
-
-            H, xe, ye = np.histogram2d(im.flatten(), gain.flatten(),
-                                       bins=self.bins_dig_gain_vs_signal,
-                                       range=[[4000, 8192], [0, 4]])
-            self.hists_dig_gain_vs_signal += H
-            self.dig_signal_edges = (xe, ye)
-
-        # get the correct constant depending on cell-id
-        offsetb = self.offset[cellid, ...]
-        tmask = self.mask[cellid, ...]
-
-        # choose constants according to gain setting
-        off = np.choose(gain,
-                        (offsetb[..., 0], offsetb[..., 1], offsetb[..., 2]))
-
-import copy
-from enum import Enum
-
-import h5py
-import numpy as np
-from cal_tools.enums import BadPixels
-from cal_tools.tools import get_constant_from_db, get_constant_from_db_and_time
-from iCalibrationDB import Constants, Conditions, Detectors
-from scipy.signal import cwt, ricker
-from sklearn.mixture import GaussianMixture
-from sklearn.preprocessing import StandardScaler
-
-
-def get_num_cells(fname, loc, module):
-    with h5py.File(fname, "r") as f:
-        cells = \
-            f["INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId".format(loc, module)][()]
-        if cells.shape[0] == 0:
-            return None
-        maxcell = np.max(cells)
-        options = [4, 32, 64, 76, 128, 176, 202, 250]
-        dists = [abs(o - maxcell) for o in options]
-        return options[np.argmin(dists)]
-
-
-def get_acq_rate(fname, loc, module):
-    with h5py.File(fname, "r") as f:
-        try:
-            pulses = \
-                np.squeeze(f["INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId".format(
-                    loc, module)][:2])
-            diff = pulses[1] - pulses[0]
-        except:
-            diff = 0
-        options = {8: 0.5, 4: 1.1, 2: 2.2, 1: 4.5}
-        return options.get(diff, None)
-
-
-class SnowResolution(Enum):
-    """ An Enum specifying how to resolve snowy pixels
-    """
-    NONE = "none"
-    INTERPOLATE = "interpolate"
-
-
-class AgipdCorrections:
-    """
-    The AgipdCorrections class perfroms AGIPD offline correction
-
-    The following example shows a typical use case::
-
-
-        infile = h5py.File(filename, "r", driver="core")
-        outfile = h5py.File(filename_out, "w")
-
-        agp_corr = AgipdCorrections(infile, outfile, max_cells, channel,
-                                    max_pulses,
-                                    bins_gain_vs_signal, bins_signal_low_range,
-                                    bins_signal_high_range)
-
-        try:
-            agp_corr.initialize(offset, rel_gain, rel_gain_offset, mask,
-                                noise, flatfield)
-        except IOError:
-            return
-
-        for irange in agp_corr.get_iteration_range():
-            agp_corr.correct_agipd(irange)
-
-        hists, edges = agp_corr.get_histograms()
-
-    """
-
-    def __init__(self, infile, outfile, max_cells, channel, max_pulses,
-                 bins_gain_vs_signal, bins_signal_low_range,
-                 bins_signal_high_range,
-                 bins_dig_gain_vs_signal, raw_fmt_version=2, chunk_size=512,
-                 h5_data_path="INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
-                 h5_index_path="INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/",
-                 do_rel_gain=True, chunk_size_idim=512, il_mode=False,
-                 cal_det_instance="AGIPD1M1", karabo_data_mode=False,
-                 force_hg_if_below=None, force_mg_if_below=None,
-                 mask_noisy_adc=False, adjust_mg_baseline=False,
-                 acquisition_rate=None, dont_zero_nans=False,
-                 dont_zero_orange=False):
-        """
-        Initialize an AgipdCorrections Class
-
-        :param infile: to be corrected h5py input file
-        :param outfile: writeable h5py output file
-        :param max_cell: maximum number of memory cells to handle, e.g. if
-            calibration constants only exist for a subset of cells
-        :param channel: module/channel to correct
-        :param max_pulses: a range list of pulse indices used for
-            calibration and histogram. [start, end, step]
-        :param bins_gain_vs_signal: number of bins for gain vs signal histogram
-        :param bins_signal_low_range: number of bins for the low signal
-            range histogram
-        :param bins_signal_high_range: number of bins for the high signal
-            range histogram
-        :param raw_fmt_version: raw file format version to use
-        :param chunk_size: images per chunk to compute upon
-        :param h5_data_path: path in HDF5 file which is prefixed to the
-            image/data section
-        :param h5_index_path: path in HDF5 file which is prefixed to the
-            index section
-        :param do_rel_gain: do relative gain corrections
-        :param chunk_size_idim: chunking size on image dimension when
-            writing data out
-        :param il_mode: set to true if AGIPD data is interlaced (pre-Nov 2017 data)
-        :param karabo_data_mode: set to true to use data iterated with karabo data
-        :param force_hg_if_below: set to a value different to None/0 to force a pixels
-            gain to high, if the pixel value is below the given
-            value after high gain offset subtraction.
-        """
-        self.agipd_base = h5_data_path.format(channel)
-        self.idx_base = h5_index_path.format(channel)
-        self.infile = infile
-        self.outfile = outfile
-        self.channel = channel
-        self.index_v = raw_fmt_version
-        self.chunksize = chunk_size
-        self.initialized = False
-        self.pulses_lst = list(range(*max_pulses))
-        self.min_pulse = self.pulses_lst[0]
-        self.max_pulse = self.pulses_lst[-1]
-        self.max_cells = max_cells
-        self.hist_pulses = 0
-        self.hists_signal_low = 0
-        self.hists_signal_high = 0
-        self.hists_gain_vs_signal = 0
-        self.hists_dig_gain_vs_signal = 0
-        self.bins_gain_vs_signal = bins_gain_vs_signal
-        self.bins_signal_low_range = bins_signal_low_range
-        self.bins_signal_high_range = bins_signal_high_range
-        self.bins_dig_gain_vs_signal = bins_dig_gain_vs_signal
-        self.cidx = 0
-        self.do_rel_gain = do_rel_gain
-        self.sig_zero_mask = None
-        self.base_offset = None
-        self.baseline_corr_using_noise = False
-        self.baseline_corr_noise_threshold = 100
-        self.baseline_corr_using_hmatch = True
-        self.correct_asic_diag = True
-        self.melt_snow = SnowResolution.NONE
-        self.match_asics = True
-        self.chunk_size_idim = chunk_size_idim
-        self.dohigh = 0
-        self.gsfun = self.split_gain_il if il_mode else self.split_gain
-        self.il_mode = il_mode
-        self.cal_det_instance = cal_det_instance
-        self.karabo_data_mode = karabo_data_mode
-        self.force_hg_if_below = force_hg_if_below
-        self.force_mg_if_below = force_mg_if_below
-        self.mask_noisy_adc = mask_noisy_adc
-        self.adc_mask = None
-        self.gain_stats = [0, 0, 0]
-        self.adjust_mg_baseline = adjust_mg_baseline
-        self.mg_bl_adjust = 0
-        self.acquisition_rate = acquisition_rate
-        self.dont_zero_nans = dont_zero_nans
-        self.dont_zero_orange = dont_zero_orange
-        self.valid_indices = None
-
-    def get_iteration_range(self):
-        """Returns a range expression over which to iterate in chunks
-        """
-        splitlength = self.firange.size // self.chunksize
-        splits = np.arange(0, self.firange.size, self.chunksize)
-        return [a for a in np.array_split(self.firange, splits) if a.size > 0]
-
-    def initialize(self, offset, rel_gain, mask, noise, thresholds,
-                   base_offset, slopesPC=None, swap_axis=True):
-        """ Initialize the calibration constants and the output data file.
-
-        Any data that is not touched by the corrections is copied into the
-        output file at this point. Also data validity tests are performed.
-        These functions must be called before `correct_agipd` is executed.
-
-        :param offset: offset map to use for corrections
-        :param rel_gain: relative gain map to use for corrections
-        :param mask: bad pixel mask to use for corrections
-        :param noise: noise map to use for corrections
-        :param thresholds: thresholds for gain determination
-        :param base_offset: base offsets from PC and FF data: should be a
-            tuple:
-            PC offset for high gain, PC offset for medium gain, FF offset
-            for high gain
-        :param swap_axis: set to True if using data from the calibration
-            database.
-
-        Note that this function may be called twice, when initializing
-        partially from DB and file.
-        Constants not to be set by the initial call should be set to `None`.
-        """
-
         # swap the axes such that memory cell dimension is first - the usual
         # way of operation, and the one expected by the rest of this class.
         if swap_axis:
diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
index 5a85f526f4f7f191036d73963e17066ff12bb0c1..a8e51df7155948bb01e9bdf3a23b49d6df6235a0 100644
--- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
+++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb
@@ -38,7 +38,7 @@
     "bias_voltage = 300\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8045\" # the database interface to use\n",
     "use_dir_creation_date = True # use the creation data of the input dir for database queries\n",
-    "sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
+    "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n",
     "photon_energy = 9.2 # photon energy in keV\n",
    "index_v = 2 # version of RAW index type\n",
     "nodb = False # if set only file-based constants will be used\n",