diff --git a/cal_tools/cal_tools/agipdlib.py b/cal_tools/cal_tools/agipdlib.py index 0b5ba63060d81833ded5fc64383d08860fb29b73..7a83a61d959060bbd21d224462aa513a3582ed9c 100644 --- a/cal_tools/cal_tools/agipdlib.py +++ b/cal_tools/cal_tools/agipdlib.py @@ -1,16 +1,12 @@ -import copy -from enum import Enum -import os - import h5py import numpy as np -from scipy.signal import cwt, ricker -from sklearn.mixture import GaussianMixture -from sklearn.preprocessing import StandardScaler +import sharedmem +import traceback -from cal_tools.agipdutils import assemble_constant_dict -from cal_tools.enums import BadPixels -from cal_tools.tools import get_constant_from_db, get_constant_from_db_and_time +from cal_tools.agipdutils import * +from cal_tools.cython import agipdalgs as calgs +from cal_tools.enums import BadPixels, SnowResolution +from cal_tools.tools import get_constant_from_db_and_time from iCalibrationDB import Constants, Conditions, Detectors @@ -69,91 +65,60 @@ def get_gain_setting(fname, h5path_ctrl): raise ValueError('Could not derive gain setting from setupr and patternTypeIndex') # noqa -class SnowResolution(Enum): - """ An Enum specifying how to resolve snowy pixels - """ - NONE = "none" - INTERPOLATE = "interpolate" - - class AgipdCorrections: - """ - The AgipdCorrections class perfroms AGIPD offline correction - - The following example shows a typical use case:: - - - infile = h5py.File(filename, "r", driver="core") - outfile = h5py.File(filename_out, "w") - - agp_corr = AgipdCorrections(infile, outfile, max_cells, channel, - max_pulses, - bins_gain_vs_signal, bins_signal_low_range, - bins_signal_high_range) - - try: - agp_corr.initialize(offset, rel_gain, rel_gain_offset, mask, - noise, flatfield) - except IOError: - return - - for irange in agp_corr.get_iteration_range(): - agp_corr.correct_agipd(irange) - - hists, edges = agp_corr.get_histograms() - """ - - def __init__(self, infile, outfile, max_cells, channel, max_pulses, - bins_gain_vs_signal, bins_signal_low_range, - bins_signal_high_range, - bins_dig_gain_vs_signal, raw_fmt_version=2, chunk_size=512, + def __init__(self, max_cells, max_pulses, h5_data_path="INSTRUMENT/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", h5_index_path="INDEX/SPB_DET_AGIPD1M-1/DET/{}CH0:xtdf/", - chunk_size_idim=512, il_mode=False, - cal_det_instance="AGIPD1M1", karabo_data_mode=False, - force_hg_if_below=None, force_mg_if_below=None, - mask_noisy_adc=False, acquisition_rate=None, gain_setting=None, - corr_bools=None): + corr_bools={}): """ Initialize an AgipdCorrections Class - :param infile: to be corrected h5py input file - :param outfile: writeable h5py output file - :param max_cell: maximum number of memory cells to handle, e.g. if + :param max_cells: maximum number of memory cells to handle, e.g. if calibration constants only exist for a subset of cells - :param channel: module/channel to correct :param max_pulses: a range list of pulse indices used for calibration and histogram. 
[start, end, step] - :param bins_gain_vs_signal: number of bins for gain vs signal histogram - :param bins_signal_low_range: number of bins for the low signal - range histogram - :param bins_signal_high_range: number of bins for the high signal - range histogram - :param raw_fmt_version: raw file format version to use - :param chunk_size: images per chunk to compute upon :param h5_data_path: path in HDF5 file which is prefixed to the image/data section :param h5_index_path: path in HDF5 file which is prefixed to the index section - :param chunk_size_idim: chunking size on image dimension when - writing data out - :param il_mode: set to true if AGIPD data is interlaced (pre-Nov 2017 data) - :param karabo_data_mode: set to true to use data iterated with karabo data - :param force_hg_if_below: set to a value different to None/0 to force a pixels - gain to high, if the pixel value is below the given - value after high gain offset subtraction. :param corr_bools: A dict with all of the correction booleans requested or available + + The following example shows a typical use case: + .. code-block:: python + + agipd_corr = AgipdCorrections(max_cells, max_pulses, + h5_data_path=h5path, + h5_index_path=h5path_idx, + corr_bools=corr_bools) + + agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128)) + + with open(f'{out_folder}/retrieved_constants.yml', "r") as f: + const_yaml = yaml.load(f.read(), Loader=yaml.FullLoader) + + for mod in modules: + qm = f"Q{mod // 4 + 1}M{mod % 4 + 1}" + device = getattr(getattr(Detectors, dinstance), qm) + agipd_corr.initialize_from_yaml(const_yaml, mod, device) + + data_shape = (n_images_max, 512, 128) + agipd_corr.allocate_images(data_shape, n_cores_files) + + i_proc=0 + agipd_corr.read_file(i_proc, file_name) + + # Correct first 1000 images + agipd_corr.correct_agipd(i_proc, first=0, last=1000) + + agipd_corr.write_file(i_proc, file_name, ofile_name) + """ - self.agipd_base = h5_data_path.format(channel) - self.idx_base = h5_index_path.format(channel) - self.infile = infile - self.outfile = outfile - self.channel = channel - self.index_v = raw_fmt_version - self.chunksize = chunk_size - self.initialized = False + + # Data description + self.h5_data_path = h5_data_path + self.h5_index_path = h5_index_path self.rng_pulses = max_pulses # avoid list(range(*[0]])) self.pulses_lst = list(range(*max_pulses)) \ @@ -161,50 +126,42 @@ class AgipdCorrections: self.min_pulse = self.pulses_lst[0] self.max_pulse = self.pulses_lst[-1] self.max_cells = max_cells - self.hist_pulses = 0 - self.hists_signal_low = 0 - self.hists_signal_high = 0 - self.hists_gain_vs_signal = 0 - self.hists_dig_gain_vs_signal = 0 - self.bins_gain_vs_signal = bins_gain_vs_signal - self.bins_signal_low_range = bins_signal_low_range - self.bins_signal_high_range = bins_signal_high_range - self.bins_dig_gain_vs_signal = bins_dig_gain_vs_signal - self.cidx = 0 - self.sig_zero_mask = None - self.base_offset = None - self.baseline_corr_noise_threshold = 100 - self.melt_snow = SnowResolution.NONE - self.chunk_size_idim = chunk_size_idim - self.dohigh = 0 - self.gsfun = self.split_gain_il if il_mode else self.split_gain - self.il_mode = il_mode - self.cal_det_instance = cal_det_instance - self.karabo_data_mode = karabo_data_mode - self.force_hg_if_below = force_hg_if_below - self.force_mg_if_below = force_mg_if_below - self.mask_noisy_adc = mask_noisy_adc - self.adc_mask = None - self.gain_stats = [0, 0, 0] - self.acquisition_rate = acquisition_rate - self.gain_setting = gain_setting - 
self.valid_indices = None - self.frac_high_med = 0 - self.md_additional_offset = 0 - self.xray_cor = 0 + + # Correction parameters + self.baseline_corr_noise_threshold = -1000 + self.snow_resolution = SnowResolution.INTERPOLATE + self.cm_dark_fraction = 0.66 + self.cm_n_itr = 4 + self.mg_hard_threshold = 100 + self.hg_hard_threshold = 100 + self.noisy_adc_threshold = 0.25 + + # Shared variables for data and constants + self.shared_dict = [] + self.offset = {} + self.noise = {} + self.thresholds = {} + self.frac_high_med = {} + self.md_additional_offset = {} + self.rel_gain = {} + self.mask = {} + self.xray_cor = {} # check if given corr_bools are correct tot_corr_bools = ['only_offset', 'adjust_mg_baseline', 'rel_gain', 'xray_corr', 'blc_noise', 'blc_hmatch', 'blc_stripes', 'blc_set_min', 'match_asics', - 'corr_asic_diag', 'dont_zero_nans', - 'dont_zero_orange'] + 'corr_asic_diag', 'zero_nans', + 'zero_orange', 'force_hg_if_below', + 'force_mg_if_below', 'mask_noisy_adc', + 'melt_snow', 'common_mode', 'mask_zero_std', + 'low_medium_gap'] if set(corr_bools).issubset(tot_corr_bools): self.corr_bools = corr_bools else: - raise Exception('Correction Booleans: {} are not available!' - .format(list(set(corr_bools) - set(tot_corr_bools)))) + bools = list(set(corr_bools) - set(tot_corr_bools)) + raise Exception(f'Correction Booleans: {bools} are not available!') # noqa # Flags allowing for pulse capacitor constant retrieval. self.pc_bools = [self.corr_bools.get("rel_gain"), @@ -212,994 +169,348 @@ class AgipdCorrections: self.corr_bools.get('blc_noise'), self.corr_bools.get('blc_hmatch'), self.corr_bools.get('blc_stripes'), - self.melt_snow] - def get_iteration_range(self): - """Returns a range expression over which to iterate in chunks - """ - splitlength = self.firange.size // self.chunksize - splits = np.arange(0, self.firange.size, self.chunksize) - return [a for a in np.array_split(self.firange, splits) if a.size > 0] - - def initialize(self, offset, rel_gain, xray_cor=None, mask=None, - noise=None, thresholds=None, swap_axis=True, - frac_high_med=None, md_additional_offset=None): - """ Initialize the calibration constants and the output data file. - - Any data that is not touched by the corrections is copied into the - output file at this point. Also data validity tests are performed. - These functions must be called before `correct_agipd` is executed. - - :param offset: offset map to use for corrections - :param rel_gain: relative gain map to use for corrections - :param xray_cor: xray-driven correction map - :param mask: bad pixel mask to use for corrections - :param noise: noise map to use for corrections - :param thresholds: thresholds for gain determination - :param swap_axis: set to True if using data from the calibration - database. - :param frac_high_med: ratio high to medium gain across cells - :param md_additional_offset: additional medium-gain offset - - Note that this function may be called twice, when initializing - partially from DB and file. - Constants not to be set by the initial call should be set to `None`. - """ + self.corr_bools.get('melt_snow')] - # swap the axes such that memory cell dimension is first - the usual - # way of operation, and the one expected by the rest of this class. 
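For orientation, a minimal sketch of the corr_bools mapping that the reworked constructor validates against tot_corr_bools; the particular flags and values below are illustrative only.

    corr_bools = {
        "only_offset": False,    # apply only the dark offset correction
        "rel_gain": True,        # CI-derived relative gain correction
        "xray_corr": True,       # flat-field (X-ray) gain correction
        "common_mode": True,     # cell/ASIC common-mode correction
        "zero_nans": True,       # zero non-finite values and flag them
    }
    agipd_corr = AgipdCorrections(max_cells=352, max_pulses=[0, 352, 1],
                                  corr_bools=corr_bools)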
- if swap_axis: - if offset is not None: - mvd = np.moveaxis(np.moveaxis(offset, 2, 0), 2, 1) - offset = np.ascontiguousarray(mvd) - if rel_gain is not None: - mvd = np.moveaxis(np.moveaxis(rel_gain, 2, 0), 2, 1) - rel_gain = np.ascontiguousarray(mvd) - if xray_cor is not None: - mvd = np.moveaxis(xray_cor, 1, 0) - xray_cor = np.ascontiguousarray(mvd) - if md_additional_offset is not None: - mvd = np.moveaxis(np.moveaxis(md_additional_offset, 2, 0), 2, 1) - md_additional_offset = np.ascontiguousarray(mvd) - if mask is not None: - mvd = np.moveaxis(np.moveaxis(mask, 2, 0), 2, 1) - mask = np.ascontiguousarray(mvd) - if noise is not None: - mvd = np.moveaxis(np.moveaxis(noise, 2, 0), 2, 1) - noise = np.ascontiguousarray(mvd) - if thresholds is not None: - mvd = np.moveaxis(np.moveaxis(thresholds, 2, 0), 2, 1) - thresholds = np.ascontiguousarray(mvd) - - if offset is not None: - self.offset = offset - if rel_gain is not None: - self.rel_gain = rel_gain - if xray_cor is not None: - self.xray_cor = xray_cor - if frac_high_med is not None: - self.frac_high_med = frac_high_med - if md_additional_offset is not None: - self.md_additional_offset = md_additional_offset - - # if a mask already exists we OR the mask to maintain what we - # already have - if mask is not None: - if not hasattr(self, "mask"): - self.mask = mask - else: - self.mask |= mask[:self.mask.shape[0], ...] - - if noise is not None: - self.noise = noise - - if thresholds is not None: - self.thresholds = thresholds - - # this is the first call, usually from the database - # we also generate file structures here, as the second call is optional - if not self.initialized: - self.median_noise = np.nanmedian(self.noise[0, ...]) - cops = [self.offset.shape[0], self.mask.shape[0], self.max_cells] - self.max_cells = np.min(cops) - if not self.karabo_data_mode: - self.gen_valid_range() - self.copy_and_sanitize_non_cal_data() - self.create_output_datasets() - self.initialized = True - print("Offset medians are {}".format( - np.nanmedian(self.offset, axis=(0, 1, 2)))) - if hasattr(self, "rel_gain"): - print("Gain medians are {}".format( - np.nanmedian(self.rel_gain, axis=(0, 1, 2)))) - print("Threshold medians are {}".format( - np.nanmedian(self.thresholds, axis=(0, 1, 2)))) - else: - print("After reading from file: ") - print("Offset medians are {}".format( - np.nanmedian(self.offset, axis=(0, 1, 2)))) - if hasattr(self, "rel_gain"): - print("Gain medians are {}".format( - np.nanmedian(self.rel_gain, axis=(0, 1, 2)))) - print("Threshold medians are {}".format( - np.nanmedian(self.thresholds, axis=(0, 1, 2)))) - - def get_shadowed_stripe(self, data, threshold, fraction): - """ - Return list of shadowed regions. - Shadowed region is presented by the list of lines of pixels along - numpy axis 1. Regions are defined with respect of threshold - and fraction. Margin of one pixel is used. 
- Double-pixels are stored in separate regions - - :param data: (numpy.ndarray, rank=2) offset corrected single image - :param threshold: (float) a threshold, below which - pixes is considered as dark - :param fraction: (float) a fraction of pixels in a line along axis 1 - below which a full line is considered as dark + def read_file(self, i_proc, file_name): """ + Read file with raw data to shared memory - npx_all = np.count_nonzero(~np.isnan(data), axis=1) - npx_sh = np.count_nonzero(data < threshold, axis=1) - - A = np.array(np.where(npx_sh / npx_all > fraction)[0]) - - dp_idx = np.arange(64, 512, 64) - dp_idx = np.sort(np.hstack((dp_idx, dp_idx - 1))) - - # grouping indices - sh_idx = [] - tmp_idx = [] - for idx, i in enumerate(A[1:-1]): - if i - 1 not in A: - continue - if len(tmp_idx) == 0: - tmp_idx.append(i) - continue - if tmp_idx[-1] + 1 == i and ( - (tmp_idx[-1] in dp_idx) == (i in dp_idx)) and (i + 1 in A): - tmp_idx.append(i) - if i != A[-2]: - continue - if len(tmp_idx) > 1: - sh_idx.append(list(tmp_idx)) - tmp_idx = [i] - - return sh_idx - - def baseline_correct_via_stripe(self, d, g, m, frac_high_med): - """ Correct baseline shifts using shadowed stripes + :param file_name: Name of input file including path + :param i_proc: Index of shared memory array + :return: + - first_index - Index of the first image to be corrected + - last_index - Index of the last image to be corrected + - firange - List of images to be corrected - :param d: the data to correct, should be offset corrected single image - :param frac_high_med: ratio of high to medium PC slopes - :param g: gain array matching `d` array - :param m: bad pixel mask """ + module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) + agipd_base = self.h5_data_path.format(module_idx) + idx_base = self.h5_index_path.format(module_idx) + data_path = f'{agipd_base}/image' + data_dict = self.shared_dict[i_proc] - dd = copy.copy(d) - dd[g != 0] = np.nan # only high gain data - dd[m != 0] = np.nan # only good pixels - - sh_idxs = self.get_shadowed_stripe(dd, 30, 0.95) - - # collect all shadowed regions excluding double pixels - idx = [] - for sh_idx in sh_idxs: - if len(sh_idx)>2: - idx += sh_idx - - if len(idx)<3: - return d - - shift = np.nanmean(dd[idx, :]) - d[g==0] -= shift - d[g==1] -= shift/frac_high_med - return d - - def split_gain(self, d): - """ Split gain information off AGIPD Data + try: + f = h5py.File(file_name, 'r') + group = f[data_path] + + valid, first_index, last_index, valid_trains, valid_indices = \ + self.get_valid_image_idx(idx_base, f) + firange = self.gen_valid_range(first_index, last_index, + self.max_cells, agipd_base, f, + valid_indices) + n_img = firange.shape[0] + + data_dict['cellId'][:n_img] = np.squeeze(group['cellId'][firange]) + data_dict['pulseId'][:n_img] = np.squeeze( + group['pulseId'][firange]) + data_dict['trainId'][:n_img] = np.squeeze( + group['trainId'][firange]) + data_dict['moduleIdx'][0] = module_idx + data_dict['nImg'][0] = n_img + + raw_data = group['data'][firange] + data_dict['data'][:n_img] = raw_data[:, 0] + data_dict['rawgain'][:n_img] = raw_data[:, 1] + + except Exception as e: + print(f'Error during reading data from file {file_name}: {e}') + print(f'Error traceback: {traceback.format_exc()}') + n_img = 0 + data_dict['nImg'][0] = 0 + + return n_img + + def write_file(self, i_proc, file_name, ofile_name): """ - return d[:, 0, ...], d[:, 1, ...] 
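The new read_file() infers the module index from the raw file name and splits each raw frame into its analog and digital-gain planes; a short sketch of that convention (the path below is a made-up example):

    # e.g. ".../RAW-R0123-AGIPD05-S00000.h5": the last two digits of the third
    # dash-separated token give the module index
    file_name = "/gpfs/exfel/raw/SPB/p002545/r0123/RAW-R0123-AGIPD05-S00000.h5"
    module_idx = int(file_name.split('/')[-1].split('-')[2][-2:])   # -> 5

    # each raw image holds two planes: [:, 0] analog signal, [:, 1] gain data,
    # which read_file() copies into data_dict['data'] and data_dict['rawgain']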
+ Create output file and write corrected data to it - def split_gain_il(self, d): - """ Split gain information off AGIPD IL Data + :param file_name: Name of input file including path + :param ofile_name: Name of output file including path + :param i_proc: Index of shared memory array """ - return d[0::2, 0, ...], d[1::2, 0, ...] - - def baseline_correct_via_noise(self, d, noise, g): - """ Correct baseline shifts by evaluating position of the noise peak - - :param: d: the data to correct, should be a single image - :param noise: the mean noise for the cell id of `d` - :param g: gain array matching `d` array - - Correction is done by identifying the left-most (significant) peak - in the histogram of `d` and shifting it to be centered on 0. - This is done via a continous wavelet transform (CWT), using a Ricker - wavelet. - - Only high gain data is evaulated, and data larger than 50 ADU, - equivalent of roughly a 9 keV photon is ignored. - - Two passes are executed: the first shift is accurate to 10 ADU and - will catch baseline shifts smaller then -2000 ADU. CWT is evaluated - for peaks of widths one, three and five time the noise. - The baseline is then shifted by the position of the summed CWT maximum. - - In a second pass histogram is evaluated within a range of - +- 5*noise of the initial shift value, for peak of width noise. - - Baseline shifts larger than the maximum threshold or positive shifts - larger 10 are discarded, and the original data is returned, otherwise - the shift corrected data is returned. + module_idx = int(file_name.split('/')[-1].split('-')[2][-2:]) + agipd_base = self.h5_data_path.format(module_idx) + idx_base = self.h5_index_path.format(module_idx) + data_path = f'{agipd_base}/image' + data_dict = self.shared_dict[i_proc] + + with h5py.File(file_name, 'r') as infile: + with h5py.File(ofile_name, 'w') as outfile: + + n_img = data_dict['nImg'][0] + if n_img == 0: + return + trains = data_dict['trainId'][:n_img] + self.copy_and_sanitize_non_cal_data(infile, outfile, + agipd_base, + idx_base, trains) + + outfile[data_path]['data'] = data_dict['data'][:n_img] + outfile[data_path]['gain'] = data_dict['gain'][:n_img] + outfile[data_path]['blShift'] = data_dict['blShift'][:n_img] + outfile[data_path]['mask'] = data_dict['mask'][:n_img] + outfile[data_path]['cellId'] = data_dict['cellId'][:n_img] + outfile[data_path]['pulseId'] = data_dict['pulseId'][:n_img] + outfile[data_path]['trainId'] = data_dict['trainId'][:n_img] + + def cm_correction(self, i_proc, asic): """ - # we work on a copy to be able to filter values by setting them to - # np.nan - dd = copy.copy(d) - dd[g != 0] = np.nan # only high gain data - dd[dd > 50] = np.nan # only noise data - h, e = np.histogram(dd, bins=210, range=(-2000, 100)) - c = (e[1:] + e[:-1]) / 2 + Perform common-mode correction of data in shared memory - try: - cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. 
* noise]) - except: - return d - cwtall = np.sum(cwtmatr, axis=0) - marg = np.argmax(cwtall) - pc = c[marg] - - high_res_range = (dd > pc - 5 * noise) & (dd < pc + 5 * noise) - dd[~high_res_range] = np.nan - h, e = np.histogram(dd, bins=200, - range=(pc - 5 * noise, pc + 5 * noise)) - c = (e[1:] + e[:-1]) / 2 - try: - cwtmatr = cwt(h, ricker, [noise, ]) - except: - return d - marg = np.argmax(cwtmatr) - pc = c[marg] - # too large shift to be easily decernable via noise - if pc > 10 or pc < -self.baseline_corr_noise_threshold: - return d - return d - pc - - def correct_baseline_via_hist(self, d, pcm, g): - """ Correct baseline shifts by matching edges of high and medium - gain histogram - - :param d: single image to correct - :param pcm: relative gain slope for medium gain of each pixel in `d` - :param g: gain values for `d` - - As a preparation the median value of medium gain pixels is shifted in - increments of 50 ADU as long as it is negative. - - Correction is then performed by evaluating histograms for high gain - and medium gain pixels in `d`. The right-most significant bin of the - high gain histogram is then shifted such that it coincides with the - left-most significant bin of the medium gain histogram. Significant - here means that bin counts are at least 10% of the average bin count - of the histogram for all bins larger 10 counts. This initial evaluation - uses histograms in range between -10000 and +10000 ADU with a - resolution of 100 ADU per bin. The shift required to match both - histogram borders is an initial estimate for the baseline shift. - - Finally, the mean bin count of the five outermost bins of the high and - medium gain histograms is compared. The baseline is shifted by - increments of 1 ADU until they are within 20% of each other. - - From this point on additional shifts are performed while the - deviation stays within 30% of each other. The final shift is then - evaluated as the point of minimal deviation of these values. - - A maximum iteration count of 200 is imposed on the procedure. If no - convergence is reached within this limit, the original data is - returned, otherwise the baseline corrected data is returned. 
- """ - dd = copy.copy(d) - # shift while the medium gain produces negative values on average - pc = 0 - it = 0 - max_it = 100 - while np.nanmedian(dd[g == 1] - pc) < 0: - pc -= 50 - if it > max_it: - return d - it += 1 - - def min_hist_distance(pc, bins=100, ran=(-10000, 10000), dec=20, - minbin=10): - hh, e = np.histogram(dd[g == 0] - pc, bins=bins, range=ran) - hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=bins, - range=ran) - - # right most significant value of high gain histogram - hhm = np.nanmean(hh[hh > minbin]) - rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1]) - - # left most significant value of high gain histogram - hmm = np.nanmean(hm[hm > minbin]) - lmm = np.argmax((hm > 0.1 * hmm)) - med_pcm = np.nanmedian(pcm[g == 1]) - eh = e[rmh] - el = e[lmm] - pc += (el - eh) / (med_pcm - 1) - return pc - - # set a pc one halfstep higher than initial guesstimate - pc += 50 - pc = min_hist_distance(pc) - - # now start rolling back until we have similar signal levels - def comp_signal_level(pc, minbin=1): - hh, e = np.histogram(dd[g == 0] - pc, bins=100, range=(5000, 7000)) - hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=100, - range=(5000, 7000)) - - # right most significant value of high gain histogram - hhm = np.nanmean(hh[hh > minbin]) - rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1]) - - # left most significant value of high gain histogram - hmm = np.nanmean(hm[hm > minbin]) - lmm = np.argmax((hm > 0.1 * hmm)) - - reg = 5 - ret = np.abs(np.sum(hh[rmh - reg:rmh]) - np.sum(hm[lmm:lmm + reg])) - ret /= np.sum(hh[rmh - reg:rmh]) - return ret - - it = 0 - max_it = 200 - opc = pc - m_it_break = False - while comp_signal_level(pc) > 0.2: - pc += 1 - if it > max_it: - pc = opc - m_it_break = True - break - it += 1 - - it = 0 - if not m_it_break: - pcs = [pc] - slevs = [] - slev = comp_signal_level(pc) - slevs.append(slev) - while slev < 0.3: - pc += 1 - slev = comp_signal_level(pc) - slevs.append(slev) - pcs.append(pc) - it += 1 - - pc = pcs[np.argmin(slevs)] - - return d - pc - - def correct_baseline_via_hist_asic(self, d, g): - """ Correct diagonal falloff on ASICs by matching corner histograms - - :param d: single image data - :param g: gain values for `d` - - Corrections are performed for the top and bottom row of ASICs - seperately. - - In easch row, starting from the beam-hole closest ASIC, the histogram - of a square region of size 8x8 pixels is evaluted for its maximum - bin count location (only medium gain values), and compared to a - histogram produced from a neighbouring region on the next ASIC. - Double sized pixels are avoided. - - The reasoning is that the histograms should not dramatically differ - for continuously distributed photon events. Each ASIC is then shifted - such that histograms match and the corrected data is returned. - - Warning: this feature is very experimental! 
+ :param i_proc: Index of shared memory array to process + :param asic: Asic number to process """ - rsize = 8 - - def hist_ends(dm, bins=5000, ran=(-5000, 5000), - minbin=4 / (16 / rsize)): - - hm, e = np.histogram(dm, bins=bins, range=ran) - - # left most significant value of medium gain histogram - # hmm = np.nanmean(hm[hm > minbin]) - lmm = np.argmax((hm > minbin)) - - return e[lmm], np.sum(hm) - - for i in range(1, 8): + if not self.corr_bools.get("common_mode"): + return - # upper asics - casic = np.concatenate( - (d[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(), - d[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten())) + fraction = self.cm_dark_fraction + n_itr = self.cm_n_itr + + cell_id = self.shared_dict[i_proc]['cellId'] + train_id = self.shared_dict[i_proc]['trainId'] + n_img = self.shared_dict[i_proc]['nImg'][0] + cell_ids = cell_id[train_id == train_id[0]] + n_cells = cell_ids.size + data = self.shared_dict[i_proc]['data'][:n_img].reshape(-1, n_cells, 8, + 64, 2, 64) + + # Loop over iterations + for i in range(n_itr): + # Loop over rows of cells + first = 0 + for cell_row in range(11): + last = first + cell_ids[(cell_ids // 32) == cell_row].size + if first == last: + continue + asic_data = data[:, first:last, asic % 8, :, asic // 8, :] - casic_g = np.concatenate( - (g[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(), - g[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten())) + # Cell common mode + cell_cm_sum, cell_cm_count = \ + calgs.sum_and_count_in_range_cell(asic_data, -25., 25.) + cell_cm = cell_cm_sum / cell_cm_count - idxm = casic_g == 1 - cme, cms = hist_ends(casic[idxm]) + cell_cm[cell_cm_count < fraction * 32 * 256] = 0 + asic_data[...] -= cell_cm[None, None, :, :] - asic = d[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten() - asic_g = g[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten() + # Asics common mode + asic_cm_sum, asic_cm_count = \ + calgs.sum_and_count_in_range_asic(asic_data, -25., 25.) + asic_cm = asic_cm_sum / asic_cm_count - idxm = asic_g == 1 - me, ms = hist_ends(asic[idxm]) + asic_cm[asic_cm_count < fraction * 64 * 64] = 0 + asic_data[...] 
-= asic_cm[:, :, None, None] - if cms > rsize * rsize / 10 and ms > rsize * rsize / 10: - pc = cme - me - else: - pc = 0 - - if np.abs(pc) > 500: - # somthing when wrong - pc = 0 - d[i * 64:(i + 1) * 64, 64:] += pc - - for i in range(0, 7): - # lower asics - casic = np.concatenate((d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1, - 64:64 + rsize].flatten(), - d[(i + 1) * 64 + 1:(i + 1) * 64 + rsize, - 64 - rsize:64].flatten())) - - casic_g = np.concatenate((g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1, - 64:64 + rsize].flatten(), - g[(i + 1) * 64 + 1:(i + 1) * 64 + rsize, - 64 - rsize:64].flatten())) - - idxm = casic_g == 1 - cme, cms = hist_ends(casic[idxm]) - - asic = d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1, - 64 - rsize:64].flatten() - asic_g = g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1, - 64 - rsize:64].flatten() - - idxm = asic_g == 1 - me, ms = hist_ends(asic[idxm]) - - if cms > rsize * rsize / 10 and ms > rsize * rsize / 10: - pc = cme - me - else: - pc = 0 + first = last - if np.abs(pc) > 500: - # somthing went wrong - pc = 0 - d[i * 64:(i + 1) * 64, :64] += pc + def mask_zero_std(self, i_proc, cells): + """ + Add bad pixel bit: DATA_STD_IS_ZERO to the mask of bad pixels - return d + Pixel is bad if standard deviation for a given pixel and + given memory cell is zero - def match_asic_borders(self, d, chunk=8): - """ Match border pixels of the two ASIC rows to the same median value + :param i_proc: Index of shared memory array to process + :param cells: List of cells to be considered + """ + if not self.corr_bools.get("mask_zero_std"): + return - :param d: single image data - :param chunk: number of pixels on each ASIC boundary to generate - mean values for + module_idx = self.shared_dict[i_proc]['moduleIdx'][0] + n_img = self.shared_dict[i_proc]['nImg'][0] + data = self.shared_dict[i_proc]['data'][:n_img] + cellid = self.shared_dict[i_proc]['cellId'][:n_img] + mask = self.mask[module_idx] # shape of n_cells, x, y - Each ASIC has `n=64/chunk` mean values calculated along its two border - pixel rows. The deviation of chunk pairs is then evaluated and the - upper/lower ASICs are shifted by the mean deviation for the - right/left hemisphere of the detector. + for c in cells: + std = np.nanstd(data[cellid == c, ...], axis=0) + mask[:, c, std == 0] |= BadPixels.DATA_STD_IS_ZERO.value - The corrected data is returned. 
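cm_correction() delegates the heavy lifting to cython helpers; a rough numpy equivalent of what sum_and_count_in_range_cell/_asic are assumed to compute, i.e. a common-mode estimate from pixels inside a +/-25 ADU window, applied only where enough dark pixels contribute:

    import numpy as np

    def cm_estimate(block, lo=-25., hi=25., axis=(-2, -1)):
        # mean of in-range ("dark") values along the requested axes
        sel = (block > lo) & (block < hi)
        cm_sum = np.where(sel, block, 0.).sum(axis=axis)
        cm_count = sel.sum(axis=axis)
        cm = np.divide(cm_sum, cm_count,
                       out=np.zeros_like(cm_sum), where=cm_count > 0)
        return cm, cm_count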
- """ - for i in range(8): - - these_asics = d[:, i * 64:(i + 1) * 64, :] - diffs = np.zeros((d.shape[0], 8)) - for k in range(64 // chunk): - ldat = these_asics[:, k * chunk:(k + 1) * chunk, 62:64] - lowerasic = np.nanmedian(ldat, axis=(1, 2)) - udat = these_asics[:, k * chunk:(k + 1) * chunk, 64:66] - upperasic = np.nanmedian(udat, axis=(1, 2)) - low_up = lowerasic - upperasic - up_low = upperasic - lowerasic - diff = low_up if self.channel < 8 else up_low - diffs[:, k] = diff - - mn = np.nanmean(diffs, axis=1)[:, None, None] / 2 - if self.channel < 8: - d[:, i * 64:(i + 1) * 64, 64:] += mn - d[:, i * 64:(i + 1) * 64, :64] -= mn - else: - d[:, i * 64:(i + 1) * 64, :64] += mn - d[:, i * 64:(i + 1) * 64, 64:] -= mn - return d - - def melt_snowy_pixels(self, raw, im, gain, rgain, resolution=None): - """ Identify (and optionally interpolate) 'snowy' pixels - - :param raw: raw data - :param im: offset and relative gain corrected data: - :param gain: gain values for `raw` - :param rgain: raw gain data, scaled by the threshold for - high-to-medium gain switching - :param resolution: resolution strategy, should be of enum-type - `SnowResolution` - - Snowy pixels are pixels which are already identified as pixels in - the medium gain stage by their gain values, but which have - transitional image values between the largest high gain value and - the smalles medium gain value. As these values may be encountered again - in the context of medium gain, they are ambiguous and cannot - readily be identified by simple thresholding alone. - - It is attempted to identify these snowy pixels by using a Gaussian - mixture clustering algorithm acting on multispectral input. - Positions in the cluster are identified as follows: - - x: abs(p_r-p_bg)*p_rg**2 - y: p_r - - where p_r is a given pixel raw value, p_bg is the mean value of the - 8 direct neighbor pixels, and p_rg is the raw gain value of the pixel. - - Note that these cluster params are purely empirically derived, - and do not have any connection to ASIC design etc. - - Only pixels in medium gain are evaluated, and evaluation is - image-wise, but subdivided further into ASICs. - - The so-created graph [[x_0, x_1, ..., x_n], [y_0, y_1, ..., y_n]] is - scaled using a `sklearn.StandardScaler` and input into a two-component - `GaussianMixture` clustering algorithm. This results in a set of - labels, identifying pixels as likely snowy, or not. However, - for a given image we do not know which label is which from the - output. Hence, labels are differentiated under the assumption that - snowy pixel occur to be out-of-context, i.e. their surrounding pixels - are more likely at lower signal values, than if the pixel is in a region - were gain switching led to a large value. - - Depending on the resolution strategy so-identified pixels are either - set to `np.nan` or to the interpolated value of the direct (high-gain) - neighbors. - - The corrected data is returned alongside an updated gain mask and bad - pixel - mask. 
+ def correct_agipd(self, i_proc, first, last): """ - snow_mask = np.zeros(im.shape, np.uint32) - for k in range(im.shape[0]): - for i in range(8): - for j in range(2): - - fidx = gain[k, i * 64:(i + 1) * 64, - j * 64:(j + 1) * 64] == 1 - fidx_h = gain[k, i * 64:(i + 1) * 64, - j * 64:(j + 1) * 64] == 0 - - # need at least two pixels in medium gain to be able to - # cluster in two groups - if np.count_nonzero(fidx) < 2: - continue - - # data for a given asic - asic = raw[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] - asic_g = gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] - asic_r = rgain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] - - asic_h = copy.copy(asic) - asic_h[~fidx_h] = np.nan - - # calculate the background of by averaging the 8 direct - # neighbours of each pixel - rl = np.roll(asic, 1, axis=0) - rr = np.roll(asic, -1, axis=0) - ru = np.roll(asic, 1, axis=1) - rd = np.roll(asic, -1, axis=1) - rlu = np.roll(rl, 1, axis=1) - rld = np.roll(rl, -1, axis=1) - rru = np.roll(rr, 1, axis=1) - rrd = np.roll(rr, -1, axis=1) - bg = (rl + rr + ru + rd + rlu + rld + rru + rrd) / 8 - - # calculate the background of by averaging the 8 direct - # neighbours of each pixel - # here only for high gain pixels - rl = np.roll(asic_h, 1, axis=0) - rr = np.roll(asic_h, -1, axis=0) - ru = np.roll(asic_h, 1, axis=1) - rd = np.roll(asic_h, -1, axis=1) - rlu = np.roll(asic_h, 1, axis=1) - rld = np.roll(asic_h, -1, axis=1) - rru = np.roll(asic_h, 1, axis=1) - rrd = np.roll(asic_h, -1, axis=1) - bg_h = np.nanmean( - np.array([rl, rr, ru, rd, rlu, rld, rru, rrd]), axis=0) - - # construct a graph - graph = np.array( - [np.abs(asic[fidx] - bg[fidx]) * asic_r[fidx] ** 2, - asic[fidx]]).T - # scale it - graph = StandardScaler().fit_transform(graph) - # perform clustering - spc = GaussianMixture(n_components=2, random_state=0) - spc.fit(graph) - # and get labels - labels = spc.predict(graph) - - asic = im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] - asic_msk = snow_mask[k, i * 64:(i + 1) * 64, - j * 64:(j + 1) * 64] - mim = asic[fidx] - asicb = bg_h - mimb = asicb[fidx] - mimg = asic_g[fidx] - mmsk = asic_msk[fidx] - - # identify which labels are which - mn1 = np.nanmean(bg[fidx][labels == 0]) - mn2 = np.nanmean(bg[fidx][labels == 1]) - implabel = 1 - if mn1 > mn2: - implabel = 0 - - # if we've misidentified, then we will have to many - # snowy pixels - # happens if the ASIC is generally on a high signal level - if np.count_nonzero([labels == implabel]) > 64 * 64 / 3: - continue - - # or a large portion of misidenfied label covers the ASIC - if np.count_nonzero([labels != implabel]) > 0.8 * 64 * 64: - continue - - # set to NaN if requested - if resolution is SnowResolution.NONE: - mim[labels == implabel] = np.nan - # or use the interpolated value - elif resolution is SnowResolution.INTERPOLATE: - mim[labels == implabel] = mimb[labels == implabel] - mimg[labels == implabel] = 0 - # identify these pixels in a bad pixel mask - mmsk[ - labels == implabel] = BadPixels.TRANSITION_REGION.value - - # now set back to data - asic[fidx] = mim - asic_g[fidx] = mimg - asic_msk[fidx] = mmsk - im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic - gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic_g - snow_mask[k, i * 64:(i + 1) * 64, - j * 64:(j + 1) * 64] = asic_msk - return im, gain, snow_mask - - def make_noisy_adc_mask(self, bmask): - """ Mask entire ADC if they are noise above a relative threshold - """ - msk = np.count_nonzero( - ((bmask & BadPixels.NOISE_OUT_OF_THRESHOLD.value != 0) - | (bmask & 
BadPixels.OFFSET_NOISE_EVAL_ERROR.value != 0)).astype( - np.int), axis=0) - - fmask = np.zeros_like(msk).astype(np.uint32) - adc_i = bmask.shape[1] // 8 - adc_j = bmask.shape[2] // 8 - for i in range(adc_i): - for j in range(adc_j): - adc_cnt = np.count_nonzero( - msk[i * adc_i:(i + 1) * adc_i, j * adc_j:(j + 1) * adc_j] > - bmask.shape[0] // 32) - if adc_cnt / (adc_i * adc_j) >= self.mask_noisy_adc: - fmask[i * adc_i:(i + 1) * adc_i, - j * adc_j:(j + 1) * adc_j] = BadPixels.NOISY_ADC.value - return fmask - - def correct_agipd(self, irange): - """ Correct Raw AGIPD data for offset and relative gain effects - - :param irange: list of image indices to work on, should be contiguous, - or karabo_data structure to work on - - Will raise an RuntimeError if `initialize()` has not been called first. + Perform image-wise correction of data in shared memory + + :param first: Index of the first image to be corrected + :param last: Index of the last image to be corrected + :param i_proc: Index of shared memory array to process """ - if not self.initialized: - raise RuntimeError("Must call initialize() first!") - if not self.karabo_data_mode: - agipd_base = self.agipd_base - cidx = self.cidx - im = np.array(self.infile[agipd_base + "image/data"][irange, ...]) - trainId = np.squeeze( - self.infile[agipd_base + "/image/trainId"][irange, ...]) - pulseId = np.squeeze( - self.infile[agipd_base + "image/pulseId"][irange, ...]) - status = np.squeeze( - self.infile[agipd_base + "/image/status"][irange, ...]) - cellid = np.squeeze( - np.array( - self.infile[agipd_base + "/image/cellId"][irange, ...])) - length = np.squeeze( - np.array( - self.infile[agipd_base + "/image/length"][irange, ...])) - - # this far end of the image index range we are working on - nidx = int(cidx + irange.size) - else: - cidx = 1 # do not produce any histograms - im = irange['image.data'] - trainId = np.squeeze(irange['image.trainId']) - status = np.squeeze(irange['image.status']) - pulseId = np.squeeze(irange['image.pulseId']) - cellid = np.squeeze(irange['image.cellId']) - length = np.squeeze(irange['image.length']) - - # split off image and gain into two separate arrays - im, ga = self.gsfun(im) - - # we will work on float data from now on - im = im.astype(np.float32) - - # some correction require us to maintain a copy of the raw data - raw = copy.copy(im) - - # handle very old interlaced data - if self.il_mode: - trainId = trainId[0::2] - pulseId = pulseId[0::2] - status = status[0::2] - cellid = cellid[0::2] - length = length[0::2] + + module_idx = self.shared_dict[i_proc]['moduleIdx'][0] + data = self.shared_dict[i_proc]['data'][first:last] + bl_shift = self.shared_dict[i_proc]['blShift'][first:last] + rawgain = self.shared_dict[i_proc]['rawgain'][first:last] + gain = self.shared_dict[i_proc]['gain'][first:last] + cellid = self.shared_dict[i_proc]['cellId'][first:last] + mask = self.shared_dict[i_proc]['mask'][first:last] # first evaluate the gain into 0, 1, 2 --> high, medium, low - gain = np.zeros(ga.shape, np.uint8) - gain[...] = 0 - t0 = self.thresholds[..., 0] - t1 = self.thresholds[..., 1] + t0 = self.thresholds[module_idx][0] + t1 = self.thresholds[module_idx][1] + + rgain = None + raw_data = None + if self.corr_bools.get('melt_snow'): + rgain = rawgain / t0[cellid, ...] + raw_data = np.copy(data) + + # Often most pixels are in high-gain, so it's more efficient to + # set the whole output block to zero than select the right pixels. 
+ gain[:] = 0 # exceeding first threshold means data is medium or low gain - gain[ga > t0[cellid, ...]] = 1 + gain[rawgain > t0[cellid, ...]] = 1 # exceeding also second threshold means data is low gain - gain[(ga > t1[cellid, ...]) & ( - t1[cellid, ...] > 1.05 * t0[cellid, ...])] = 2 + gain[rawgain > t1[cellid, ...]] = 2 + + offsetb = self.offset[module_idx][:, cellid] # force into high or medium gain if requested - if self.force_mg_if_below is not None and self.force_mg_if_below > 0: - gain[(gain == 2) & ((im - self.offset[ - cellid, ..., 1]) < self.force_mg_if_below)] = 1 - if self.force_hg_if_below is not None and self.force_hg_if_below > 0: - gain[(gain > 0) & ((im - self.offset[ - cellid, ..., 0]) < self.force_hg_if_below)] = 0 - - for i in range(3): - self.gain_stats[i] = np.sum(gain == i) - - # check if any data has zero standard deviation, and mark this in - # the bad pixel maks - # this can be done on otherwise not corrected data. - if self.sig_zero_mask is None: - self.sig_zero_mask = np.zeros( - (self.max_cells, im.shape[1], im.shape[2]), np.uint32) - for c in range(self.max_cells): - std = np.nanstd(im[cellid == c, ...], axis=0) - self.sig_zero_mask[ - c, std == 0] = BadPixels.DATA_STD_IS_ZERO.value - # for feedback we produced histograms for the first chunk - if cidx == 0: - H, xe, ye = np.histogram2d(im.flatten(), ga.flatten(), - bins=self.bins_gain_vs_signal, - range=[[4000, 8192], [4000, 8192]]) - self.hists_gain_vs_signal += H - self.signal_edges = (xe, ye) - - H, xe, ye = np.histogram2d(im.flatten(), gain.flatten(), - bins=self.bins_dig_gain_vs_signal, - range=[[4000, 8192], [0, 4]]) - self.hists_dig_gain_vs_signal += H - self.dig_signal_edges = (xe, ye) - # now get the correct constants depending on cell id - offsetb = self.offset[cellid, ...] - tmask = self.mask[cellid, ...] + if self.corr_bools.get('force_mg_if_below'): + gain[(gain == 2) & ( + (data - offsetb[1]) < self.mg_hard_threshold)] = 1 + + if self.corr_bools.get('force_hg_if_below'): + gain[(gain > 0) & ( + (data - offsetb[0]) < self.hg_hard_threshold)] = 0 + # choose constants according to gain setting - off = np.choose(gain, - (offsetb[..., 0], offsetb[..., 1], offsetb[..., 2])) + off = calgs.gain_choose(gain, offsetb) + del offsetb + + tmask = self.mask[module_idx][:, cellid] + msk = calgs.gain_choose_int(gain, tmask) + del tmask - msk = np.choose(gain, (tmask[..., 0], tmask[..., 1], tmask[..., 2])) # same for relative gain and then bad pixel mask if hasattr(self, "rel_gain"): # get the correct rel_gain depending on cell-id - rc = self.rel_gain[cellid, ...] - rel_cor = np.choose(gain, (rc[..., 0], rc[..., 1], rc[..., 2])) - # scale raw gain for use in the identifying snowy pixels - rgain = None - if self.melt_snow is not False: - rgain = ga / t0[cellid, ...] 
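calgs.gain_choose and gain_choose_int replace the earlier np.choose calls; a numpy sketch of the assumed behaviour, picking per pixel the constant plane that matches the evaluated gain stage:

    import numpy as np

    def gain_choose_np(gain, constants):
        # gain:      uint8 array of 0/1/2 (high/medium/low)
        # constants: stacked per-stage planes, shape (3,) + gain.shape
        return np.choose(gain, (constants[0], constants[1], constants[2]))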
+ rc = self.rel_gain[module_idx][:, cellid] + rel_cor = calgs.gain_choose(gain, rc) + del rc + # subtract offset - im -= off + data -= off + del off + # before doing relative gain correction we need to evaluate any # baseline shifts # as they are effectively and additional offset in the data if (self.corr_bools.get('blc_noise') or - self.corr_bools.get('blc_hmatch') or - self.corr_bools.get('blc_stripes')): + self.corr_bools.get('blc_hmatch') or + self.corr_bools.get('blc_stripes')): # do this image wise, as the shift is per image - for i in range(im.shape[0]): + for i in range(data.shape[0]): # first correction requested may be to evaluate shift via # noise peak if self.corr_bools.get('blc_noise'): - mn_noise = np.nanmean(self.noise[cellid[i], ..., 0]) - dd = self.baseline_correct_via_noise(im[i, ...], - mn_noise, - gain[i, ...]) + mn_noise = np.nanmean(self.noise[module_idx][0, cellid[i]]) #noqa + dd, sh = baseline_correct_via_noise(data[i], + mn_noise, + gain[i], + self.baseline_corr_noise_threshold) #noqa # if not we continue with initial data else: - dd = im[i, ...] + dd = data[i] # if we have enough pixels in medium or low gain and # correction via hist matching is requested to this now - gcrit = np.count_nonzero(gain[i, ...] > 0) > 1000 - if gcrit and self.corr_bools.get('blc_hmatch') and hasattr(self, "rel_gain"): - dd2 = self.correct_baseline_via_hist(im[i, ...], - rel_cor[i, ...], - gain[i, ...]) - im[i, ...] = np.maximum(dd, dd2) - + gcrit = np.count_nonzero(gain[i] > 0) > 1000 + if (gcrit and self.corr_bools.get('blc_hmatch') and + hasattr(self, "rel_gain")): + dd2, sh2 = correct_baseline_via_hist(data[i], + rel_cor[i], + gain[i]) + data[i] = np.maximum(dd, dd2) + sh = np.minimum(sh, sh2) # finally correct diagonal effects if requested if self.corr_bools.get('corr_asic_diag'): - ii = im[i, ...] + ii = data[i, ...] gg = gain[i, ...] - adim = self.correct_baseline_via_hist_asic(ii, gg) - im[i, ...] = adim + adim = correct_baseline_via_hist_asic(ii, gg) + data[i, ...] = adim # if there is not enough medium or low gain data to do an # evaluation, do nothing else: - im[i, ...] = dd + data[i, ...] = dd + bl_shift[i] = sh if self.corr_bools.get('blc_stripes'): - fmh = self.frac_high_med[cellid[i]] - dd = self.baseline_correct_via_stripe(im[i, ...], - gain[i, ...], - msk[i, ...], - fmh) - im[i, ...] = dd - - # now we can correct for relative gain if requested + fmh = self.frac_high_med[module_idx][cellid[i]] + dd, sh = baseline_correct_via_stripe(data[i, ...], + gain[i, ...], + msk[i, ...], + fmh) + data[i, ...] = dd + bl_shift[i] = sh + + # Correct for relative gain if self.corr_bools.get("rel_gain") and hasattr(self, "rel_gain"): - im *= rel_cor - - if self.corr_bools.get("adjust_mg_baseline"): - mgbc = self.md_additional_offset[cellid, ...] - im[gain == 1] += mgbc[gain == 1] + data *= rel_cor + del rel_cor # Set negative values for medium gain to 0 if self.corr_bools.get('blc_set_min'): - im[(im < 0) & (gain == 1)] = 0 + data[(data < 0) & (gain == 1)] = 0 + + # Adjust medium gain baseline to match highest high gain value + if self.corr_bools.get("adjust_mg_baseline"): + mgbc = self.md_additional_offset[module_idx][cellid, ...] 
+ data[gain == 1] += mgbc[gain == 1] + del mgbc # Do xray correction if requested if self.corr_bools.get("xray_corr"): - im *= self.xray_cor - - # try to identify snowy pixels at this point - if self.melt_snow is not False: - ms = self.melt_snow - im, gain, snowmask = self.melt_snowy_pixels(raw, - im, - gain, - rgain, - resolution=ms) - - # finally, with all corrections performed we can match ASIC borders - # if needed + data *= self.xray_cor[module_idx] + + if self.corr_bools.get('melt_snow'): + melt_snowy_pixels(raw_data, data, gain, rgain, + self.snow_resolution) + del raw_data + del rgain + + # Inner ASIC borders are matched to the same signal level if self.corr_bools.get("match_asics"): - im = self.match_asic_borders(im) + data = match_asic_borders(data, 8, module_idx) - # create a bad pixel mask for the data - # we add any non-finite values to the mask - if not self.corr_bools.get("dont_zero_nans"): - bidx = ~np.isfinite(im) - im[bidx] = 0 + # Add any non-finite values to the mask, zero them + if self.corr_bools.get("zero_nans"): + bidx = ~np.isfinite(data) + data[bidx] = 0 msk[bidx] |= BadPixels.VALUE_IS_NAN.value + del bidx - # and such with have unrealistically high and low pixel values - if self.corr_bools.get("dont_zero_orange"): - bidx = (im < -1e7) | (im > 1e7) - im[bidx] = 0 + # Add pixels with unrealistically high and low values to the mask. + # Zero them. + if self.corr_bools.get("zero_orange"): + bidx = (data < -1e7) | (data > 1e7) + data[bidx] = 0 msk[bidx] |= BadPixels.VALUE_OUT_OF_RANGE.value + del bidx - # include pixels with zero standard deviation in the dataset into - # the mask - msk |= self.sig_zero_mask[cellid, ...] - if self.melt_snow is not False: - msk |= snowmask - # for the first chunk output some statistics - if cidx == 0: - copim = copy.copy(im) - copim[copim < self.median_noise] = np.nan - - # avoid 0 hist_pulses, otherwise histogram plot will fail - # hist_pulse must be of a type(int) - if self.max_pulse == 0: - self.hist_pulses = int(self.max_pulse + 1) - else: - self.hist_pulses = int(self.max_pulse) - - bins = (self.bins_signal_low_range, self.hist_pulses) - rnge = [[-50, 1000], [self.min_pulse, - self.max_pulse + 1]] - H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1, 2)), - pulseId, - bins=bins, - range=rnge) - - self.hists_signal_low += H - self.low_edges = (xe, ye) - bins = (self.bins_signal_high_range, self.hist_pulses) - rnge = [[0, 200000], [self.min_pulse, - self.max_pulse + 1]] - H, xe, ye = np.histogram2d(np.nanmean(copim, axis=(1, 2)), - pulseId, - bins=bins, - range=rnge) - self.hists_signal_high += H - self.high_edges = (xe, ye) - - # apply noisy ADC mask if requested - if self.mask_noisy_adc is not None and self.mask_noisy_adc != 0: - if self.adc_mask is None: - self.adc_mask = self.make_noisy_adc_mask(msk) - msk |= self.adc_mask - - # now write out the data - sd = 1 if not self.il_mode else 2 - - if not self.karabo_data_mode: - self.ddset[cidx // sd:nidx // sd, ...] = im - self.gdset[cidx // sd:nidx // sd, ...] = gain - self.mdset[cidx // sd:nidx // sd, ...] 
= msk - self.outfile[agipd_base + "image/cellId"][cidx:nidx] = cellid - self.outfile[agipd_base + "image/trainId"][cidx:nidx] = trainId - self.outfile[agipd_base + "image/pulseId"][cidx:nidx] = pulseId - self.outfile[agipd_base + "image/status"][cidx:nidx] = status - self.outfile[agipd_base + "image/length"][cidx:nidx] = length - self.cidx = nidx - else: - irange['image.data'] = im - irange['image.gain'] = gain - irange['image.mask'] = msk - irange['image.cellId'] = cellid - irange['image.trainId'] = trainId - irange['image.pulseId'] = pulseId - irange['image.status'] = status - irange['image.length'] = length - return irange - - def get_valid_image_idx(self): + # Mask entire ADC if they are noise above a threshold + if self.corr_bools.get("mask_noisy_adc"): + make_noisy_adc_mask(msk, self.noisy_adc_threshold) + + mask[...] = msk[...] + + def get_valid_image_idx(self, idx_base, infile, index_v=2): """ Return the indices of valid data """ - idx_base = self.idx_base - if self.index_v == 2: - count = np.squeeze(self.infile[idx_base + "image/count"]) - first = np.squeeze(self.infile[idx_base + "image/first"]) + if index_v == 2: + count = np.squeeze(infile[idx_base + "image/count"]) + first = np.squeeze(infile[idx_base + "image/first"]) if np.count_nonzero(count != 0) == 0: raise IOError("File has no valid counts") valid = count != 0 - idxtrains = np.squeeze(self.infile["/INDEX/trainId"]) + idxtrains = np.squeeze(infile["/INDEX/trainId"]) medianTrain = np.nanmedian(idxtrains) lowok = (idxtrains > medianTrain - 1e4) highok = (idxtrains < medianTrain + 1e4) valid &= lowok & highok - # Do not filter out double trains - #uq, fidxv, cntsv = np.unique(idxtrains, - # return_index=True, - # return_counts=True) - #valid &= cntsv == 1 # filter out double trains - - self.valid = valid last_index = int(first[valid][-1] + count[valid][-1]) first_index = int(first[valid][0]) @@ -1209,53 +520,45 @@ class AgipdCorrections: validf[i]+validc[i]) for i in range(validf.size)], axis=0) - self.valid_indices = np.squeeze(valid_indices).astype(np.int32) + valid_indices = np.squeeze(valid_indices).astype(np.int32) - elif self.index_v == 1: - status = np.squeeze(self.infile[idx_base + "image/status"]) + elif index_v == 1: + status = np.squeeze(infile[idx_base + "image/status"]) if np.count_nonzero(status != 0) == 0: raise IOError("File {} has no valid counts".format( - self.infile)) - last = np.squeeze(self.infile[idx_base + "image/last"]) - first = np.squeeze(self.infile[idx_base + "image/first"]) + infile)) + last = np.squeeze(infile[idx_base + "image/last"]) + first = np.squeeze(infile[idx_base + "image/first"]) valid = status != 0 last_index = int(last[status != 0][-1]) + 1 first_index = int(first[status != 0][0]) - idxtrains = np.squeeze(self.infile["/INDEX/trainId"]) + idxtrains = np.squeeze(infile["/INDEX/trainId"]) medianTrain = np.nanmedian(idxtrains) lowok = (idxtrains > medianTrain - 1e4) highok = (idxtrains < medianTrain + 1e4) valid &= lowok & highok - self.valid = valid + valid_indices = None else: raise AttributeError( - "Not a known raw format version: {}".format(self.index_v)) + "Not a known raw format version: {}".format(index_v)) - self.valid = valid - self.first_index = first_index - self.last_index = last_index - self.idxtrains = idxtrains + return valid, first_index, last_index, idxtrains, valid_indices - def gen_valid_range(self): + def gen_valid_range(self, first_index, last_index, max_cells, agipd_base, + infile, valid_indices=None): """ Generate an index range to pass to `correctAGIPD`. 
""" - first_index = self.first_index - last_index = self.last_index - max_cells = self.max_cells - agipd_base = self.agipd_base - allcells = np.squeeze(self.infile[agipd_base + "image/cellId"]) - allpulses = np.squeeze(self.infile[agipd_base + "image/pulseId"]) - if self.valid_indices is not None: - allcells = allcells[self.valid_indices] - allpulses = allpulses[self.valid_indices] + + allcells = np.squeeze(infile[agipd_base + "image/cellId"]) + allpulses = np.squeeze(infile[agipd_base + "image/pulseId"]) + if valid_indices is not None: + allcells = allcells[valid_indices] + allpulses = allpulses[valid_indices] else: allcells = allcells[first_index:last_index] allpulses = allpulses[first_index:last_index] - single_image = self.infile[agipd_base + "image/data"][first_index, ...] - single_image = np.array(single_image) - # Calculate the pulse step from the chosen max_pulse range if len(self.rng_pulses) == 3: pulse_step = self.rng_pulses[2] @@ -1294,36 +597,19 @@ class AgipdCorrections: if np.count_nonzero(can_calibrate) == 0: return - allcells = allcells[can_calibrate] - if self.valid_indices is None: + if valid_indices is None: firange = np.arange(first_index, last_index) else: - firange = self.valid_indices + firange = valid_indices firange = firange[can_calibrate] - self.oshape = (firange.size if not self.il_mode else firange.size // 2, - single_image.shape[1], - single_image.shape[2]) - self.firange = firange - self.single_image = single_image + return firange - def copy_and_sanitize_non_cal_data(self): + def copy_and_sanitize_non_cal_data(self, infile, outfile, agipd_base, + idx_base, trains): """ Copy and sanitize data in `infile` that is not touched by `correctAGIPD` """ - agipd_base = self.agipd_base - idx_base = self.idx_base - first_index = self.first_index - last_index = self.last_index - valid = self.valid - idxtrains = self.idxtrains - firange = self.firange - if self.il_mode: - firange = firange[0::2] - alltrains = self.infile[agipd_base + "image/trainId"] - - alltrains = np.squeeze( - alltrains[first_index:last_index, ...]) # these are touched in the correct function, do not copy them here dont_copy = ["data", "cellId", "trainId", "pulseId", "status", @@ -1346,13 +632,13 @@ class AgipdCorrections: if k not in dont_copy: if isinstance(item, h5py.Group): - self.outfile.create_group(k) + outfile.create_group(k) elif isinstance(item, h5py.Dataset): group = str(k).split("/") group = "/".join(group[:-1]) - self.infile.copy(k, self.outfile[group]) + infile.copy(k, outfile[group]) - self.infile.visititems(visitor) + infile.visititems(visitor) # sanitize indices for do in ["image", ]: @@ -1362,13 +648,13 @@ class AgipdCorrections: # Extract parameters through identifying # unique trains, index and numbers. - uq, fidxv, cntsv = np.unique(alltrains[firange - firange[0]], - return_index=True, + uq, fidxv, cntsv = np.unique(trains, return_index=True, return_counts=True) # Validate calculated CORR INDEX contents by checking difference between # trainId stored in RAW data and trains from - train_diff = np.isin(np.array(self.infile["/INDEX/trainId"]), uq, invert=True) + train_diff = np.isin(np.array(infile["/INDEX/trainId"]), uq, + invert=True) # Insert zeros for missing trains. 
# fidxv and cntsv should have same length as @@ -1398,69 +684,17 @@ class AgipdCorrections: fidxv = np.append(fidxv, fidxv[i-1]) # save INDEX contents (first, count) in CORR files - self.outfile.create_dataset(idx_base + "{}/first".format(do), + outfile.create_dataset(idx_base + "{}/first".format(do), fidxv.shape, dtype=fidxv.dtype, data=fidxv, fletcher32=True) - self.outfile.create_dataset(idx_base + "{}/count".format(do), + outfile.create_dataset(idx_base + "{}/count".format(do), cntsv.shape, dtype=cntsv.dtype, data=cntsv, fletcher32=True) - def create_output_datasets(self): - """ Initialize output data sets - """ - agipd_base = self.agipd_base - chunksize = self.chunk_size_idim - firange = self.firange - oshape = self.oshape - chunks = (chunksize, oshape[1], oshape[2]) - self.ddset = self.outfile.create_dataset(agipd_base + "image/data", - oshape, chunks=chunks, - dtype=np.float32, - fletcher32=True) - self.gdset = self.outfile.create_dataset(agipd_base + "image/gain", - oshape, chunks=chunks, - dtype=np.uint8, - compression="gzip", - compression_opts=1, - shuffle=True, - fletcher32=True) - self.mdset = self.outfile.create_dataset(agipd_base + "image/mask", - oshape, chunks=chunks, - dtype=np.uint32, - compression="gzip", - compression_opts=1, - shuffle=True, - fletcher32=True) - - fsz = list(firange.shape) - if self.il_mode: - fsz[0] = fsz[0] // 2 - - self.outfile.create_dataset(agipd_base + "image/cellId", fsz, - dtype=np.uint16, fletcher32=True) - self.outfile.create_dataset(agipd_base + "image/trainId", fsz, - dtype=np.uint64, fletcher32=True) - self.outfile.create_dataset(agipd_base + "image/pulseId", fsz, - dtype=np.uint64, fletcher32=True) - self.outfile.create_dataset(agipd_base + "image/status", fsz, - dtype=np.uint16, fletcher32=True) - self.outfile.create_dataset(agipd_base + "image/length", fsz, - dtype=np.uint32, fletcher32=True) - self.outfile.flush() - - def get_histograms(self): - """ Return preview histograms computed from the first chunk - """ - return ((self.hists_signal_low, self.hists_signal_high, - self.hists_gain_vs_signal, self.hists_dig_gain_vs_signal, - self.hist_pulses), - (self.low_edges, self.high_edges, self.signal_edges, - self.dig_signal_edges)) - def retrieve_constant_and_time(self, const_dict, device, cal_db_interface, creation_time): """ @@ -1493,10 +727,11 @@ class AgipdCorrections: condition, getattr(np, cval[0])(cval[1]), cal_db_interface, - creation_time) + creation_time, + print_once=0) return cons_data, when - def init_constants(self, cons_data, only_dark): + def init_constants(self, cons_data, module_idx): """ For CI derived gain, a mean multiplication factor of 4.48 compared to medium gain is used, as no reliable CI data for all memory cells @@ -1530,7 +765,7 @@ class AgipdCorrections: where m_ff is the flat field derived (high gain) slope of all memory cells of a given pixel. The pixel values are then scaled to - the complete module. Note that the first 32 memory cells are known + the complete module_idx. Note that the first 32 memory cells are known to exhibit differing behaviour and are skipped if possible. With this data the relative gain for the three gain stages evaluates @@ -1541,18 +776,20 @@ class AgipdCorrections: low gain = medium gain / 4.48 :param cons_data: A dictionary for each retrieved constant value. - :param only_dark: A flag for retrieving only dark constants. + :param module_idx: A module_idx index :return: """ - bpixels = cons_data["BadPixelsDark"].astype(np.uint32) + self.offset[module_idx][...] 
= cons_data["Offset"].transpose()[...] + self.noise[module_idx][...] = cons_data["Noise"].transpose()[...] + self.thresholds[module_idx][...] = cons_data["ThresholdsDark"].transpose()[:3,...] # noqa - if self.corr_bools.get('only_offset') or only_dark: - self.initialize(offset=cons_data["Offset"], - rel_gain=None, mask=bpixels, - noise=cons_data["Noise"], - thresholds=cons_data["ThresholdsDark"]) - return + if self.corr_bools.get("low_medium_gap"): + t0 = self.thresholds[module_idx][0] + t1 = self.thresholds[module_idx][1] + t1[t1 <= 1.05 * t0] = np.iinfo(np.uint16).max + + bpixels = cons_data["BadPixelsDark"].astype(np.uint32) if self.corr_bools.get("xray_corr"): bpixels |= cons_data["BadPixelsFF"].astype(np.uint32)[..., :bpixels.shape[2], None] # noqa @@ -1565,26 +802,26 @@ class AgipdCorrections: # them # when calculating the mean X-ray derived gain slope for each pixel if slopesFF.shape[2] > 32: - xray_cor = np.nanmedian(slopesFF[..., 32:min(96, self.max_cells)], - axis=2) + xray_cor = np.nanmedian( + slopesFF[..., 32:min(96, self.max_cells)], axis=2) elif slopesFF.shape[2] > 2: - xray_cor = np.nanmedian(slopesFF[..., :min(96, self.max_cells)], - axis=2) + xray_cor = np.nanmedian( + slopesFF[..., :min(96, self.max_cells)], axis=2) else: xray_cor = np.squeeze(slopesFF[..., 0]) # relative X-ray correction is normalized by the median of all pixels xray_cor /= np.nanmedian(xray_cor) - else: - xray_cor = None + self.xray_cor[module_idx][...] = xray_cor.transpose()[...] + # add additional bad pixel information if any(self.pc_bools): - bppc = np.moveaxis(cons_data["BadPixelsPC"].astype(np.uint32), 0, 2) + bppc = np.moveaxis(cons_data["BadPixelsPC"].astype(np.uint32), 0, 2) # noqa bpixels |= bppc[..., :bpixels.shape[2], None] slopesPC = cons_data["SlopesPC"].astype(np.float32) - # this will handle some historical data in a slighly different format + # this will handle some historical data in a different format # constant dimension injected first if slopesPC.shape[0] == 10 or slopesPC.shape[0] == 11: slopesPC = np.moveaxis(slopesPC, 0, 3) @@ -1611,32 +848,26 @@ class AgipdCorrections: rel_gain[..., 0] = pc_high_m / pc_high_ave rel_gain[..., 1] = pc_med_m / pc_med_ave * frac_high_med rel_gain[..., 2] = rel_gain[..., 1] * 4.48 - else: - md_additional_offset = None - rel_gain = None - frac_high_med = None - - self.initialize(cons_data["Offset"], rel_gain, xray_cor, bpixels, - cons_data["Noise"], - cons_data["ThresholdsDark"], - frac_high_med=frac_high_med, - md_additional_offset=md_additional_offset) + + self.md_additional_offset[module_idx][...] = md_additional_offset.transpose()[...] # noqa + self.rel_gain[module_idx][...] = rel_gain[...].transpose() + self.frac_high_med[module_idx][...] = frac_high_med + + self.mask[module_idx][...] = bpixels.transpose()[...] + return - def initialize_from_yaml(self, const_yaml, qm, only_dark=False): + def initialize_from_yaml(self, const_yaml, module_idx, device): """ Initialize calibration constants from a yaml file :param const_yaml: (Dict) fromed from a yaml file in pre-notebook, which consists of metadata of either the constant file path or the empty constant shape, and the creation-time of the retrieved constants - :param qm: Module's virtual name (e.g. Q1M1) - :param only_dark: A flag for retrieving only dark constants. + :param module_idx: Index of module :return: """ - device = getattr(getattr(Detectors, self.cal_det_instance), qm) - # string of the device name. 
dname = device.device_name cons_data = dict() @@ -1650,13 +881,15 @@ class AgipdCorrections: else: # Create empty constant using the list elements cons_data[cname] = \ - getattr(np, mdata["file-path"][0])(mdata["file-path"][1]) # noqa + getattr(np, mdata["file-path"][0])(mdata["file-path"][1]) - self.init_constants(cons_data, only_dark) + self.init_constants(cons_data, module_idx) return when - def initialize_from_db(self, dbparms, qm, only_dark=False): + def initialize_from_db(self, cal_db_interface, creation_time, memory_cells, + bias_voltage, photon_energy, gain_setting, + acquisition_rate, module_idx, device, only_dark=False): """ Initialize calibration constants from the calibration database :param dbparms: a tuple containing relevant database parameters, @@ -1667,7 +900,7 @@ class AgipdCorrections: database is set to the global value * cal_db_interface, creation_time, max_cells_db, bias_voltage, - photon_energy, max_cells_db_dark, timeout_cal_db + photon_energy, max_cells_db_dark additionally a timeout is given :param qm: quadrant and module of the constants to load in Q1M1 @@ -1713,176 +946,70 @@ class AgipdCorrections: * Constants.AGIPD.SlopesFF """ - max_cells_db_dark = None - - # dbparms has a length of 5 in - # playground/QuickCorrect-SingleModule.ipynb - if len(dbparms) == 5: - (cal_db_interface, creation_time, max_cells_db, - bias_voltage, photon_energy) = dbparms - else: - (cal_db_interface, creation_time, max_cells_db, - bias_voltage, photon_energy, max_cells_db_dark, - timeout_cal_db) = dbparms - - if max_cells_db_dark is None: - max_cells_db_dark = max_cells_db - - # Device object for retrieving constants - device = getattr(getattr(Detectors, self.cal_det_instance), qm) const_dict = \ - assemble_constant_dict(self.corr_bools, self.pc_bools, max_cells_db_dark, - bias_voltage, self.gain_setting, - self.acquisition_rate, photon_energy, + assemble_constant_dict(self.corr_bools, self.pc_bools, + memory_cells, bias_voltage, gain_setting, + acquisition_rate, photon_energy, beam_energy=None, only_dark=only_dark) - cons_data, when =\ + cons_data, when = \ self.retrieve_constant_and_time(const_dict, device, cal_db_interface, creation_time) - self.init_constants(cons_data, only_dark) + self.init_constants(cons_data, module_idx) return when - def initialize_from_file(self, filename, qm, with_dark=True): - """ Initialize calibration constants from a calibration file - - :param filename: path to a file containing the calibration - constants. It is - expected to have the following structure: - - /{qm}/BadPixelsFF/data - /{qm}/BadPixelsPC/data - /{qm}/Offset/data - /{qm}/Noise/data - /{qm}/ThresholdsDark/data - /{qm}/BadPixelsDark/data - /{qm}/SlopesFF/data - /{qm}/SlopesPC/data - - where qm is the `qm` parameter. - - :param qm: quadrant and module of the constants to load in Q1M1 - notation - :param with_dark: also load dark image derived constants from the - file. This will overwrite any constants previously loaded from - the calibration database. - - For CI derived gain, a mean multiplication factor of 4.48 compared - to medium gain is used, as no reliable CI data - exists for all memory cells of the current AGIPD instances. - - Relative gain is derived both from pulse capacitor as well as flat - field data: - - * from the pulse capacitor data we get the relative slopes of a given - pixel's memory cells with respect to all memory cells of that pixel: - - rpch = m_h / median(m_h) - - where m_h is the high gain slope m of each memory cell of the pixel. 
- - * we also derive the factor between high and medium gain in a similar - way and scale it to be relative to the pixels memory cells: - - fpc = m_m/m_h - rfpc = fpc/ median(fpc) - - where m_m is the medium gain slope of all memory cells of a given - pixel and m_h is the high gain slope as above - - * finally, we get the relative X-ray gain of all memory cells for a - given pixel from flat field data: - - ff = median(m_ff) - ff /= median(ff) - - where m_ff is the flat field derived (high gain) slope of all memory - cells of a given pixel. The pixel values are then scaled to the - complete module. Note that the first 32 memory cells are known to - exhibit differing behaviour and are skipped if possible. - - With this data the relative gain for the three gain stages evaluates - to: - - high gain = ff * rpch - medium gain = ff * rfpc - low gain = medium gain / 4.48 - + def allocate_constants(self, modules, constant_shape): """ - offsets = None - bpixels = None - noises = None - thresholds = None - with h5py.File(filename, "r") as calfile: - - bpixels = calfile["{}/{}/data".format(qm, "BadPixelsFF")][()] - try: - bpixels |= np.moveaxis( - calfile["{}/{}/data".format(qm, "BadPixelsPC")][()], 0, 2) - except: - pass - bpixels = bpixels[..., None] # without gain dimension up to here - if with_dark: - offsets = calfile["{}/{}/data".format(qm, "Offset")][()] - noises = calfile["{}/{}/data".format(qm, "Noise")][()] - thresholds = \ - calfile["{}/{}/data".format(qm, "ThresholdsDark")][()] # noqa - bpixels |= calfile["{}/{}/data".format(qm, "BadPixelsDark")][ - ()] - - slopesFF = calfile["{}/{}/data".format(qm, "SlopesFF")][()] - slopesPC = calfile["{}/{}/data".format(qm, "SlopesPC")][()] - if slopesPC.shape[0] == 10: # constant dimension injected first - slopesPC = np.moveaxis(slopesPC, 0, 3) - slopesPC = np.moveaxis(slopesPC, 0, 2) - - # fallback options if calibration files do not contain enough - # memory cells - if slopesPC.shape[-2] < self.max_cells: - shp = list(slopesPC.shape) - shp[-2] = self.max_cells - newSlopes = np.zeros(shp, slopesPC.dtype) - newSlopes[..., :slopesPC.shape[-2], :] = slopesPC - newSlopes[..., slopesPC.shape[-2]:, :] = np.nanmedian(slopesPC, - axis=2)[..., None, :] # noqa - slopesPC = newSlopes - - if bpixels.shape[-2] < self.max_cells: - shp = list(bpixels.shape) - shp[-2] = self.max_cells - newbp = np.zeros(shp, bpixels.dtype) - newbp[..., :bpixels.shape[-2], :] = bpixels - newbp[..., bpixels.shape[-2]:, - :] = BadPixels.INTERPOLATED.value - bpixels = newbp + Allocate memory for correction constants - rel_gain = np.ones((128, 512, self.max_cells, 3), np.float32) - pc_high_m = slopesPC[..., :self.max_cells, 0] - pc_med_m = slopesPC[..., :self.max_cells, 3] - fac_high_med = pc_med_m / pc_high_m - - if len(slopesFF.shape) == 4: - slopesFF = slopesFF[..., 0] - if slopesFF.shape[2] > 32: - xray_cor = np.nanmedian(slopesFF[..., 32:], axis=2) - elif slopesFF.shape[2] > 2: - xray_cor = np.nanmedian( - slopesFF[..., :min(96, self.max_cells)], axis=2) - else: - xray_cor = np.squeeze(slopesFF[..., 0]) - - xray_cor /= np.nanmedian(xray_cor) - pcrel = pc_high_m / np.nanmedian(pc_high_m, axis=2)[..., None] - - rel_gain[..., 0] = pcrel - mfac = fac_high_med / np.nanmedian(fac_high_med, axis=2)[ - ..., None] * np.nanmedian(fac_high_med) - rel_gain[..., 1] = pcrel * mfac - rel_gain[..., 2] = rel_gain[..., 1] * 0.223 + :param modules: Module indises + :param constant_shape: Shape of expected constants (gain, cells, x, y) + """ + for module_idx in modules: + self.offset[module_idx] = 
sharedmem.empty(constant_shape, + dtype='f4') + self.thresholds[module_idx] = sharedmem.empty(constant_shape, + dtype='f4') + self.noise[module_idx] = sharedmem.empty(constant_shape, + dtype='f4') + + self.md_additional_offset[module_idx] = sharedmem.empty( + constant_shape[1:], dtype='f4') + self.rel_gain[module_idx] = sharedmem.empty(constant_shape, + dtype='f4') + self.frac_high_med[module_idx] = sharedmem.empty(constant_shape[1], + dtype='f4') + + self.mask[module_idx] = sharedmem.empty(constant_shape, dtype='i4') + self.xray_cor[module_idx] = sharedmem.empty(constant_shape[2:], + dtype='f4') + + def allocate_images(self, shape, n_cores_files): + """ + Allocate memory for image data - self.initialize(offsets, rel_gain, xray_cor, bpixels, noises, - thresholds, frac_high_med=fac_high_med) + :param shape: Shape of expected data (nImg, x, y) + :param n_cores_files: Number of files, handled in parallel + """ + self.shared_dict = [] + for i in range(n_cores_files): + self.shared_dict.append({}) + self.shared_dict[i]['cellId'] = sharedmem.empty(shape[0], + dtype='i4') + self.shared_dict[i]['pulseId'] = sharedmem.empty(shape[0], + dtype='i4') + self.shared_dict[i]['trainId'] = sharedmem.empty(shape[0], + dtype='i4') + self.shared_dict[i]['moduleIdx'] = sharedmem.empty(1, dtype='i4') + self.shared_dict[i]['nImg'] = sharedmem.empty(1, dtype='i4') + self.shared_dict[i]['mask'] = sharedmem.empty(shape, dtype='i4') + self.shared_dict[i]['data'] = sharedmem.empty(shape, dtype='f4') + self.shared_dict[i]['rawgain'] = sharedmem.empty(shape, dtype='u2') + self.shared_dict[i]['gain'] = sharedmem.empty(shape, dtype='u1') + self.shared_dict[i]['blShift'] = sharedmem.empty(shape[0], + dtype='f4') diff --git a/cal_tools/cal_tools/agipdutils.py b/cal_tools/cal_tools/agipdutils.py index 7322580a04757fe23714dbc7ca728dc762c4011e..daad14eb3a9218c24ecd26febb82483ef591a1cb 100644 --- a/cal_tools/cal_tools/agipdutils.py +++ b/cal_tools/cal_tools/agipdutils.py @@ -1,5 +1,11 @@ -import h5py +import copy + import numpy as np +from scipy.signal import cwt, ricker, find_peaks_cwt +from sklearn.mixture import GaussianMixture +from sklearn.preprocessing import StandardScaler + +from cal_tools.enums import BadPixels, SnowResolution def assemble_constant_dict(corr_bools, pc_bools, memory_cells, bias_voltage, @@ -65,3 +71,574 @@ def assemble_constant_dict(corr_bools, pc_bools, memory_cells, bias_voltage, illumcond] return const_dict + + +def get_shadowed_stripe(data, threshold, fraction): + """ + Return list of shadowed regions. + Shadowed region is presented by the list of lines of pixels along + numpy axis 1. Regions are defined with respect of threshold + and fraction. Margin of one pixel is used. 
+    Double-pixels are stored in separate regions
+
+    :param data: (numpy.ndarray, rank=2) offset corrected single image
+    :param threshold: (float) a threshold, below which
+        a pixel is considered dark
+    :param fraction: (float) a fraction of pixels in a line along axis 1
+        below which a full line is considered as dark
+    """
+
+    npx_all = np.count_nonzero(~np.isnan(data), axis=1)
+    npx_sh = np.count_nonzero(data < threshold, axis=1)
+
+    A = np.array(np.where(npx_sh / npx_all > fraction)[0])
+
+    dp_idx = np.arange(64, 512, 64)
+    dp_idx = np.sort(np.hstack((dp_idx, dp_idx - 1)))
+
+    # grouping indices
+    sh_idx = []
+    tmp_idx = []
+    for idx, i in enumerate(A[1:-1]):
+        if i - 1 not in A:
+            continue
+        if len(tmp_idx) == 0:
+            tmp_idx.append(i)
+            continue
+        if tmp_idx[-1] + 1 == i and (
+                (tmp_idx[-1] in dp_idx) == (i in dp_idx)) and (i + 1 in A):
+            tmp_idx.append(i)
+            if i != A[-2]:
+                continue
+        if len(tmp_idx) > 1:
+            sh_idx.append(list(tmp_idx))
+        tmp_idx = [i]
+
+    return sh_idx
+
+
+def baseline_correct_via_stripe(d, g, m, frac_high_med):
+    """ Correct baseline shifts using shadowed stripes
+
+    :param d: the data to correct, should be an offset corrected single image
+    :param frac_high_med: ratio of high to medium PC slopes
+    :param g: gain array matching `d` array
+    :param m: bad pixel mask
+    """
+
+    dd = copy.copy(d)
+    dd[g != 0] = np.nan  # only high gain data
+    dd[m != 0] = np.nan  # only good pixels
+
+    sh_idxs = get_shadowed_stripe(dd, 30, 0.95)
+
+    # collect all shadowed regions excluding double pixels
+    idx = []
+    for sh_idx in sh_idxs:
+        if len(sh_idx) > 2:
+            idx += sh_idx
+
+    if len(idx) < 3:
+        return d, 0
+
+    shift = np.nanmean(dd[idx, :])
+    d[g == 0] -= shift
+    d[g == 1] -= shift / frac_high_med
+    return d, shift
+
+
+def baseline_correct_via_noise(d, noise, g, threshold):
+    """ Correct baseline shifts by evaluating position of the noise peak
+
+    :param d: the data to correct, should be a single image
+    :param noise: the mean noise for the cell id of `d`
+    :param g: gain array matching `d` array
+    :param threshold: threshold below which the correction is not performed
+
+    Correction is done by identifying the left-most (significant) peak
+    in the histogram of `d` and shifting it to be centered on 0.
+    This is done via a continuous wavelet transform (CWT), using a Ricker
+    wavelet.
+
+    Only high gain data is evaluated, and data larger than 50 ADU,
+    equivalent to roughly a 9 keV photon, is ignored.
+
+    Two passes are executed: the first shift is accurate to 10 ADU and
+    will catch baseline shifts down to -2000 ADU. CWT is evaluated
+    for peaks of widths one, three and five times the noise.
+    The baseline is then shifted by the position of the summed CWT maximum.
+
+    In a second pass the histogram is evaluated within a range of
+    +- 5*noise of the initial shift value, for a peak of width noise.
+
+    Baseline shifts beyond the maximum threshold, or positive shifts
+    larger than 10 ADU, are discarded and the original data is returned;
+    otherwise the shift corrected data is returned.
+
+    """
+
+    seln = (g == 0) & (d <= 50)
+    h, e = np.histogram(d[seln], bins=210, range=(-2000, 100))
+    c = (e[1:] + e[:-1]) / 2
+
+    try:
+        cwtmatr = cwt(h, ricker, [noise, 3. * noise, 5. 
* noise]) + except: + return d, 0 + cwtall = np.sum(cwtmatr, axis=0) + marg = np.argmax(cwtall) + pc = c[marg] + + high_res_range = (d > pc - 5 * noise) & (d < pc + 5 * noise) + h, e = np.histogram(d[seln & high_res_range], bins=200, + range=(pc - 5 * noise, pc + 5 * noise)) + c = (e[1:] + e[:-1]) / 2 + try: + cwtmatr = cwt(h, ricker, [noise, ]) + except: + return d, 0 + marg = np.argmax(cwtmatr) + pc = c[marg] + + # too large shift to be easily decernable via noise + if pc > 10 or pc < threshold: + return d, 0 + return d - pc, pc + + +def correct_baseline_via_hist(d, pcm, g): + """ Correct baseline shifts by matching edges of high and medium + gain histogram + + :param d: single image to correct + :param pcm: relative gain slope for medium gain of each pixel in `d` + :param g: gain values for `d` + + As a preparation the median value of medium gain pixels is shifted in + increments of 50 ADU as long as it is negative. + + Correction is then performed by evaluating histograms for high gain + and medium gain pixels in `d`. The right-most significant bin of the + high gain histogram is then shifted such that it coincides with the + left-most significant bin of the medium gain histogram. Significant + here means that bin counts are at least 10% of the average bin count + of the histogram for all bins larger 10 counts. This initial evaluation + uses histograms in range between -10000 and +10000 ADU with a + resolution of 100 ADU per bin. The shift required to match both + histogram borders is an initial estimate for the baseline shift. + + Finally, the mean bin count of the five outermost bins of the high and + medium gain histograms is compared. The baseline is shifted by + increments of 1 ADU until they are within 20% of each other. + + From this point on additional shifts are performed while the + deviation stays within 30% of each other. The final shift is then + evaluated as the point of minimal deviation of these values. + + A maximum iteration count of 200 is imposed on the procedure. If no + convergence is reached within this limit, the original data is + returned, otherwise the baseline corrected data is returned. 
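+
+    A minimal usage sketch; `image`, `pcm` and `gain` are illustrative
+    arrays matching the parameter descriptions above::
+
+        corrected, shift = correct_baseline_via_hist(image, pcm, gain)
+        # a returned shift of 0 indicates that no correction was applied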
+    """
+    dd = copy.copy(d)
+    # shift while the medium gain produces negative values on average
+    pc = 0
+    it = 0
+    max_it = 100
+    while np.nanmedian(dd[g == 1] - pc) < 0:
+        pc -= 50
+        if it > max_it:
+            return d, 0
+        it += 1
+
+    def min_hist_distance(pc, bins=100, ran=(-10000, 10000), dec=20,
+                          minbin=10):
+        hh, e = np.histogram(dd[g == 0] - pc, bins=bins, range=ran)
+        hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=bins,
+                             range=ran)
+
+        # right most significant value of high gain histogram
+        hhm = np.nanmean(hh[hh > minbin])
+        rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1])
+
+        # left most significant value of medium gain histogram
+        hmm = np.nanmean(hm[hm > minbin])
+        lmm = np.argmax((hm > 0.1 * hmm))
+        med_pcm = np.nanmedian(pcm[g == 1])
+        eh = e[rmh]
+        el = e[lmm]
+        pc += (el - eh) / (med_pcm - 1)
+        return pc
+
+    # set a pc one half step higher than the initial guesstimate
+    pc += 50
+    pc = min_hist_distance(pc)
+
+    # now start rolling back until we have similar signal levels
+    def comp_signal_level(pc, minbin=1):
+        hh, e = np.histogram(dd[g == 0] - pc, bins=100, range=(5000, 7000))
+        hm, e = np.histogram((dd[g == 1] - pc) * pcm[g == 1], bins=100,
+                             range=(5000, 7000))
+
+        # right most significant value of high gain histogram
+        hhm = np.nanmean(hh[hh > minbin])
+        rmh = hh.size - np.argmax((hh > 0.1 * hhm)[::-1])
+
+        # left most significant value of medium gain histogram
+        hmm = np.nanmean(hm[hm > minbin])
+        lmm = np.argmax((hm > 0.1 * hmm))
+
+        reg = 5
+        ret = np.abs(np.sum(hh[rmh - reg:rmh]) - np.sum(hm[lmm:lmm + reg]))
+        ret /= np.sum(hh[rmh - reg:rmh])
+        return ret
+
+    it = 0
+    max_it = 200
+    opc = pc
+    m_it_break = False
+    while comp_signal_level(pc) > 0.2:
+        pc += 1
+        if it > max_it:
+            pc = opc
+            m_it_break = True
+            break
+        it += 1
+
+    it = 0
+    if not m_it_break:
+        pcs = [pc]
+        slevs = []
+        slev = comp_signal_level(pc)
+        slevs.append(slev)
+        while slev < 0.3:
+            pc += 1
+            slev = comp_signal_level(pc)
+            slevs.append(slev)
+            pcs.append(pc)
+            it += 1
+
+        pc = pcs[np.argmin(slevs)]
+
+    return d - pc, pc
+
+
+def correct_baseline_via_hist_asic(d, g):
+    """ Correct diagonal falloff on ASICs by matching corner histograms
+
+    :param d: single image data
+    :param g: gain values for `d`
+
+    Corrections are performed for the top and bottom row of ASICs
+    separately.
+
+    In each row, starting from the ASIC closest to the beam hole, the
+    histogram of a square region of size 8x8 pixels is evaluated for its
+    maximum bin count location (only medium gain values), and compared to a
+    histogram produced from a neighbouring region on the next ASIC.
+    Double sized pixels are avoided.
+
+    The reasoning is that the histograms should not dramatically differ
+    for continuously distributed photon events. Each ASIC is then shifted
+    such that histograms match and the corrected data is returned.
+
+    Warning: this feature is very experimental!
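+
+    A minimal usage sketch; `image` and `gain` are illustrative arrays as
+    described above (the image is modified in place)::
+
+        image, _ = correct_baseline_via_hist_asic(image, gain)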
+    """
+
+    rsize = 8
+
+    def hist_ends(dm, bins=5000, ran=(-5000, 5000),
+                  minbin=4 / (16 / rsize)):
+
+        hm, e = np.histogram(dm, bins=bins, range=ran)
+
+        # left most significant value of medium gain histogram
+        # hmm = np.nanmean(hm[hm > minbin])
+        lmm = np.argmax((hm > minbin))
+
+        return e[lmm], np.sum(hm)
+
+    for i in range(1, 8):
+
+        # upper asics
+        casic = np.concatenate(
+            (d[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(),
+             d[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten()))
+
+        casic_g = np.concatenate(
+            (g[i * 64 - rsize:i * 64 - 1, 64:64 + rsize].flatten(),
+             g[i * 64 + 1:i * 64 + rsize, 64 - rsize:64].flatten()))
+
+        idxm = casic_g == 1
+        cme, cms = hist_ends(casic[idxm])
+
+        asic = d[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten()
+        asic_g = g[i * 64 + 1:i * 64 + rsize, 64:64 + rsize].flatten()
+
+        idxm = asic_g == 1
+        me, ms = hist_ends(asic[idxm])
+
+        if cms > rsize * rsize / 10 and ms > rsize * rsize / 10:
+            pc = cme - me
+        else:
+            pc = 0
+
+        if np.abs(pc) > 500:
+            # something went wrong
+            pc = 0
+        d[i * 64:(i + 1) * 64, 64:] += pc
+
+    for i in range(0, 7):
+        # lower asics
+        casic = np.concatenate((d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
+                                  64:64 + rsize].flatten(),
+                                d[(i + 1) * 64 + 1:(i + 1) * 64 + rsize,
+                                  64 - rsize:64].flatten()))
+
+        casic_g = np.concatenate((g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
+                                    64:64 + rsize].flatten(),
+                                  g[(i + 1) * 64 + 1:(i + 1) * 64 + rsize,
+                                    64 - rsize:64].flatten()))
+
+        idxm = casic_g == 1
+        cme, cms = hist_ends(casic[idxm])
+
+        asic = d[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
+                 64 - rsize:64].flatten()
+        asic_g = g[(i + 1) * 64 - rsize:(i + 1) * 64 - 1,
+                   64 - rsize:64].flatten()
+
+        idxm = asic_g == 1
+        me, ms = hist_ends(asic[idxm])
+
+        if cms > rsize * rsize / 10 and ms > rsize * rsize / 10:
+            pc = cme - me
+        else:
+            pc = 0
+
+        if np.abs(pc) > 500:
+            # something went wrong
+            pc = 0
+        d[i * 64:(i + 1) * 64, :64] += pc
+
+    return d, np.nan
+
+
+def make_noisy_adc_mask(bmask, noise_threshold):
+    """ Mask an entire ADC if it is noisy above a relative threshold
+    """
+    msk = np.count_nonzero(
+        ((bmask & BadPixels.NOISE_OUT_OF_THRESHOLD.value != 0)
+         | (bmask & BadPixels.OFFSET_NOISE_EVAL_ERROR.value != 0)).astype(
+            np.int), axis=0)
+
+    fmask = np.zeros_like(msk).astype(np.uint32)
+    adc_i = bmask.shape[1] // 8
+    adc_j = bmask.shape[2] // 8
+    for i in range(adc_i):
+        for j in range(adc_j):
+            adc_cnt = np.count_nonzero(
+                msk[i * adc_i:(i + 1) * adc_i, j * adc_j:(j + 1) * adc_j] >
+                bmask.shape[0] // 32)
+            if adc_cnt / (adc_i * adc_j) >= noise_threshold:
+                fmask[i * adc_i:(i + 1) * adc_i,
+                      j * adc_j:(j + 1) * adc_j] = BadPixels.NOISY_ADC.value
+    return fmask
+
+
+def match_asic_borders(d, chunk=8, channel=0):
+    """ Match border pixels of the two ASIC rows to the same median value
+
+    :param d: single image data
+    :param chunk: number of pixels on each ASIC boundary to generate
+        mean values for
+    :param channel: Module index of given data
+
+    Each ASIC has `n=64/chunk` mean values calculated along its two border
+    pixel rows. The deviation of chunk pairs is then evaluated and the
+    upper/lower ASICs are shifted by the mean deviation for the
+    right/left hemisphere of the detector.
+
+    The corrected data is returned.
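+
+    A minimal usage sketch; `data` is an illustrative image stack of shape
+    (n_images, 512, 128) and `channel` the module index::
+
+        data = match_asic_borders(data, chunk=8, channel=channel)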
+    """
+    for i in range(8):
+
+        these_asics = d[:, i * 64:(i + 1) * 64, :]
+        diffs = np.zeros((d.shape[0], 8))
+        for k in range(64 // chunk):
+            ldat = these_asics[:, k * chunk:(k + 1) * chunk, 62:64]
+            lowerasic = np.nanmedian(ldat, axis=(1, 2))
+            udat = these_asics[:, k * chunk:(k + 1) * chunk, 64:66]
+            upperasic = np.nanmedian(udat, axis=(1, 2))
+            low_up = lowerasic - upperasic
+            up_low = upperasic - lowerasic
+            diff = low_up if channel < 8 else up_low
+            diffs[:, k] = diff
+
+        mn = np.nanmean(diffs, axis=1)[:, None, None] / 2
+        if channel < 8:
+            d[:, i * 64:(i + 1) * 64, 64:] += mn
+            d[:, i * 64:(i + 1) * 64, :64] -= mn
+        else:
+            d[:, i * 64:(i + 1) * 64, :64] += mn
+            d[:, i * 64:(i + 1) * 64, 64:] -= mn
+    return d
+
+
+def melt_snowy_pixels(raw, im, gain, rgain, resolution=None):
+    """ Identify (and optionally interpolate) 'snowy' pixels
+
+    :param raw: raw data
+    :param im: offset and relative gain corrected data
+    :param gain: gain values for `raw`
+    :param rgain: raw gain data, scaled by the threshold for
+        high-to-medium gain switching
+    :param resolution: resolution strategy, should be of enum-type
+        `SnowResolution`
+
+    Snowy pixels are pixels which are already identified as pixels in
+    the medium gain stage by their gain values, but which have
+    transitional image values between the largest high gain value and
+    the smallest medium gain value. As these values may be encountered again
+    in the context of medium gain, they are ambiguous and cannot
+    readily be identified by simple thresholding alone.
+
+    It is attempted to identify these snowy pixels by using a Gaussian
+    mixture clustering algorithm acting on multispectral input.
+    Positions in the cluster are identified as follows:
+
+    x: abs(p_r-p_bg)*p_rg**2
+    y: p_r
+
+    where p_r is a given pixel raw value, p_bg is the mean value of the
+    8 direct neighbor pixels, and p_rg is the raw gain value of the pixel.
+
+    Note that these cluster params are purely empirically derived,
+    and do not have any connection to ASIC design etc.
+
+    Only pixels in medium gain are evaluated, and evaluation is
+    image-wise, but subdivided further into ASICs.
+
+    The so-created graph [[x_0, x_1, ..., x_n], [y_0, y_1, ..., y_n]] is
+    scaled using a `sklearn.StandardScaler` and input into a two-component
+    `GaussianMixture` clustering algorithm. This results in a set of
+    labels, identifying pixels as likely snowy, or not. However,
+    for a given image we do not know which label is which from the
+    output. Hence, labels are differentiated under the assumption that
+    snowy pixels appear out of context, i.e. their surrounding pixels
+    are more likely at lower signal values than if the pixel were in a
+    region where gain switching led to a large value.
+
+    Depending on the resolution strategy, the pixels identified in this way
+    are either set to `np.nan` or to the interpolated value of the direct
+    (high-gain) neighbors.
+
+    The corrected data is returned alongside an updated gain mask and bad
+    pixel mask.
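+
+    A minimal usage sketch; the array names are illustrative and all share
+    the image-stack shape expected above::
+
+        im, gain, snow_mask = melt_snowy_pixels(
+            raw, im, gain, rgain, resolution=SnowResolution.INTERPOLATE)
+        # snow_mask flags the resolved pixels with BadPixels.INTERPOLATED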
+ """ + snow_mask = np.zeros(im.shape, np.uint32) + for k in range(im.shape[0]): + for i in range(8): + for j in range(2): + + fidx = gain[k, i * 64:(i + 1) * 64, + j * 64:(j + 1) * 64] == 1 + fidx_h = gain[k, i * 64:(i + 1) * 64, + j * 64:(j + 1) * 64] == 0 + + # need at least two pixels in medium gain to be able to + # cluster in two groups + if np.count_nonzero(fidx) < 2: + continue + + # data for a given asic + asic = raw[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] + asic_g = gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] + asic_r = rgain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] + + asic_h = copy.copy(asic) + asic_h[~fidx_h] = np.nan + + # calculate the background of by averaging the 8 direct + # neighbours of each pixel + rl = np.roll(asic, 1, axis=0) + rr = np.roll(asic, -1, axis=0) + ru = np.roll(asic, 1, axis=1) + rd = np.roll(asic, -1, axis=1) + rlu = np.roll(rl, 1, axis=1) + rld = np.roll(rl, -1, axis=1) + rru = np.roll(rr, 1, axis=1) + rrd = np.roll(rr, -1, axis=1) + bg = (rl + rr + ru + rd + rlu + rld + rru + rrd) / 8 + + # calculate the background of by averaging the 8 direct + # neighbours of each pixel + # here only for high gain pixels + rl = np.roll(asic_h, 1, axis=0) + rr = np.roll(asic_h, -1, axis=0) + ru = np.roll(asic_h, 1, axis=1) + rd = np.roll(asic_h, -1, axis=1) + rlu = np.roll(asic_h, 1, axis=1) + rld = np.roll(asic_h, -1, axis=1) + rru = np.roll(asic_h, 1, axis=1) + rrd = np.roll(asic_h, -1, axis=1) + bg_h = np.nanmean( + np.array([rl, rr, ru, rd, rlu, rld, rru, rrd]), axis=0) + + # construct a graph + graph = np.array( + [np.abs(asic[fidx] - bg[fidx]) * asic_r[fidx] ** 2, + asic[fidx]]).T + # scale it + graph = StandardScaler().fit_transform(graph) + # perform clustering + spc = GaussianMixture(n_components=2, random_state=0) + spc.fit(graph) + # and get labels + labels = spc.predict(graph) + + asic = im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] + asic_msk = snow_mask[k, i * 64:(i + 1) * 64, + j * 64:(j + 1) * 64] + mim = asic[fidx] + asicb = bg_h + mimb = asicb[fidx] + mimg = asic_g[fidx] + mmsk = asic_msk[fidx] + + # identify which labels are which + mn1 = np.nanmean(bg[fidx][labels == 0]) + mn2 = np.nanmean(bg[fidx][labels == 1]) + implabel = 1 + if mn1 > mn2: + implabel = 0 + + # if we've misidentified, then we will have to many + # snowy pixels + # happens if the ASIC is generally on a high signal level + if np.count_nonzero([labels == implabel]) > 64 * 64 / 3: + continue + + # or a large portion of misidenfied label covers the ASIC + if np.count_nonzero([labels != implabel]) > 0.8 * 64 * 64: + continue + + # set to NaN if requested + if resolution is SnowResolution.NONE: + mim[labels == implabel] = np.nan + # or use the interpolated value + elif resolution is SnowResolution.INTERPOLATE: + mim[labels == implabel] = mimb[labels == implabel] + mimg[labels == implabel] = 0 + # identify these pixels in a bad pixel mask + mmsk[labels == implabel] = BadPixels.INTERPOLATED.value + + # now set back to data + asic[fidx] = mim + asic_g[fidx] = mimg + asic_msk[fidx] = mmsk + im[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic + gain[k, i * 64:(i + 1) * 64, j * 64:(j + 1) * 64] = asic_g + snow_mask[k, i * 64:(i + 1) * 64, + j * 64:(j + 1) * 64] = asic_msk + return im, gain, snow_mask diff --git a/cal_tools/cal_tools/enums.py b/cal_tools/cal_tools/enums.py index ed7a0457f9a896c72e7b32afccf821f4f1e790db..513b25cb80f2827614987a27eab91ab1c9826d59 100644 --- a/cal_tools/cal_tools/enums.py +++ b/cal_tools/cal_tools/enums.py @@ -26,4 +26,11 @@ class 
BadPixels(Enum): OVERSCAN = 0b001000000000000000000 # bit 19 NON_SENSITIVE = 0b010000000000000000000 # bit 20 NON_LIN_RESPONSE_REGION = 0b100000000000000000000 # bit 21 + + +class SnowResolution(Enum): + """ An Enum specifying how to resolve snowy pixels + """ + NONE = "none" + INTERPOLATE = "interpolate" diff --git a/cal_tools/cal_tools/step_timing.py b/cal_tools/cal_tools/step_timing.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae6e17a9e762c4cfc4ee2994f48d54430422794 --- /dev/null +++ b/cal_tools/cal_tools/step_timing.py @@ -0,0 +1,35 @@ +from collections import defaultdict +from time import perf_counter + +import numpy as np + +class StepTimer: + def __init__(self): + self.steps = defaultdict(list) + self.t0 = None + self.t_latest = None + + def start(self): + """Store a start timestamp""" + t = perf_counter() + if self.t0 is None: + self.t0 = t + self.t_latest = t + + def done_step(self, step_name): + """Record a step timing, since the last call to done_step() or start() + """ + t = perf_counter() + self.steps[step_name].append(t - self.t_latest) + print(f"{step_name}: {t - self.t_latest:.01f} s") + self.t_latest = t + + def timespan(self): + """Wall time from 1st call to start() to last start() or done_step()""" + return self.t_latest - self.t0 + + def print_summary(self): + """Show mean & std for each step""" + for step, data in self.steps.items(): + data = np.asarray(data) + print(f'{step}: {data.mean():.01f} +- {data.std():.02f}s') diff --git a/cal_tools/cython/__init__.py b/cal_tools/cython/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/cal_tools/cython/agipdalgs.pyx b/cal_tools/cython/agipdalgs.pyx new file mode 100644 index 0000000000000000000000000000000000000000..25a71e0984a879bfa971e72addcd01e476a3ce3b --- /dev/null +++ b/cal_tools/cython/agipdalgs.pyx @@ -0,0 +1,179 @@ +import numpy as np +cimport numpy as cnp +cimport cython + +@cython.boundscheck(False) +@cython.wraparound(False) +def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20, + cnp.ndarray[cnp.float32_t, ndim=2] weights=None): + """ + Calculate N histograms along axis 0 with equidistant binning. + N is a data size along axis 1. + Return histograms and bin edges. + """ + + cdef cnp.ndarray[cnp.float32_t, ndim=2] ret + cdef double min, max + min = range[0] + max = range[1] + + ret = np.zeros((bins,data.shape[1]), dtype=np.float32) + cdef double bin_width = (max - min) / bins + cdef double x + + if weights is not None: + assert (<object>weights).shape == (<object>data).shape,\ + "data & weights must have matching shape" + + for j in xrange(data.shape[1]): + for i in xrange(data.shape[0]): + x = (data[i,j] - min) / bin_width + if 0.0 <= x < bins: + if weights is None: + ret[<int>x,j] += 1.0 + else: + ret[<int>x,j] += weights[i,j] + return ret, np.linspace(min, max, bins+1) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def histogram2d(cnp.ndarray[float, ndim=1] data_x, cnp.ndarray[float, ndim=1] data_y, + range=((0,1), (0,1)), bins=(20, 20)): + """ + Calculate 2D histogram with equidistant binning. + Return histogram and bin edges. 
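+
+    A minimal usage sketch; `x` and `y` are illustrative 1-D float32 arrays::
+
+        hist, xedges, yedges = histogram2d(x, y, range=((0, 100), (0, 3)),
+                                           bins=(100, 3))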
+ """ + + cdef cnp.ndarray[cnp.int32_t, ndim=2] ret + cdef double min_x, max_x, min_y, max_y + cdef int bins_x, bins_y + min_x = range[0][0] + max_x = range[0][1] + min_y = range[1][0] + max_y = range[1][1] + bins_x = bins[0] + bins_y = bins[1] + + ret = np.zeros((bins_x, bins_y), dtype=np.int32) + + cdef double bin_width_x = (max_x - min_x) / bins_x + cdef double bin_width_y = (max_y - min_y) / bins_y + cdef double x, y + + assert (<object>data_x).shape == (<object>data_y).shape,\ + "data_x & data_y must have matching shape" + + for i in xrange(data_x.shape[0]): + x = (data_x[i] - min_x) / bin_width_x + y = (data_y[i] - min_y) / bin_width_y + if x >= 0.0 and x < bins_x and y>=0.0 and y<bins_y: + ret[<int>x, <int>y] += 1 + + return ret, np.linspace(min_x, max_x, bins_x+1), np.linspace(min_y, max_y, bins_y+1) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, ndim=4] choices): + """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage""" + cdef int i, j, k + cdef cnp.uint8_t v + cdef cnp.ndarray[cnp.float32_t, ndim=3] out + out = np.zeros_like(a, dtype=np.float32) + + assert (<object>choices).shape == (3,) + (<object>a).shape + + with nogil: + for i in range(a.shape[0]): + for j in range(a.shape[1]): + for k in range(a.shape[2]): + v = a[i, j, k] + out[i, j, k] = choices[v, i, j, k] + + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t, ndim=4] choices): + """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage""" + cdef int i, j, k + cdef cnp.uint8_t v + cdef cnp.ndarray[cnp.int32_t, ndim=3] out + out = np.zeros_like(a, dtype=np.int32) + + assert (<object>choices).shape == (3,) + (<object>a).shape + + with nogil: + for i in range(a.shape[0]): + for j in range(a.shape[1]): + for k in range(a.shape[2]): + v = a[i, j, k] + out[i, j, k] = choices[v, i, j, k] + + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] arr, float lower, float upper): + """ + Return the sum & count of values where lower <= x <= upper, + across axes 2 & 3 (pixels within an ASIC, as reshaped by AGIPD correction code). + Specialised function for performance. + """ + + cdef int i, j, k, l, m + cdef float value + cdef cnp.ndarray[unsigned long long, ndim=2] count + cdef cnp.ndarray[double, ndim=2] sum_ + + # Drop axes -2 & -1 (pixel dimensions within each ASIC) + out_shape = arr[:, :, 0, 0].shape + count = np.zeros(out_shape, dtype=np.uint64) + sum_ = np.zeros(out_shape, dtype=np.float64) + + with nogil: + for i in range(arr.shape[0]): + for k in range(arr.shape[1]): + for l in range(arr.shape[2]): + for m in range(arr.shape[3]): + value = arr[i, k, l, m] + if lower <= value <= upper: + sum_[i, k] += value + count[i, k] += 1 + return sum_, count + + +@cython.boundscheck(False) +@cython.wraparound(False) +def sum_and_count_in_range_cell(cnp.ndarray[float, ndim=4] arr, float lower, float upper): + """ + Return the sum & count of values where lower <= x <= upper, + across axes 0 & 1 (memory cells in the same row, as reshaped by AGIPD correction code). + Specialised function for performance. 
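+
+    A minimal usage sketch; `arr` is an illustrative 4-D float32 view,
+    reshaped by the caller as described above::
+
+        px_sum, px_count = sum_and_count_in_range_cell(arr, -30., 30.)
+        # px_sum / px_count gives the mean of the in-range values per pixel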
+ """ + + cdef int i, j, k, l, m, + cdef float value + cdef cnp.ndarray[unsigned long long, ndim=2] count + cdef cnp.ndarray[double, ndim=2] sum_ + + # Drop axes 0 & 1 + out_shape = arr[0, 0, :, :].shape + count = np.zeros(out_shape, dtype=np.uint64) + sum_ = np.zeros(out_shape, dtype=np.float64) + + + with nogil: + for i in range(arr.shape[0]): + for k in range(arr.shape[1]): + for l in range(arr.shape[2]): + for m in range(arr.shape[3]): + value = arr[i, k, l, m] + if lower <= value <= upper: + sum_[l, m] += value + count[l, m] += 1 + + return sum_, count diff --git a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb index afcd3daba756e66c480d60c6350631cfcc015290..d7803821cf31e4baf5ecc115a3851ea8ff2b361d 100644 --- a/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb +++ b/notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb @@ -6,7 +6,7 @@ "source": [ "# AGIPD Offline Correction #\n", "\n", - "Author: European XFEL Detector Group, Version: 1.0\n", + "Author: European XFEL Detector Group, Version: 2.0\n", "\n", "Offline Calibration for the AGIPD Detector" ] @@ -23,19 +23,20 @@ "outputs": [], "source": [ "cluster_profile = \"noDB\"\n", - "in_folder = \"/gpfs/exfel/exp/SPB/202030//p900119/raw\" # the folder to read data from, required\n", - "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/AGIPD_SPB0\" # the folder to output to, required\n", - "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n", + "in_folder = \"/gpfs/exfel/exp/MID/202030/p900137/raw\" # the folder to read data from, required\n", + "out_folder = \"/gpfs/exfel/exp/MID/202030/p900137/scratch/karnem/r449_v06\" # the folder to output to, required\n", + "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n", "modules = [-1] # modules to correct, set to -1 for all, range allowed\n", - "run = 80 # runs to process, required\n", - "karabo_id = \"SPB_DET_AGIPD1M-1\" # karabo karabo_id\n", + "run = 449 # runs to process, required\n", + "\n", + "karabo_id = \"MID_DET_AGIPD1M-1\" # karabo karabo_id\n", "karabo_da = ['-1'] # a list of data aggregators names, Default [-1] for selecting all data aggregators\n", "receiver_id = \"{}CH0\" # inset for receiver devices\n", "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", "h5path_idx = 'INDEX/{}/DET/{}:xtdf/' # path in the HDF5 file to images\n", "h5path_ctrl = '/CONTROL/{}/MDL/FPGA_COMP_TEST' # path to control information\n", - "karabo_id_control = \"SPB_IRU_AGIPD1M1\" # karabo-id for control device\n", + "karabo_id_control = \"MID_IRU_AGIPD1M1\" # karabo-id for control device\n", "karabo_da_control = 'AGIPD1MCTRL00' # karabo DA for control infromation\n", "\n", "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", @@ -43,27 +44,24 @@ "cal_db_timeout = 30000 # in milli seconds\n", "creation_date_offset = \"00:00:00\" # add an offset to creation date, e.g. to get different constants\n", "\n", - "calfile = \"\" # path to calibration file. Leave empty if all data should come from DB\n", - "nodb = False # if set only file-based constants will be used\n", - "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", - "bias_voltage = 300\n", + "max_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", + "bias_voltage = 300 # Bias voltage\n", "acq_rate = 0. 
# the detector acquisition rate, use 0 to try to auto-determine\n", "gain_setting = 0.1 # the gain setting, use 0.1 to try to auto-determine\n", "photon_energy = 9.2 # photon energy in keV\n", - "interlaced = False # whether data is in interlaced layout\n", "overwrite = True # set to True if existing data should be overwritten\n", - "max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices. 3 allowed maximum list input elements. \n", - "local_input = False\n", - "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n", - "index_v = 2 # version of RAW index type\n", + "max_pulses = [0, 500, 1] # range list [st, end, step] of maximum pulse indices within a train. 3 allowed maximum list input elements. \n", + "mem_cells_db = 0 # set to a value different than 0 to use this value for DB queries\n", + "cell_id_preview = 1 # cell Id used for preview in single-shot plots\n", + "\n", + "# Correction parameters\n", "blc_noise_threshold = 5000 # above this mean signal intensity now baseline correction via noise is attempted\n", - "melt_snow = \"\" # if set to \"none\" snowy pixels are identified and resolved to NaN, if set to \"interpolate\", the value is interpolated from neighbouring pixels\n", - "max_cells_db_dark = 0 # set to a value different than 0 to use this value for dark data DB queries\n", - "max_cells_db = 0 # set to a value different than 0 to use this value for DB queries\n", "chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.\n", - "force_hg_if_below = 1000 # set to a value other than 0 to force a pixel into high gain if it's high gain offset subtracted value is below this threshold\n", - "force_mg_if_below = 1000 # set to a value other than 0 to force a pixel into medium gain if it's medium gain offset subtracted value is below this threshold\n", - "mask_noisy_adc = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above\n", + "hg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel to high gain\n", + "mg_hard_threshold = 1000 # threshold to force medium gain offset subtracted pixel from low to medium gain\n", + "noisy_adc_threshold = 0.25 # threshold to mask complete adc\n", + "cm_dark_fraction = 0.66 # threshold for empty pixels to consider module enough dark to perform CM correction\n", + "cm_n_itr = 4 # number of iterations for common mode correction\n", "\n", "# Correction Booleans\n", "only_offset = False # Apply only Offset correction. if False, Offset is applied by Default. 
if True, Offset is only applied.\n", @@ -74,16 +72,82 @@ "blc_hmatch = False # if set, base line correction via histogram matching is attempted \n", "match_asics = False # if set, inner ASIC borders are matched to the same signal level\n", "adjust_mg_baseline = False # adjust medium gain baseline to match highest high gain value\n", - "dont_zero_nans = False # do not zero NaN values in corrected data\n", - "dont_zero_orange = False # do not zero very negative and very large values\n", + "zero_nans = False # set NaN values in corrected data to 0\n", + "zero_orange = False # set to 0 very negative and very large values in corrected data\n", "blc_set_min = False # Shift to 0 negative medium gain pixels after offset corr\n", - "corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted \n", + "corr_asic_diag = False # if set, diagonal drop offs on ASICs are correted\n", + "force_hg_if_below = True # set high gain if mg offset subtracted value is below hg_hard_threshold\n", + "force_mg_if_below = True # set medium gain if mg offset subtracted value is below mg_hard_threshold\n", + "mask_noisy_adc = False # Mask entire ADC if they are noise above a relative threshold\n", + "common_mode = True # Common mode correction\n", + "melt_snow = False # Identify (and optionally interpolate) 'snowy' pixels\n", + "mask_zero_std = False # Mask pixels with zero standard deviation across train\n", + "low_medium_gap = True # 5% separation in thresholding between low and medium gain\n", + "\n", + "# Paralellization parameters\n", + "sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n", + "chunk_size = 1000 # Size of chunk for image-weise correction\n", + "n_cores_correct = 16 # Number of chunks to be processed in parallel\n", + "n_cores_files = 4 # Number of files to be processed in parallel\n", "\n", "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n", " from xfel_calibrate.calibrate import balance_sequences as bs\n", " return bs(in_folder, run, sequences, sequences_per_node, karabo_da)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "from datetime import timedelta\n", + "from dateutil import parser\n", + "import gc\n", + "import glob\n", + "import itertools\n", + "from IPython.display import HTML, display, Markdown, Latex\n", + "import math\n", + "from multiprocessing import Pool\n", + "import os\n", + "import re\n", + "import sys\n", + "import traceback\n", + "from time import time, sleep, perf_counter\n", + "import tabulate\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "import yaml\n", + "\n", + "from extra_geom import AGIPD_1MGeometry\n", + "from extra_data import RunDirectory, stack_detector_data\n", + "from iCalibrationDB import Detectors\n", + "from mpl_toolkits.mplot3d import Axes3D\n", + "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", + "from matplotlib.colors import LogNorm\n", + "from matplotlib import cm as colormap\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib\n", + "matplotlib.use(\"agg\")\n", + "%matplotlib inline\n", + "import numpy as np\n", + "\n", + "from cal_tools.agipdlib import (AgipdCorrections, get_num_cells, get_acq_rate, get_gain_setting)\n", + "from cal_tools.cython import agipdalgs as calgs\n", + "from cal_tools.ana_tools import get_range\n", + "from cal_tools.enums import BadPixels\n", + "from cal_tools.tools import 
get_dir_creation_date, map_modules_from_folder\n", + "from cal_tools.step_timing import StepTimer\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluated parameters ##" + ] + }, { "cell_type": "code", "execution_count": null, @@ -92,13 +156,13 @@ "source": [ "# Fill dictionaries comprising bools and arguments for correction and data analysis\n", "\n", - "# Here the herarichy and dependability for correction booleans are defined \n", + "# Here the herarichy and dependability for correction booleans are defined\n", "corr_bools = {}\n", "\n", "# offset is at the bottom of AGIPD correction pyramid.\n", "corr_bools[\"only_offset\"] = only_offset\n", "\n", - "# Dont apply any corrections if only_offset is requested \n", + "# Dont apply any corrections if only_offset is requested\n", "if not only_offset:\n", " corr_bools[\"adjust_mg_baseline\"] = adjust_mg_baseline\n", " corr_bools[\"rel_gain\"] = rel_gain\n", @@ -109,11 +173,35 @@ " corr_bools[\"blc_set_min\"] = blc_set_min\n", " corr_bools[\"match_asics\"] = match_asics\n", " corr_bools[\"corr_asic_diag\"] = corr_asic_diag\n", - " corr_bools[\"dont_zero_nans\"] = dont_zero_nans\n", - " corr_bools[\"dont_zero_orange\"] = dont_zero_orange\n", + " corr_bools[\"zero_nans\"] = zero_nans\n", + " corr_bools[\"zero_orange\"] = zero_orange\n", + " corr_bools[\"mask_noisy_adc\"] = mask_noisy_adc\n", + " corr_bools[\"force_hg_if_below\"] = force_hg_if_below\n", + " corr_bools[\"force_mg_if_below\"] = force_mg_if_below\n", + " corr_bools[\"common_mode\"] = common_mode\n", + " corr_bools[\"melt_snow\"] = melt_snow\n", + " corr_bools[\"mask_zero_std\"] = mask_zero_std\n", + " corr_bools[\"low_medium_gap\"] = low_medium_gap \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if in_folder[-1] == \"/\":\n", + " in_folder = in_folder[:-1]\n", + "if sequences[0] == -1:\n", + " sequences = None\n", + "\n", + "control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", + "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", + "h5path = h5path.format(karabo_id, receiver_id)\n", + "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n", "\n", - "# Here the herarichy and dependability for data analysis booleans and arguments are defined \n", - "data_analysis_parms = {}" + "print(f'Path to control file {control_fname}')" ] }, { @@ -127,74 +215,109 @@ }, "outputs": [], "source": [ - "from collections import OrderedDict\n", - "from datetime import timedelta\n", - "import os\n", - "import sys\n", + "# Create output folder\n", + "os.makedirs(out_folder, exist_ok=overwrite)\n", "\n", - "from dateutil import parser\n", - "import h5py\n", - "import numpy as np\n", - "import matplotlib\n", - "matplotlib.use(\"agg\")\n", - "import matplotlib.pyplot as plt\n", - "from ipyparallel import Client\n", - "import yaml\n", - "\n", - "from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions\n", - "from cal_tools.tools import (map_modules_from_folder, parse_runs, run_prop_seq_from_path, get_notebook_name,\n", - " get_dir_creation_date, get_constant_from_db)\n", - "\n", - "from cal_tools.agipdlib import get_gain_setting\n", - "\n", - "# make sure a cluster is running with ipcluster start --n=32, give it a while to start\n", - "print(f\"Connecting to profile {cluster_profile}\")\n", - "view = Client(profile=cluster_profile)[:]\n", - "view.use_dill()\n", - "\n", - "il_mode = interlaced\n", - "max_cells = 
mem_cells\n", - "gains = np.arange(3)\n", - "cells = np.arange(max_cells)\n", - "\n", - "creation_time = None\n", - "if use_dir_creation_date:\n", - " creation_time = get_dir_creation_date(in_folder, run)\n", - " offset = parser.parse(creation_date_offset)\n", - " delta = timedelta(hours=offset.hour, minutes=offset.minute, seconds=offset.second)\n", - " creation_time += delta\n", - " print(f\"Using {creation_time} as creation time\")\n", - "\n", - "print(f\"Working in IL Mode: {il_mode}. Actual cells in use are: {max_cells}\")\n", - "\n", - "if sequences[0] == -1:\n", - " sequences = None\n", + "# Evaluate detector instance for mapping\n", + "instrument = karabo_id.split(\"_\")[0]\n", + "if instrument == \"SPB\":\n", + " dinstance = \"AGIPD1M1\"\n", + "else:\n", + " dinstance = \"AGIPD1M2\"\n", "\n", - "CHUNK_SIZE = 250\n", - "MAX_PAR = 16\n", + "# Evaluate requested modules\n", + "if karabo_da[0] == '-1':\n", + " if modules[0] == -1:\n", + " modules = list(range(16))\n", + " karabo_da = [\"AGIPD{:02d}\".format(i) for i in modules]\n", + "else:\n", + " modules = [int(x[-2:]) for x in karabo_da]\n", + " \n", + "def mod_name(modno):\n", + " return f\"Q{modno // 4 + 1}M{modno % 4 + 1}\"\n", "\n", - "if in_folder[-1] == \"/\":\n", - " in_folder = in_folder[:-1]\n", - "print(\"Outputting to {}\".format(out_folder))\n", + "print(\"Process modules: \", ', '.join(\n", + " [mod_name(x) for x in modules]))\n", + "print(f\"Detector in use is {karabo_id}\")\n", + "print(f\"Instrument {instrument}\")\n", + "print(f\"Detector instance {dinstance}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display Information about the selected pulses indices for correction.\n", + "pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses \n", "\n", - "if not os.path.exists(out_folder):\n", - " os.makedirs(out_folder)\n", - "elif not overwrite:\n", - " raise AttributeError(\"Output path exists! 
Exiting\")\n", + "try:\n", + " if len(pulses_lst) > 1: \n", + " print(\"A range of {} pulse indices is selected: from {} to {} with a step of {}\"\n", + " .format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),\n", + " pulses_lst[1] - pulses_lst[0]))\n", + " else:\n", + " print(\"one pulse is selected: a pulse of idx {}\".format(pulses_lst[0]))\n", + "except Exception as e:\n", + " raise ValueError('max_pulses input Error: {}'.format(e))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set everything up filewise\n", + "mmf = map_modules_from_folder(in_folder, run, path_template,\n", + " karabo_da, sequences)\n", + "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf\n", + "file_list = []\n", "\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", + "# ToDo: Split table over pages\n", + "print(f\"Processing a total of {total_sequences} sequence files in chunks of {n_cores_files}\")\n", + "table = []\n", + "ti = 0\n", + "for k, files in mapped_files.items():\n", + " i = 0\n", + " for f in list(files.queue):\n", + " file_list.append(f)\n", + " if i == 0:\n", + " table.append((ti, k, i, f))\n", + " else:\n", + " table.append((ti, \"\", i, f))\n", + " i += 1\n", + " ti += 1\n", + "md = display(Latex(tabulate.tabulate(table, tablefmt='latex',\n", + " headers=[\"#\", \"module\", \"# module\", \"file\"])))\n", + "file_list = sorted(file_list, key=lambda name: name[-10:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filename = file_list[0]\n", + "channel = int(re.findall(r\".*-AGIPD([0-9]+)-.*\", filename)[0])\n", "\n", - "from cal_tools.agipdlib import SnowResolution\n", - "melt_snow = False if melt_snow == \"\" else SnowResolution(melt_snow)\n", + "# Evaluate number of memory cells\n", + "mem_cells = get_num_cells(filename, karabo_id, channel)\n", + "if mem_cells is None:\n", + " raise ValueError(f\"No raw images found in {filename}\")\n", "\n", - "special_opts = blc_noise_threshold, blc_hmatch, melt_snow\n", + "mem_cells_db = mem_cells if mem_cells_db == 0 else mem_cells_db\n", + "max_cells = mem_cells if max_cells == 0 else max_cells\n", "\n", - "instrument = karabo_id.split(\"_\")[0]\n", - "if instrument == \"SPB\":\n", - " dinstance = \"AGIPD1M1\"\n", + "# Evaluate aquisition rate\n", + "if acq_rate == 0:\n", + " acq_rate = get_acq_rate(filename, karabo_id, channel)\n", "else:\n", - " dinstance = \"AGIPD1M2\"\n" + " acq_rate = None\n", + "\n", + "print(f\"Maximum memory cells to calibrate: {max_cells}\")" ] }, { @@ -203,9 +326,16 @@ "metadata": {}, "outputs": [], "source": [ - "control_fname = f'{in_folder}/r{run:04d}/RAW-R{run:04d}-{karabo_da_control}-S00000.h5'\n", - "h5path_ctrl = h5path_ctrl.format(karabo_id_control)\n", - "\n", + "# Evaluate creation time\n", + "creation_time = None\n", + "if use_dir_creation_date:\n", + " creation_time = get_dir_creation_date(in_folder, run)\n", + " offset = parser.parse(creation_date_offset)\n", + " delta = timedelta(hours=offset.hour,\n", + " minutes=offset.minute, seconds=offset.second)\n", + " creation_time += delta\n", + " \n", + "# Evaluate gain setting\n", "if gain_setting == 0.1:\n", " if creation_time.replace(tzinfo=None) < parser.parse('2020-01-31'):\n", " print(\"Set gain-setting to None for runs taken before 2020-01-31\")\n", @@ -218,423 +348,202 @@ " print(e)\n", " print(\"Set gain settion to 0\")\n", " gain_setting = 0\n", - " \n", - 
"print(f\"Gain setting: {gain_setting}\")\n", - "print(f\"Detector in use is {karabo_id}\")\n", - "print(f\"Instrument {instrument}\")\n", - "print(f\"Detector instance {dinstance}\")\n", - "\n", - "if karabo_da[0] == '-1':\n", - " if modules[0] == -1:\n", - " modules = list(range(16))\n", - " karabo_da = [\"AGIPD{:02d}\".format(i) for i in modules]\n", - "else:\n", - " modules = [int(x[-2:]) for x in karabo_da]\n", - "print(\"Process modules: \",\n", - " ', '.join([f\"Q{x // 4 + 1}M{x % 4 + 1}\" for x in modules]))\n", - "\n", - "h5path = h5path.format(karabo_id, receiver_id)\n", - "h5path_idx = h5path_idx.format(karabo_id, receiver_id)" + " " ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-21T11:30:07.263445Z", - "start_time": "2019-02-21T11:30:07.217070Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "def combine_stack(d, sdim):\n", - " combined = np.zeros((sdim, 2048, 2048))\n", - " combined[...] = np.nan\n", - " \n", - " dy = 0\n", - " for i in range(16):\n", - " \n", - " try:\n", - " if i < 8:\n", - " dx = -512\n", - " if i > 3:\n", - " dx -= 25\n", - " mx = 1\n", - " my = i % 8\n", - " combined[:, my*128+dy:(my+1)*128+dy,\n", - " mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,:,::-1]\n", - " dy += 30\n", - " if i == 3:\n", - " dy += 30\n", + "print(f\"Using {creation_time} as creation time\")\n", + "print(f\"Operating conditions are:\\n• Bias voltage: {bias_voltage}\\n• Memory cells: {mem_cells_db}\\n\"\n", + " f\"• Acquisition rate: {acq_rate}\\n• Gain setting: {gain_setting}\\n• Photon Energy: {photon_energy}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data processing ##" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agipd_corr = AgipdCorrections(max_cells, max_pulses,\n", + " h5_data_path=h5path,\n", + " h5_index_path=h5path_idx,\n", + " corr_bools=corr_bools)\n", + "\n", + "agipd_corr.baseline_corr_noise_threshold = -blc_noise_threshold\n", + "agipd_corr.hg_hard_threshold = hg_hard_threshold\n", + "agipd_corr.mg_hard_threshold = mg_hard_threshold\n", + "\n", + "agipd_corr.cm_dark_fraction = cm_dark_fraction\n", + "agipd_corr.cm_n_itr = cm_n_itr\n", + "agipd_corr.noisy_adc_threshold = noisy_adc_threshold\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve calibration constants to RAM\n", + "agipd_corr.allocate_constants(modules, (3, mem_cells_db, 512, 128))\n", "\n", - " elif i < 12:\n", - " dx = 80 - 50\n", - " if i == 8:\n", - " dy = 4*30 + 30 +50 -30\n", + "const_yaml = None\n", + "if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):\n", + " with open(f'{out_folder}/retrieved_constants.yml', \"r\") as f:\n", + " const_yaml = yaml.load(f.read(), Loader=yaml.FullLoader)\n", "\n", - " mx = 1\n", - " my = i % 8 +4\n", - " combined[:, my*128+dy:(my+1)*128+dy,\n", - " mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]\n", - " dy += 30\n", - " else:\n", - " dx = 100 - 50\n", - " if i == 11:\n", - " dy = 20\n", + "# retrive constants\n", + "def retrieve_constants(mod):\n", + " \"\"\"\n", + " Retrieve calibration constants and load them to shared memory\n", + " \n", + " Metadata for constants is taken from yml file or retrieved from the DB\n", + " \"\"\"\n", + " device = getattr(getattr(Detectors, dinstance), mod_name(mod))\n", + " err = ''\n", + " try:\n", + " # check if there is a yaml file in out_folder that 
has the device constants.\n", + " if const_yaml and device.device_name in const_yaml:\n", + " when = agipd_corr.initialize_from_yaml(const_yaml, mod, device)\n", + " else:\n", + " when = agipd_corr.initialize_from_db(cal_db_interface, creation_time, mem_cells_db, bias_voltage,\n", + " photon_energy, gain_setting, acq_rate, mod, device, False)\n", + " except Exception as e:\n", + " err = f\"Error: {e}\\nError traceback: {traceback.format_exc()}\"\n", + " when = None\n", + " return err, mod, when, device.device_name\n", "\n", - " mx = 1\n", - " my = i - 14\n", "\n", - " combined[:, my*128+dy:(my+1)*128+dy,\n", - " mx*512-dx:(mx+1)*512-dx] = np.rollaxis(d[i],2,1)[:,::-1,:]\n", - " dy += 30\n", - " except:\n", - " continue\n", - " \n", - " return combined" + "ts = perf_counter()\n", + "with Pool(processes=16) as pool:\n", + " const_out = pool.map(retrieve_constants, modules)\n", + "print(f\"Constants were loaded in {perf_counter()-ts:.01f}s\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-21T11:30:07.974174Z", - "start_time": "2019-02-21T11:30:07.914832Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "# set everything up filewise\n", - "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n", - "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf\n", - "MAX_PAR = min(MAX_PAR, total_sequences)" + "# allocate memory for images and hists\n", + "n_images_max = max_cells*256\n", + "data_shape = (n_images_max, 512, 128)\n", + "agipd_corr.allocate_images(data_shape, n_cores_files)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Processed Files ##" + "def batches(l, batch_size):\n", + " \"\"\"Group a list into batches of (up to) batch_size elements\"\"\"\n", + " start = 0\n", + " while start < len(l):\n", + " yield l[start:start + batch_size]\n", + " start += batch_size" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-21T11:30:08.870802Z", - "start_time": "2019-02-21T11:30:08.826285Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "import copy\n", - "from IPython.display import HTML, display, Markdown, Latex\n", - "import tabulate\n", - "print(f\"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}\")\n", - "table = []\n", - "mfc = copy.copy(mapped_files)\n", - "ti = 0\n", - "for k, files in mfc.items():\n", - " i = 0\n", - " while not files.empty():\n", - " f = files.get()\n", - " if i == 0:\n", - " table.append((ti, k, i, f))\n", - " else:\n", - " table.append((ti, \"\", i, f))\n", - " i += 1\n", - " ti += 1\n", - "md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=[\"#\", \"module\", \"# module\", \"file\"]))) \n", - "# restore the queue\n", - "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n", - "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf" + "def imagewise_chunks(img_counts):\n", + " \"\"\"Break up the loaded data into chunks of up to chunk_size\n", + " \n", + " Yields (file data slot, start index, stop index)\n", + " \"\"\"\n", + " for i_proc, n_img in enumerate(img_counts):\n", + " n_chunks = math.ceil(n_img / chunk_size)\n", + " for i in range(n_chunks):\n", + " yield i_proc, i * n_img // n_chunks, (i+1) * n_img // n_chunks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "step_timer = StepTimer()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2019-02-21T11:30:16.057429Z", - "start_time": "2019-02-21T11:30:10.082114Z" - }, - "scrolled": false + "scrolled": true }, "outputs": [], "source": [ - "import copy\n", - "from functools import partial\n", - "def correct_module(max_cells, index_v, CHUNK_SIZE, total_sequences, sequences_qm, \n", - " bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,\n", - " bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,\n", - " special_opts, il_mode, loc, dinstance, force_hg_if_below, force_mg_if_below,\n", - " mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx, const_yaml, inp):\n", - " print(\"foo\")\n", - " import numpy as np\n", - " import copy\n", - " import h5py\n", - " import socket\n", - " import traceback\n", - " from datetime import datetime\n", - " import re\n", - " import os\n", - " # from influxdb import InfluxDBClient\n", - " import subprocess\n", - " from iCalibrationDB import Constants, Conditions, Detectors\n", - " from cal_tools.enums import BadPixels\n", - " from cal_tools.agipdlib import AgipdCorrections, SnowResolution\n", - " from cal_tools.agipdlib import get_num_cells, get_acq_rate\n", - "\n", - " #client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')\n", - " \"\"\"\n", - " def create_influx_entry(run, proposal, qm, sequence, filesize, chunksize,\n", - " total_sequences, success, runtime, reason=\"\"):\n", - " return {\n", - " \"measurement\": \"run_correction\",\n", - " \"tags\": {\n", - " \"host\": socket.gethostname(),\n", - " \"run\": run,\n", - " \"proposal\": proposal,\n", - " \"mem_cells\": max_cells,\n", - " \"sequence\": sequence,\n", - " \"module\": qm,\n", - " \"filesize\": filesize,\n", - " \"chunksize\": chunksize,\n", - " \"total_sequences\": total_sequences,\n", - " \"sequences_module\": sequences_qm[qm],\n", - " \"detector\": \"AGIPD\",\n", - " \"instrument\": \"SPB\",\n", - " \n", - " },\n", - " \"time\": datetime.utcnow().isoformat(),\n", - " \"fields\": {\n", - " \"success\": success,\n", - " \"reason\": reason,\n", - " \"runtime\": runtime, \n", - " }\n", - " }\n", - " \"\"\"\n", - " \n", - " hists_signal_low = None\n", - " hists_signal_high = None\n", - " hists_gain_vs_signal = None\n", - " hists_dig_gain_vs_signal = None\n", - " hist_pulses = None\n", - " low_edges = None\n", - " high_edges = None\n", - " signal_edges = None\n", - " dig_signal_edges = None\n", - " gain_stats = 0\n", - " when = None\n", - " err = None\n", - "\n", - " try:\n", - " start = datetime.now()\n", - " success = True\n", - " reason = \"\"\n", - " filename, filename_out, channel, qm = inp\n", - "\n", - " if max_cells == 0:\n", - " max_cells = get_num_cells(filename, loc, channel)\n", - " if max_cells is None:\n", - " raise ValueError(f\"No raw images found for {qm}\")\n", - " else:\n", - " cells = np.arange(max_cells)\n", - " if acq_rate == 0.:\n", - " acq_rate = get_acq_rate(filename, loc, channel)\n", - " else:\n", - " acq_rate = None\n", - " if dbparms[2] == 0:\n", - " dbparms[2] = max_cells\n", - " if dbparms[5] == 0:\n", - " dbparms[5] = dbparms[2]\n", - "\n", - " print(f\"Set memory cells to {max_cells}\")\n", - " print(f\"Set acquistion rate cells to {acq_rate} MHz\")\n", - "\n", - " # AGIPD correction requires path without the leading \"/\"\n", - " if h5path[0] == '/':\n", - " h5path = h5path[1:]\n", - " if h5path_idx[0] == '/':\n", - " h5path_idx = 
h5path_idx[1:]\n", - "\n", - " infile = h5py.File(filename, \"r\", driver=\"core\")\n", - " outfile = h5py.File(filename_out, \"w\")\n", - "\n", - " try:\n", - " agipd_corr = AgipdCorrections(infile, outfile, max_cells, channel, max_pulses,\n", - " bins_gain_vs_signal, bins_signal_low_range,\n", - " bins_signal_high_range, bins_dig_gain_vs_signal,\n", - " chunk_size_idim=chunk_size_idim,\n", - " il_mode=il_mode, raw_fmt_version=index_v, \n", - " h5_data_path=h5path,\n", - " h5_index_path=h5path_idx,\n", - " cal_det_instance=dinstance, force_hg_if_below=force_hg_if_below,\n", - " force_mg_if_below=force_mg_if_below, mask_noisy_adc=mask_noisy_adc,\n", - " acquisition_rate=acq_rate, gain_setting=gain_setting,\n", - " corr_bools=corr_bools)\n", - " blc_noise_threshold, blc_hmatch, melt_snow = special_opts\n", - " if not corr_bools[\"only_offset\"]:\n", - " blc_hmatch = False\n", - " melt_snow = False\n", - " agipd_corr.baseline_corr_noise_threshold = blc_noise_threshold\n", - " agipd_corr.melt_snow = melt_snow\n", - " try:\n", - " agipd_corr.get_valid_image_idx()\n", - " except IOError:\n", - " return\n", - "\n", - " device = getattr(getattr(Detectors, dinstance), qm)\n", - "\n", - " if not nodb:\n", - " if const_yaml and device.device_name in const_yaml:\n", - " print(fileparms != \"\")\n", - " agipd_corr.initialize_from_yaml(const_yaml, qm,\n", - " only_dark=((fileparms != \"\")))\n", - " else:\n", - " when = agipd_corr.initialize_from_db(dbparms, qm,\n", - " only_dark=(fileparms != \"\"))\n", - "\n", - " if fileparms != \"\" and not corr_bools[\"only_offset\"]:\n", - " agipd_corr.initialize_from_file(fileparms, qm, with_dark=nodb)\n", - " print(\"Initialized constants\")\n", - "\n", - " for irange in agipd_corr.get_iteration_range():\n", - " agipd_corr.correct_agipd(irange)\n", - " print(\"Iterated\")\n", - " hists, edges = agipd_corr.get_histograms()\n", - " hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses = hists\n", - " low_edges, high_edges, signal_edges, dig_signal_edges = edges\n", - " gain_stats = np.array(agipd_corr.gain_stats)\n", - " finally:\n", - " outfile.close()\n", - " infile.close()\n", - " print(\"Closed files\")\n", - " except Exception as e:\n", - " err = f\"Error: {e}\\nError traceback: {traceback.format_exc()}\"\n", - " print(err)\n", - " success = False\n", - " reason = \"Error\"\n", + "with Pool() as pool:\n", + " for file_batch in batches(file_list, n_cores_files):\n", + " # TODO: Move some printed output to logging or similar\n", + " print(f\"Processing next {len(file_batch)} files:\")\n", + " for file_name in file_batch:\n", + " print(\" \", file_name)\n", + " step_timer.start()\n", " \n", - " finally:\n", - " run = re.findall(r'.*r([0-9]{4}).*', filename)[0]\n", - " proposal = re.findall(r'.*p([0-9]{6}).*', filename)[0]\n", - " sequence = re.findall(r'.*S([0-9]{5}).*', filename)[0]\n", - " filesize = os.path.getsize(filename)\n", - " duration = (datetime.now()-start).total_seconds()\n", - " #influx = create_influx_entry(run, proposal, qm, sequence, filesize, CHUNK_SIZE, total_sequences, success, duration, reason)\n", - " #client.write_points([influx])\n", - " return (hists_signal_low, hists_signal_high, hists_gain_vs_signal, hists_dig_gain_vs_signal, hist_pulses,\n", - " low_edges, high_edges, signal_edges, dig_signal_edges, gain_stats, max_cells, acq_rate, when, qm, err)\n", - " \n", - "done = False\n", - "first_files = []\n", - "inp = []\n", - "left = total_sequences\n", - "\n", - "# Display Information about the 
selected pulses indices for correction.\n", - "pulses_lst = list(range(*max_pulses)) if not (len(max_pulses)==1 and max_pulses[0]==0) else max_pulses \n", - "\n", - "try:\n", - " if len(pulses_lst) > 1: \n", - " print(\"A range of {} pulse indices is selected: from {} to {} with a step of {}\"\n", - " .format(len(pulses_lst), pulses_lst[0] , pulses_lst[-1] + (pulses_lst[1] - pulses_lst[0]),\n", - " pulses_lst[1] - pulses_lst[0]))\n", - " else:\n", - " print(\"one pulse is selected: a pulse of idx {}\".format(pulses_lst[0]))\n", - "except Exception as e:\n", - " raise ValueError('max_pulses input Error: {}'.format(e))\n", - " \n", - "bins_gain_vs_signal = (100, 100)\n", - "bins_signal_low_range = 100\n", - "bins_signal_high_range = 100\n", - "bins_dig_gain_vs_signal = (100, 4)\n", - "\n", - "hists_gain_vs_signal = np.zeros((bins_gain_vs_signal), np.float64)\n", - "hists_dig_gain_vs_signal = np.zeros((bins_dig_gain_vs_signal), np.float64)\n", - "gain_stats = 0\n", - "\n", - "low_edges, high_edges, signal_edges, dig_signal_edges = None, None, None, None\n", - "dbparms = [cal_db_interface, creation_time, max_cells_db, bias_voltage,\n", - " photon_energy, max_cells_db_dark, cal_db_timeout]\n", - "\n", - "fileparms = calfile\n", - "\n", - "all_cells = []\n", - "whens = []\n", - "errors = []\n", - "const_yaml = None\n", - "\n", - "if os.path.isfile(f'{out_folder}/retrieved_constants.yml'):\n", - " with open(f'{out_folder}/retrieved_constants.yml', \"r\") as f:\n", - " const_yaml = yaml.load(f.read(), Loader=yaml.FullLoader)\n", - "\n", - "mod_dev = dict()\n", - "while not done:\n", - " dones = []\n", - " first = True\n", - " for i in modules:\n", - " qm = f\"Q{i//4+1}M{i%4+1}\"\n", - " if qm in mapped_files and not mapped_files[qm].empty():\n", - " fname_in = str(mapped_files[qm].get())\n", - " dones.append(mapped_files[qm].empty())\n", - " device = getattr(getattr(Detectors, dinstance), qm)\n", - " mod_dev[qm] = device.device_name\n", - " else:\n", - " print(f\"Skipping {qm}\")\n", - " first_files.append((None, None))\n", - " continue\n", - " fout = os.path.abspath(\"{}/{}\".format(out_folder, (os.path.split(fname_in)[-1]).replace(\"RAW\", \"CORR\")))\n", - " if first:\n", - " first_files.append((fname_in, fout))\n", + " img_counts = pool.starmap(agipd_corr.read_file, enumerate(file_batch))\n", + " step_timer.done_step('Load')\n", " \n", - " inp.append((fname_in, fout, i, qm))\n", - " first = False\n", - "\n", - " if len(inp) >= min(MAX_PAR, left):\n", - " print(f\"Running {len(inp)} tasks parallel\")\n", - " p = partial(correct_module, max_cells, index_v, CHUNK_SIZE, total_sequences,\n", - " sequences_qm, bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range,\n", - " bins_dig_gain_vs_signal, max_pulses, dbparms, fileparms, nodb, chunk_size_idim,\n", - " special_opts, il_mode, karabo_id, dinstance, force_hg_if_below, force_mg_if_below,\n", - " mask_noisy_adc, acq_rate, gain_setting, corr_bools, h5path, h5path_idx, const_yaml)\n", - " r = view.map_sync(p, inp)\n", - " #r = list(map(p, inp))\n", - "\n", - " inp = []\n", - " left -= MAX_PAR\n", - "\n", - " init_hist = False\n", - " for rr in r:\n", - " if rr is not None:\n", - " hl, hh, hg, hdg, hp, low_edges, high_edges, signal_edges, dig_signal_edges, gs, cells, acq_rate, when, qm, err = rr\n", - " all_cells.append(cells)\n", - " whens.append((qm, when))\n", - " errors.append(err)\n", - " # Validate hp to be int not None.\n", - " if not init_hist and hp is not None:\n", - " hists_signal_low = np.zeros((bins_signal_low_range, hp), 
np.float64)\n", - " hists_signal_high = np.zeros((bins_signal_low_range, hp), np.float64)\n", - " init_hist = True\n", - " if hl is not None: # any one being None will also make the others None\n", - " hists_signal_low += hl.astype(np.float64)\n", - " hists_signal_high += hh.astype(np.float64)\n", - " hists_gain_vs_signal += hg.astype(np.float64)\n", - " hists_dig_gain_vs_signal += hdg.astype(np.float64)\n", - " gain_stats += gs\n", - " done = all(dones)\n", - "\n", - "print(f\"Corrected raw data of {cells} memory cells and {acq_rate} MHz acquisition rate\")" + " # Evaluate zero-data-std mask\n", + " pool.starmap(agipd_corr.mask_zero_std, itertools.product(\n", + " range(len(file_batch)), np.array_split(np.arange(agipd_corr.max_cells), n_cores_correct)\n", + " ))\n", + " step_timer.done_step('Mask 0 std')\n", + " \n", + " # Perform image-wise correction\n", + " pool.starmap(agipd_corr.correct_agipd, imagewise_chunks(img_counts))\n", + " step_timer.done_step(\"Image-wise correction\")\n", + " \n", + " # Perform cross-file correction parallel over asics\n", + " pool.starmap(agipd_corr.cm_correction, itertools.product(\n", + " range(len(file_batch)), range(16) # 16 ASICs per module\n", + " ))\n", + " step_timer.done_step(\"Common-mode correction\")\n", + " \n", + " \n", + " # Save corrected data\n", + " pool.starmap(agipd_corr.write_file, [\n", + " (i_proc, file_name, os.path.join(out_folder, os.path.basename(file_name).replace(\"RAW\", \"CORR\")))\n", + " for i_proc, file_name in enumerate(file_batch)\n", + " ])\n", + " step_timer.done_step(\"Save\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Correction of {len(file_list)} files is finished\")\n", + "print(f\"Total processing time {step_timer.timespan():.01f} s\")\n", + "print(f\"Timing summary per batch of {n_cores_files} files:\")\n", + "step_timer.print_summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [], "source": [ @@ -644,12 +553,13 @@ "\n", "to_store = []\n", "line = []\n", - "for i, (qm, when) in enumerate(whens):\n", + "for i, (error, modno, when, mod_dev) in enumerate(const_out):\n", + " qm = mod_name(modno)\n", " # expose errors while applying correction\n", - " if errors[i]:\n", - " print(\"Error: {}\".format(errors[i]) )\n", + " if error:\n", + " print(\"Error: {}\".format(error) )\n", "\n", - " if not const_yaml or mod_dev[qm] not in const_yaml:\n", + " if not const_yaml or mod_dev not in const_yaml:\n", " if fst_print:\n", " print(\"Constants are retrieved with creation time: \")\n", " fst_print = False\n", @@ -657,7 +567,7 @@ " line = [qm]\n", "\n", " # If correction is crashed\n", - " if not errors[i]:\n", + " if not error:\n", " print(f\"{qm}:\")\n", " for key, item in when.items():\n", " if hasattr(item, 'strftime'):\n", @@ -671,7 +581,7 @@ " if when and key in when and when[key]:\n", " line.append(when[key])\n", " else:\n", - " if errors[i] is not None:\n", + " if error is not None:\n", " line.append('Err')\n", " else:\n", " line.append('NA')\n", @@ -680,6 +590,7 @@ " to_store.append(line)\n", "\n", "seq = sequences[0] if sequences else 0\n", + "\n", "if len(to_store) > 0:\n", " with open(f\"{out_folder}/retrieved_constants_s{seq}.yml\",\"w\") as fyml:\n", " yaml.safe_dump({\"time-summary\": {f\"S{seq}\":to_store}}, fyml)" @@ -696,232 +607,236 @@ }, "outputs": [], "source": [ - "from mpl_toolkits.mplot3d import Axes3D\n", - "import matplotlib.pyplot as plt\n", 
- "from matplotlib import cm\n", - "from matplotlib.ticker import LinearLocator, FormatStrFormatter\n", - "import numpy as np\n", - "%matplotlib inline\n", "def do_3d_plot(data, edges, x_axis, y_axis):\n", - " fig = plt.figure(figsize=(10,10))\n", + " fig = plt.figure(figsize=(10, 10))\n", " ax = fig.gca(projection='3d')\n", "\n", " # Make data.\n", " X = edges[0][:-1]\n", " Y = edges[1][:-1]\n", " X, Y = np.meshgrid(X, Y)\n", - " \n", " Z = data.T\n", "\n", " # Plot the surface.\n", - " surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n", + " surf = ax.plot_surface(X, Y, Z, cmap=colormap.coolwarm,\n", " linewidth=0, antialiased=False)\n", " ax.set_xlabel(x_axis)\n", " ax.set_ylabel(y_axis)\n", - " ax.set_zlabel(\"Counts\")" + " ax.set_zlabel(\"Counts\")\n", + "\n", + "\n", + "def do_2d_plot(data, edges, y_axis, x_axis):\n", + " fig = plt.figure(figsize=(10, 10))\n", + " ax = fig.add_subplot(111)\n", + " extent = [np.min(edges[1]), np.max(edges[1]),\n", + " np.min(edges[0]), np.max(edges[0])]\n", + " im = ax.imshow(data[::-1, :], extent=extent, aspect=\"auto\",\n", + " norm=LogNorm(vmin=1, vmax=max(10, np.max(data))))\n", + " ax.set_xlabel(x_axis)\n", + " ax.set_ylabel(y_axis)\n", + " cb = fig.colorbar(im)\n", + " cb.set_label(\"Counts\")" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Signal vs. Analogue Gain ##\n", - "\n", - "The following plot shows plots signal vs. gain for the first 128 images." + "def get_trains_data(run_folder, source, include, tid=None, path='*/DET/*'):\n", + " \"\"\"\n", + " Load single train for all module\n", + " \n", + " :param run_folder: Path to folder with data\n", + " :param source: Data source to be loaded\n", + " :param include: Inset of file name to be considered \n", + " :param tid: Train Id to be loaded. 
First train is considered if None is given\n", + " :param path: Path to find image data inside h5 file\n", + " \n", + " \"\"\"\n", + " run_data = RunDirectory(run_folder, include)\n", + " if tid:\n", + " tid, data = run_data.select('*/DET/*', source).train_from_id(tid)\n", + " return tid, stack_detector_data(data, source)\n", + " else:\n", + " for tid, data in run_data.select('*/DET/*', source).trains(require_all=True):\n", + " return tid, stack_detector_data(data, source)\n", + " return None, None" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-18T17:28:52.857960Z", - "start_time": "2019-02-18T17:28:51.767217Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "if signal_edges is not None:\n", - " do_3d_plot(hists_gain_vs_signal, signal_edges, \"Signal (ADU)\", \"Analogue gain (ADU)\")" + "geom = AGIPD_1MGeometry.from_quad_positions(quad_pos=[\n", + " (-525, 625),\n", + " (-550, -10),\n", + " (520, -160),\n", + " (542.5, 475),\n", + "])" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-18T17:28:53.690522Z", - "start_time": "2019-02-18T17:28:52.860143Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "def do_2d_plot(data, edges, y_axis, x_axis):\n", - " from matplotlib.colors import LogNorm\n", - " fig = plt.figure(figsize=(10,10))\n", - " ax = fig.add_subplot(111)\n", - " extent = [np.min(edges[1]), np.max(edges[1]),np.min(edges[0]), np.max(edges[0])]\n", - " im = ax.imshow(data[::-1,:], extent=extent, aspect=\"auto\", norm=LogNorm(vmin=1, vmax=np.max(data)))\n", - " ax.set_xlabel(x_axis)\n", - " ax.set_ylabel(y_axis)\n", - " cb = fig.colorbar(im)\n", - " cb.set_label(\"Counts\")\n", - " \n", - "if signal_edges is not None:\n", - " do_2d_plot(hists_gain_vs_signal, signal_edges, \"Signal (ADU)\", \"Gain Value (ADU)\")" + "include = '*S00000*' if sequences is None else f'*S{sequences[0]:05d}*'\n", + "tid, corrected = get_trains_data(f'{out_folder}/', 'image.data', include)\n", + "_, gains = get_trains_data(f'{out_folder}/', 'image.gain', include, tid)\n", + "_, mask = get_trains_data(f'{out_folder}/', 'image.mask', include, tid)\n", + "_, blshift = get_trains_data(f'{out_folder}/', 'image.blShift', include, tid)\n", + "_, cellId = get_trains_data(f'{out_folder}/', 'image.cellId', include, tid)\n", + "_, pulseId = get_trains_data(f'{out_folder}/', 'image.pulseId', include, tid)\n", + "_, raw = get_trains_data(f'{in_folder}/r{run:04d}/', 'image.data', include, tid)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(Markdown(f'## Preview and statistics for {gains.shape[0]} images of the train {tid} ##\\n'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Signal vs. Analogue Gain ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist, bins_x, bins_y = calgs.histogram2d(raw[:,0,...].flatten().astype(np.float32),\n", + " raw[:,1,...].flatten().astype(np.float32),\n", + " bins=(100, 100),\n", + " range=[[4000, 8192], [4000, 8192]])\n", + "do_2d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Analogue gain (ADU)\")\n", + "do_3d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Analogue gain (ADU)\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Signal vs. Digitized Gain ##\n", + "### Signal vs. Digitized Gain ###\n", "\n", - "The following plot shows plots signal vs. 
digitized gain for the first 128 images."
+ "The following plot shows signal vs. digitized gain."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-18T17:28:54.370559Z",
- "start_time": "2019-02-18T17:28:53.691959Z"
- }
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "if dig_signal_edges is not None:\n",
- "    do_2d_plot(hists_dig_gain_vs_signal, dig_signal_edges, \"Signal (ADU)\", \"Gain Bit Value\")"
+ "hist, bins_x, bins_y = calgs.histogram2d(corrected.flatten().astype(np.float32),\n",
+ "                                         gains.flatten().astype(np.float32), bins=(100, 3),\n",
+ "                                         range=[[-50, 8192], [0, 3]])\n",
+ "do_2d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Gain bit value\")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-18T17:31:51.668096Z",
- "start_time": "2019-02-18T17:31:51.529158Z"
- }
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "fig, ax = plt.subplots()\n",
- "if isinstance(gain_stats, np.ndarray):\n",
- "    ax.pie(gain_stats, labels=[\"high\", \"medium\", \"low\"], autopct='%1.2f%%',\n",
- "           shadow=True, startangle=27)\n",
- "    a = ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle."
+ "print(\"Gain statistics in %\")\n",
+ "table = [[f'{gains[gains==0].size/gains.size*100:.02f}',\n",
+ "          f'{gains[gains==1].size/gains.size*100:.03f}',\n",
+ "          f'{gains[gains==2].size/gains.size*100:.03f}']] \n",
+ "md = display(Latex(tabulate.tabulate(table, tablefmt='latex',\n",
+ "                                     headers=[\"High\", \"Medium\", \"Low\"])))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "## Mean Intensity per Pulse ##\n",
- "\n",
- "The following plots show the mean signal for each pulse in a detailed and expanded intensity region."
+ "### Intensity per Pulse ###"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
- "ExecuteTime": {
- "end_time": "2019-02-18T17:28:57.327702Z",
- "start_time": "2019-02-18T17:28:54.377061Z"
- },
 "scrolled": false
 },
 "outputs": [],
 "source": [
- "if low_edges is not None:\n",
- "    do_3d_plot(hists_signal_low, low_edges, \"Signal (ADU)\", \"Pulse id\")\n",
- "    do_2d_plot(hists_signal_low, low_edges, \"Signal (ADU)\", \"Pulse id\")\n",
- "if high_edges is not None:\n",
- "    do_3d_plot(hists_signal_high, high_edges, \"Signal (ADU)\", \"Pulse id\")\n",
- "    do_2d_plot(hists_signal_high, high_edges, \"Signal (ADU)\", \"Pulse id\")"
+ "pulse_range = [np.min(pulseId[pulseId>=0]), np.max(pulseId[pulseId>=0])]\n",
+ "\n",
+ "mean_data = np.nanmean(corrected, axis=(2, 3))\n",
+ "hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),\n",
+ "                                         pulseId.flatten().astype(np.float32),\n",
+ "                                         bins=(100, int(pulse_range[1])),\n",
+ "                                         range=[[-50, 1000], pulse_range])\n",
+ "\n",
+ "do_2d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Pulse id\")\n",
+ "do_3d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Pulse id\")\n",
+ "\n",
+ "hist, bins_x, bins_y = calgs.histogram2d(mean_data.flatten().astype(np.float32),\n",
+ "                                         pulseId.flatten().astype(np.float32),\n",
+ "                                         bins=(100, int(pulse_range[1])),\n",
+ "                                         range=[[-50, 200000], pulse_range])\n",
+ "\n",
+ "do_2d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Pulse id\")\n",
+ "do_3d_plot(hist, (bins_x, bins_y), \"Signal (ADU)\", \"Pulse id\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Baseline shift ###\n",
+ "\n",
+ "Estimated baseline shift with respect to the total ADU counts of the corrected image."
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-18T17:29:20.634480Z", - "start_time": "2019-02-18T17:28:57.329231Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "\n", - "corrected = []\n", - "raw = []\n", - "gains = []\n", - "mask = []\n", - "cell_fac = 1\n", - "first_idx = 0\n", - "last_idx = cell_fac*176+first_idx\n", - "pulse_ids = []\n", - "train_ids = []\n", - "for i, ff in enumerate(first_files[:16]):\n", - " try:\n", - "\n", - " rf, cf = ff\n", - " if rf is None:\n", - " \n", - " raise Exception(\"File not present\")\n", - " infile = h5py.File(rf, \"r\")\n", - " datapath = h5path.format(i)\n", - " raw.append(np.array(infile[f\"{datapath}/image/data\"][first_idx:last_idx,0,...]))\n", - " infile.close()\n", - " \n", - " infile = h5py.File(cf, \"r\")\n", - " corrected.append(np.array(infile[f\"{datapath}/image/data\"][first_idx:last_idx,...]))\n", - " gains.append(np.array(infile[f\"{datapath}/image/gain\"][first_idx:last_idx,...]))\n", - " mask.append(np.array(infile[f\"{datapath}/image/mask\"][first_idx:last_idx,...]))\n", - " pulse_ids.append(np.squeeze(infile[f\"{datapath}/image/pulseId\"][first_idx:last_idx,...]))\n", - " train_ids.append(np.squeeze(infile[f\"{datapath}/image/trainId\"][first_idx:last_idx,...]))\n", - " infile.close()\n", - " \n", - " except Exception as e:\n", - " print(e)\n", - " corrected.append(np.zeros((last_idx-first_idx, 512, 128)))\n", - " gains.append(np.zeros((last_idx-first_idx, 512, 128)))\n", - " mask.append(np.zeros((last_idx-first_idx, 512, 128)))\n", - " raw.append(np.zeros((last_idx-first_idx, 512, 128)))\n", - " " + "fig = plt.figure(figsize=(20, 10))\n", + "ax = fig.add_subplot(111)\n", + "h = ax.hist(blshift.flatten(), bins=100, log=True)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-18T17:29:27.025667Z", - "start_time": "2019-02-18T17:29:20.642029Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "domask = False\n", - "if domask:\n", - " for i, c in enumerate(corrected):\n", - " c[mask[i] != 0] = 0\n", - " #c[pats[i] < 200] = 0\n", - " #c[np.abs(pats[i]) == 1000] = np.abs(c[np.abs(pats[i]) == 1000])\n", - "combined = combine_stack(corrected, last_idx-first_idx)\n", - "combined_raw = combine_stack(raw, last_idx-first_idx)\n", - "combined_g = combine_stack(gains, last_idx-first_idx)\n", - "combined_mask = combine_stack(mask, last_idx-first_idx)" + "fig = plt.figure(figsize=(10, 10))\n", + "corrected_ave = np.nansum(corrected, axis=(2, 3))\n", + "plt.scatter(corrected_ave.flatten()/10**6, blshift.flatten(), s=0.9)\n", + "\n", + "plt.xlabel('Illuminated corrected [MADU] ')\n", + "_ = plt.ylabel('Estimated baseline shift [ADU]')" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Mean RAW Preview ###\n", - "\n", - "The per pixel mean of the first 128 images of the RAW data" + "display(Markdown('### Raw preview ###\\n'))\n", + "display(Markdown(f'Mean over images of the RAW data\\n'))" ] }, { @@ -935,23 +850,34 @@ }, "outputs": [], "source": [ - "%matplotlib inline\n", - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.mean(combined_raw[:,:1300,400:1600],axis=0),\n", - " vmin=min(0.75*np.median(combined_raw[combined_raw > 0]), 4000),\n", - " vmax=max(1.5*np.median(combined_raw[combined_raw > 0]), 7000), cmap=\"jet\")\n", - "cb = 
fig.colorbar(im, ax=ax)\n", - "\n" + "data = np.mean(raw[:, 0, ...], axis=0)\n", + "vmin, vmax = get_range(data, 5)\n", + "ax = geom.plot_data_fast(data, ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Single Shot Preview ###\n", - "\n", - "A single shot image from cell 12 of the first train" + "display(Markdown(f'Single shot of the RAW data from cell {np.max(cellId[cell_id_preview])} \\n'))\n", + "fig = plt.figure(figsize=(20, 10))\n", + "ax = fig.add_subplot(111)\n", + "vmin, vmax = get_range(raw[cell_id_preview, 0, ...], 5)\n", + "ax = geom.plot_data_fast(raw[cell_id_preview, 0, ...], ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(Markdown('### Corrected preview ###\\n'))\n", + "display(Markdown(f'A single shot image from cell {np.max(cellId[cell_id_preview])} \\n'))" ] }, { @@ -965,13 +891,10 @@ }, "outputs": [], "source": [ - "\n", - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "dim = combined[1,:1300,400:1600]\n", - "\n", - "im = ax.imshow(dim, vmin=-0, vmax=max(20*np.median(dim[dim > 0]), 100), cmap=\"jet\", interpolation=\"nearest\")\n", - "cb = fig.colorbar(im, ax=ax)" + "vmin, vmax = get_range(corrected[cell_id_preview], 7)\n", + "ax = geom.plot_data_fast(corrected[cell_id_preview], ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)" ] }, { @@ -985,18 +908,20 @@ }, "outputs": [], "source": [ - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "h = ax.hist(dim.flatten(), bins=1000, range=(0, 2000))\n" + "h = ax.hist(corrected[cell_id_preview].flatten(), bins=1000, range=(-50, 2000), log=True)\n", + "_ = plt.xlabel('[ADU]')" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Mean CORRECTED Preview ###\n", - "\n", - "The per pixel mean of the first 128 images of the CORRECTED data" + "display(Markdown('### Mean CORRECTED Preview ###\\n'))\n", + "display(Markdown(f'A mean across one train \\n'))" ] }, { @@ -1010,13 +935,11 @@ }, "outputs": [], "source": [ - "\n", - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.mean(combined[:,:1300,400:1600], axis=0), vmin=-50,\n", - " vmax=max(100*np.median(combined[combined > 0]), 100), cmap=\"jet\", interpolation=\"nearest\")\n", - "cb = fig.colorbar(im, ax=ax)\n", - "\n" + "data = np.mean(corrected, axis=0)\n", + "vmin, vmax = get_range(data, 5)\n", + "ax = geom.plot_data_fast(data, ax=ax, cmap=\"jet\", vmin=vmin, vmax=vmax)" ] }, { @@ -1030,19 +953,28 @@ }, "outputs": [], "source": [ - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "combined[combined <= 0] = 0\n", - "h = ax.hist(combined.flatten(), bins=1000, range=(-50, 1000), log=True)\n" + "h = ax.hist(corrected.flatten(), bins=1200,\n", + " range=(-200, 20000), histtype='step', log=True, label = 'All')\n", + "_ = ax.hist(corrected[gains == 0].flatten(), bins=1200, range=(-200, 20000),\n", + " alpha=0.5, log=True, label='High gain', color='green')\n", + "_ = ax.hist(corrected[gains == 1].flatten(), bins=1200, range=(-200, 20000),\n", + " alpha=0.5, log=True, label='Medium gain', 
color='red')\n", + "_ = ax.hist(corrected[gains == 2].flatten(), bins=1200,\n", + " range=(-200, 20000), alpha=0.5, log=True, label='Low gain', color='yellow')\n", + "_ = ax.legend()\n", + "_ = plt.xlabel('[ADU]')" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Maximum GAIN Preview ###\n", - "\n", - "The per pixel maximum of the first 128 images of the digitized GAIN data" + "display(Markdown('### Maximum GAIN Preview ###\\n'))\n", + "display(Markdown(f'The per pixel maximum across one train for the digitized gain'))" ] }, { @@ -1056,12 +988,10 @@ }, "outputs": [], "source": [ - "\n", - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.max(combined_g[1,:1300,400:1600][None,...], axis=0), vmin=0,\n", - " vmax=3, cmap=\"jet\")\n", - "cb = fig.colorbar(im, ax=ax)\n" + "ax = geom.plot_data_fast(np.max(gains, axis=0), ax=ax,\n", + " cmap=\"jet\", vmin=-1, vmax=3)" ] }, { @@ -1085,22 +1015,21 @@ }, "outputs": [], "source": [ - "from cal_tools.enums import BadPixels\n", - "from IPython.display import HTML, display, Markdown, Latex\n", - "import tabulate\n", "table = []\n", "for item in BadPixels:\n", " table.append((item.name, \"{:016b}\".format(item.value)))\n", - "md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=[\"Bad pixel type\", \"Bit mask\"])))" + "md = display(Latex(tabulate.tabulate(table, tablefmt='latex',\n", + " headers=[\"Bad pixel type\", \"Bit mask\"])))" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "### Single Shot Bad Pixels ###\n", - "\n", - "A single shot bad pixel map from cell 4 of the first train" + "display(Markdown(f'### Single Shot Bad Pixels ### \\n'))\n", + "display(Markdown(f'A single shot bad pixel map from cell {np.max(cellId[cell_id_preview])} \\n'))" ] }, { @@ -1114,11 +1043,9 @@ }, "outputs": [], "source": [ - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.log2(combined_mask[10,:1300,400:1600]), vmin=0,\n", - " vmax=32, cmap=\"jet\")\n", - "cb = fig.colorbar(im, ax=ax)" + "ax = geom.plot_data_fast(np.log2(mask[cell_id_preview]), ax=ax, vmin=0, vmax=32, cmap=\"jet\")" ] }, { @@ -1127,7 +1054,7 @@ "collapsed": true }, "source": [ - "### Full Train Bad Pixels ###" + "### Percentage of Bad Pixels across one train ###" ] }, { @@ -1141,36 +1068,17 @@ }, "outputs": [], "source": [ - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.log2(np.max(combined_mask[:,:1300,400:1600], axis=0)), vmin=0,\n", - " vmax=32, cmap=\"jet\")\n", - "cb = fig.colorbar(im, ax=ax)" + "ax = geom.plot_data_fast(np.mean(mask>0, axis=0),\n", + " vmin=0, ax=ax, vmax=1, cmap=\"jet\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Full Train Bad Pixels - Only Dark Char. 
Related ###" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2019-02-18T17:29:53.662423Z", - "start_time": "2019-02-18T17:29:51.688376Z" - } - }, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(20,10))\n", - "ax = fig.add_subplot(111)\n", - "im = ax.imshow(np.max((combined_mask.astype(np.uint32)[:,:1300,400:1600] & BadPixels.NOISY_ADC.value) != 0, axis=0), vmin=0,\n", - " vmax=1, cmap=\"jet\")\n", - "cb = fig.colorbar(im, ax=ax)" + "### Percentage of Bad Pixels across one train. Only Dark Related ###" ] }, { @@ -1184,14 +1092,12 @@ }, "outputs": [], "source": [ - "from cal_tools.enums import BadPixels\n", - "fig = plt.figure(figsize=(20,10))\n", + "fig = plt.figure(figsize=(20, 10))\n", "ax = fig.add_subplot(111)\n", - "cm = combined_mask[:,:1300,400:1600]\n", + "cm = np.copy(mask)\n", "cm[cm > BadPixels.NO_DARK_DATA.value] = 0\n", - "im = ax.imshow(np.log2(np.max(cm, axis=0)), vmin=0,\n", - " vmax=32, cmap=\"jet\")\n", - "cb = fig.colorbar(im, ax=ax)" + "ax = geom.plot_data_fast(np.mean(cm>0, axis=0),\n", + " vmin=0, ax=ax, vmax=1, cmap=\"jet\")" ] } ], diff --git a/requirements.txt b/requirements.txt index e428b6d8836185ab1bc549a6a019867763e85a09..d8bd3e2677a2d766eb182acaedefe0043a243424 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ git+file:///gpfs/exfel/sw/calsoft/git/pyDetLib@2.5.3-2.7.0#subdirectory=lib astcheck == 0.2.5 astsearch == 0.1.3 dill == 0.3.0 +extra_data == 1.2.0 extra_geom == 0.8.0 fabio == 0.9.0 gitpython == 3.1.0 @@ -28,6 +29,7 @@ python-dateutil == 2.8.1 pyyaml == 5.3 pyzmq == 19.0.0 scikit-learn == 0.22.2.post1 +sharedmem == 0.3.7 sphinx == 1.4.5 tabulate == 0.8.6 unittest-xml-reporting == 3.0.2 diff --git a/setup.py b/setup.py index b93537e269853e03be6300828022df7f61c1efbd..8213e937d6d08a795d7e9ed466653a7df4ec43c3 100644 --- a/setup.py +++ b/setup.py @@ -2,8 +2,18 @@ from setuptools import setup from setuptools.command.install import install from subprocess import check_call, check_output from distutils.command.build import build + +from Cython.Distutils import build_ext +from distutils.extension import Extension import sys +import numpy +extensions = [Extension("cal_tools.cython.agipdalgs", + ['cal_tools/cython/agipdalgs.pyx'], + include_dirs=[numpy.get_include()], + extra_compile_args=['-fopenmp', '-march=native'], + extra_link_args=['-fopenmp'], ), + ] class PostInstallCommand(install): """Post-installation for installation mode.""" @@ -61,6 +71,7 @@ setup( cmdclass={ 'build' : PreInstallCommand, 'install': PostInstallCommand, + 'build_ext': build_ext }, url='', license='(c) European XFEL GmbH 2018', @@ -72,6 +83,7 @@ setup( 'xfel-calibrate = xfel_calibrate.calibrate:run', ], }, + ext_modules=extensions )
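
Note on the new Cython extension: the agipdalgs module registered in setup.py is what provides the calgs.histogram2d calls used by the notebook plots above. Below is a minimal sketch of how the extension might be built and sanity-checked against NumPy. The build command assumes the standard setuptools/Cython build_ext workflow that setup.py wires up, and the assumption that calgs.histogram2d follows the same binning convention as np.histogram2d is mine, not stated in this change.

# Build the OpenMP-enabled Cython extension in place
# (setup.py registers Cython's build_ext command):
#     python setup.py build_ext --inplace

import numpy as np
from cal_tools.cython import agipdalgs as calgs

# Synthetic signal/analogue-gain samples in the range used by the notebook plots
x = np.random.uniform(4000, 8192, 100000).astype(np.float32)
y = np.random.uniform(4000, 8192, 100000).astype(np.float32)

# Same call pattern as in the notebook: counts plus the two bin axes
hist, bins_x, bins_y = calgs.histogram2d(x, y, bins=(100, 100),
                                         range=[[4000, 8192], [4000, 8192]])

# Cross-check the counts against NumPy's reference implementation
# (assumes calgs.histogram2d bins identically to np.histogram2d)
ref, _, _ = np.histogram2d(x, y, bins=(100, 100),
                           range=[[4000, 8192], [4000, 8192]])
assert np.allclose(hist, ref)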