diff --git a/src/calng/agipd_gpu.py b/src/calng/agipd_gpu.py index 110939ec8509c5ddc322081423bc83b4a7537780..1d8d2d150ce3e22ac32f4166b42411fba1b1ba31 100644 --- a/src/calng/agipd_gpu.py +++ b/src/calng/agipd_gpu.py @@ -41,7 +41,7 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): ) self.gain_map_gpu = cupy.empty(self.processed_shape, dtype=np.uint8) - self.map_shape = (self.pixels_x, self.pixels_y, self.constant_memory_cells) + self.map_shape = (self.constant_memory_cells, self.pixels_x, self.pixels_y) self.gm_map_shape = self.map_shape + (3,) # for gain-mapped constants self.threshold_map_shape = self.map_shape + (2,) # constants @@ -50,8 +50,7 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): ) self.offset_map_gpu = cupy.zeros(self.gm_map_shape, dtype=np.float32) self.rel_gain_pc_map_gpu = cupy.ones(self.gm_map_shape, dtype=np.float32) - # optional extra offset for medium gain - self.md_additional_offset_gpu = cupy.zeros(1, dtype=np.float32) + self.md_additional_offset_gpu = cupy.zeros(self.map_shape, dtype=np.float32) self.rel_gain_xray_map_gpu = cupy.ones(self.map_shape, dtype=np.float32) self.badpixel_map_gpu = cupy.zeros(self.map_shape, dtype=np.uint32) @@ -59,33 +58,36 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): def load_thresholds(self, threshold_map): # shape: y, x, memory cell, threshold 0 / threshold 1 / 3 gain values - # TODO: do we need the gain values for anything? - to_set = np.transpose(threshold_map[..., :2], (1, 0, 2, 3)).astype(np.float32) + # TODO: do we need the gain values (in the constant) for anything? self.gain_thresholds_gpu.set( - to_set + np.transpose(threshold_map[..., :2], (2, 1, 0, 3)).astype(np.float32) ) def load_offset_map(self, offset_map): # shape: y, x, memory cell, gain stage self.offset_map_gpu.set( - np.transpose(offset_map, (1, 0, 2, 3)).astype(np.float32) + np.transpose(offset_map, (2, 1, 0, 3)).astype(np.float32) ) def load_rel_gain_pc_map(self, slopes_pc_map): # pc has funny shape (11, 352, 128, 512) from file # this is (fi, memory cell, y, x) + slopes_pc_map = slopes_pc_map.astype(np.float32) pc_high_m = slopes_pc_map[0] pc_high_I = slopes_pc_map[1] pc_med_m = slopes_pc_map[3] pc_med_I = slopes_pc_map[4] frac_high_med = pc_high_m / pc_med_m # TODO: verify formula - md_additional_offset = pc_high_I - pc_med_I * frac_high_med - self.rel_gain_pc_map_gpu[..., 0] = 1 # rel xray gain can come after - self.rel_gain_pc_map_gpu[..., 1] = self.rel_gain_pc_map[..., 0] * frac_high_med - self.rel_gain_pc_map_gpu[..., 2] = self.rel_gain_pc_map[..., 1] * 4.48 + md_additional_offset = (pc_high_I - pc_med_I * frac_high_med).astype(np.float32) + rel_gain_map = np.ones( + (3, self.constant_memory_cells, self.pixels_y, self.pixels_x), dtype=np.float32 + ) + rel_gain_map[1] = rel_gain_map[0] * frac_high_med + rel_gain_map[2] = rel_gain_map[1] * 4.48 # TODO: enable overriding this based on user input - self.md_additional_offset_gpu.set(md_additional_offset) + self.rel_gain_pc_map_gpu.set(np.transpose(rel_gain_map, (1, 3, 2, 0))) + self.md_additional_offset_gpu.set(np.transpose(md_additional_offset, (0, 2, 1))) def load_rel_gain_ff_map(self, slopes_ff_map): # constant shape: y, x, memory cell @@ -93,7 +95,7 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): # old format, is per pixel only raise NotImplementedError("Old slopes FF map format") self.rel_gain_xray_map_gpu.set( - np.transpose(slopes_ff_map, (1, 0, 2)).astype(np.float32) + np.transpose(slopes_ff_map).astype(np.float32) ) def correct(self, flags): @@ -108,6 +110,8 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): (view of) said buffer as an ndarray. Keep in mind that the output buffers will get overwritten eventually (circular buffer). """ + if flags & CorrectionFlags.BLSHIFT: + raise NotImplementedError("Baseline shift not implemented yet") self.correction_kernel( self.full_grid, self.full_block, @@ -137,7 +141,6 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner): "output_data_dtype": utils.np_dtype_to_c_type(self.output_data_dtype), } ) - print(kernel_source) self.source_module = cupy.RawModule(code=kernel_source) self.correction_kernel = self.source_module.get_function("correct") self.casting_kernel = self.source_module.get_function("only_cast") diff --git a/src/calng/agipd_gpu_kernels.cpp b/src/calng/agipd_gpu_kernels.cpp index 1212e00b14b89118a92751bdd4727afc12ed72da..d11af9737db09bbcb6eaf8f62a0881c1388feefc 100644 --- a/src/calng/agipd_gpu_kernels.cpp +++ b/src/calng/agipd_gpu_kernels.cpp @@ -21,7 +21,7 @@ extern "C" { const float* threshold_map, const float* offset_map, const float* rel_gain_pc_map, - const float md_additional_offset, + const float* md_additional_offset, const float* rel_gain_xray_map, const unsigned int* bad_pixel_map, unsigned char* gain_map, @@ -60,17 +60,22 @@ extern "C" { const size_t output_stride_cell = output_stride_x * X; const size_t output_index = cell * output_stride_cell + x * output_stride_x + y * output_stride_y; - // threshold constant shape: x, y, cell, threshold (dim size 2) + // per-pixel only constant: cell, x, y + const size_t map_stride_y = 1; + const size_t map_stride_x = Y * map_stride_y; + const size_t map_stride_cell = X * map_stride_x; + + // threshold constant shape: cell, x, y, threshold (dim size 2) const size_t threshold_map_stride_threshold = 1; - const size_t threshold_map_stride_cell = 2 * threshold_map_stride_threshold; - const size_t threshold_map_stride_y = map_cells * threshold_map_stride_cell; - const size_t threshold_map_stride_x = Y * threshold_map_stride_y; + const size_t threshold_map_stride_y = 2 * threshold_map_stride_threshold; + const size_t threshold_map_stride_x = Y * threshold_map_stride_y; + const size_t threshold_map_stride_cell = X * threshold_map_stride_x; - // gain mapped constant shape: x, y, memory cell, gain_level (dim size 3) + // gain mapped constant shape: cell, x, y, gain_level (dim size 3) const size_t gm_map_stride_gain = 1; - const size_t gm_map_stride_cell = 3 * gm_map_stride_gain; - const size_t gm_map_stride_y = map_cells * gm_map_stride_cell; - const size_t gm_map_stride_x = Y * gm_map_stride_y; + const size_t gm_map_stride_y = 3 * gm_map_stride_gain; + const size_t gm_map_stride_x = Y * gm_map_stride_y; + const size_t gm_map_stride_cell = X * gm_map_stride_x; // TODO: handle multiple maps, multiple strides, multiple limits const size_t map_cell = cell_table[cell]; @@ -96,6 +101,10 @@ extern "C" { } } + const size_t map_index = map_cell * map_stride_cell + + y * map_stride_y + + x * map_stride_x; + const size_t gm_map_index = gain * gm_map_stride_gain + map_cell * gm_map_stride_cell + y * gm_map_stride_y + @@ -111,7 +120,7 @@ extern "C" { if (corr_flags & CORR_REL_GAIN_PC) { corrected *= rel_gain_pc_map[gm_map_index]; if (gain == 1) { - corrected += md_additional_offset; + corrected += md_additional_offset[map_index]; } } if (corr_flags & CORR_REL_GAIN_XRAY) { diff --git a/src/calng/base_gpu.py b/src/calng/base_gpu.py index 0c1d2cd763abcc9ef980f5f52e58e473c7686fa6..ef8d3d3cf5609fc066dc27d2c91dccf31954271d 100644 --- a/src/calng/base_gpu.py +++ b/src/calng/base_gpu.py @@ -65,7 +65,7 @@ class BaseGpuRunner: # reuse output arrays self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=np.uint16) self.input_data_gpu = cupy.empty(self.input_shape, dtype=input_data_dtype) - self.processed_data_gpu = cupy.empty(self.input_shape, dtype=output_data_dtype) + self.processed_data_gpu = cupy.empty(self.processed_shape, dtype=output_data_dtype) self.reshaped_data_gpu = cupy.empty(self.output_shape, dtype=output_data_dtype) self.preview_raw = cupyx.empty_pinned(self.preview_shape, dtype=np.float32) self.preview_corrected = cupyx.empty_pinned( @@ -107,8 +107,6 @@ class BaseGpuRunner: return self.reshaped_data_gpu.get(out=out) def load_data(self, raw_data): - print(raw_data.shape) - print(self.input_data_gpu.shape) self.input_data_gpu.set(np.squeeze(raw_data)) def load_cell_table(self, cell_table): diff --git a/src/tests/test_agipd_kernels.py b/src/tests/test_agipd_kernels.py index 91015e6a0508bbdedbc4b35e487310c91dfeb871..4a5aa10db181738274ebbd2c21eb90e44c2b39c1 100644 --- a/src/tests/test_agipd_kernels.py +++ b/src/tests/test_agipd_kernels.py @@ -1,4 +1,6 @@ +import h5py import numpy as np +import pathlib import pytest from calng import agipd_gpu @@ -9,19 +11,24 @@ corr_dtype = np.float32 pixels_x = 512 pixels_y = 128 memory_cells = 352 -offset_map = ( - np.random.random(size=(pixels_x, pixels_y, memory_cells)).astype(corr_dtype) * 20 -) -cell_table = np.arange(memory_cells, dtype=np.uint16) -np.random.shuffle(cell_table) + raw_data = np.random.randint( low=0, high=2000, size=(memory_cells, 2, pixels_x, pixels_y), dtype=input_dtype ) +image_data = raw_data[:, 0] +raw_gain = raw_data[:, 1] +cell_table = np.arange(memory_cells, dtype=np.uint16) +np.random.shuffle(cell_table) + +caldb_store = pathlib.Path("/gpfs/exfel/d/cal/caldb_store/xfel/cal") +caldb_prefix = caldb_store / "agipd-type/agipd_siv1_agipdv11_m305" -thresholds = np.random.random(size=(pixels_y, pixels_x, memory_cells)) * 1000 -thresholds = np.stack((thresholds, thresholds*2), axis=3).astype(np.float32) -gm_const_zeros = np.zeros((pixels_x, pixels_y, memory_cells, 3), dtype=np.float32) -gm_const_ones = np.ones((pixels_x, pixels_y, memory_cells, 3), dtype=np.float32) +with h5py.File(caldb_prefix / "cal.1619543695.4679213.h5", "r") as fd: + thresholds = np.array(fd["/AGIPD_SIV1_AGIPDV11_M305/ThresholdsDark/0/data"]) +with h5py.File(caldb_prefix / "cal.1619543664.1545036.h5", "r") as fd: + offset_map = np.array(fd["/AGIPD_SIV1_AGIPDV11_M305/Offset/0/data"]) +with h5py.File(caldb_prefix / "cal.1615377705.8904035.h5", "r") as fd: + slopes_pc_map = np.array(fd["/AGIPD_SIV1_AGIPDV11_M305/SlopesPC/0/data"]) kernel_runner = agipd_gpu.AgipdGpuRunner( pixels_x, @@ -32,29 +39,79 @@ kernel_runner = agipd_gpu.AgipdGpuRunner( output_data_dtype=output_dtype, ) + def thresholding_cpu(data, cell_table, thresholds): # get to memory_cell, x, y - raw_gain = data[:, 1, ...].astype(np.float32) + raw_gain = data[:, 1, ...].astype(corr_dtype) # get to threshold, memory_cell, x, y - thresholds = np.transpose(thresholds, (3, 2, 1, 0))[:, cell_table] + thresholds = np.transpose(thresholds)[:, cell_table] res = np.zeros((memory_cells, pixels_x, pixels_y), dtype=np.uint8) res[raw_gain > thresholds[0]] = 1 res[raw_gain > thresholds[1]] = 2 return res -kernel_runner.load_cell_table(cell_table) -kernel_runner.load_data(raw_data) -kernel_runner.load_thresholds(thresholds) -kernel_runner.correct(agipd_gpu.CorrectionFlags.THRESHOLD) -gpu_res = kernel_runner.gain_map_gpu.get() -print(np.sum(gpu_res, axis=(0,2))) +gain_map_cpu = thresholding_cpu(raw_data, cell_table, thresholds) + + +def corr_offset_cpu(data, cell_table, gain_map, offset): + image_data = data[:, 0].astype(corr_dtype) + offset = np.transpose(offset)[:, cell_table] + return (image_data - np.choose(gain_map, offset)).astype(output_dtype) + + +def corr_rel_gain_pc_cpu(data, cell_table, gain_map, slopes_pc): + slopes_pc = slopes_pc.astype(np.float32) + pc_high_m = slopes_pc[0] + pc_high_I = slopes_pc[1] + pc_med_m = slopes_pc[3] + pc_med_I = slopes_pc[4] + frac_high_med = pc_high_m / pc_med_m + md_additional_offset = pc_high_I - pc_med_I * frac_high_med + rel_gain_map = np.ones((3, pixels_x, pixels_y, memory_cells), dtype=np.float32) + rel_gain_map[0] = 1 # rel xray gain can come after + rel_gain_map[1] = rel_gain_map[0] * np.transpose(frac_high_med) + rel_gain_map[2] = rel_gain_map[1] * 4.48 + res = data[:, 0].astype(corr_dtype, copy=True) + res *= np.choose(gain_map, np.transpose(rel_gain_map, (0, 3, 1, 2))) + pixels_in_medium_gain = gain_map == 1 + res[pixels_in_medium_gain] += np.transpose(md_additional_offset, (0, 2, 1))[ + pixels_in_medium_gain + ] + return res -def test_only_thresholding(): + +def test_thresholding(): kernel_runner.load_cell_table(cell_table) kernel_runner.load_data(raw_data) kernel_runner.load_thresholds(thresholds) kernel_runner.correct(agipd_gpu.CorrectionFlags.THRESHOLD) gpu_res = kernel_runner.gain_map_gpu.get() - cpu_res = thresholding_cpu(raw_data, cell_table, thresholds) + assert np.allclose(gpu_res, gain_map_cpu) + + +def test_offset(): + kernel_runner.load_cell_table(cell_table) + kernel_runner.load_data(raw_data) + kernel_runner.load_thresholds(thresholds) + kernel_runner.load_offset_map(offset_map) + # have to do thresholding, otherwise all is treated as high gain + kernel_runner.correct( + agipd_gpu.CorrectionFlags.THRESHOLD | agipd_gpu.CorrectionFlags.OFFSET + ) + cpu_res = corr_offset_cpu(raw_data, cell_table, gain_map_cpu, offset_map) + gpu_res = kernel_runner.processed_data_gpu.get() + assert np.allclose(gpu_res, cpu_res) + + +def test_rel_gain_pc(): + kernel_runner.load_cell_table(cell_table) + kernel_runner.load_data(raw_data) + kernel_runner.load_thresholds(thresholds) + kernel_runner.load_rel_gain_pc_map(slopes_pc_map) + kernel_runner.correct( + agipd_gpu.CorrectionFlags.THRESHOLD | agipd_gpu.CorrectionFlags.REL_GAIN_PC + ) + cpu_res = corr_rel_gain_pc_cpu(raw_data, cell_table, gain_map_cpu, slopes_pc_map) + gpu_res = kernel_runner.processed_data_gpu.get() assert np.allclose(gpu_res, cpu_res)