David Hammer · fc816d34 · ede1decd · ede1decd · 67e778f3 · fc816d34
--- a/src/calng/kernels/agipd_gpu.cu 0 → 100644

+ 148

− 0
+++ b/src/calng/kernels/agipd_gpu.cu 0 → 100644

+ 148

− 0
+#include <cuda_fp16.h>
+
+{{corr_enum}}
+
+extern "C" {
+	/*
+	  Perform corrections; see agipd_gpu.CorrectionFlags
+	  Note that THRESHOLD and OFFSET should for any later corrections to make sense
+	  Will take cell_table into account when getting correction values
+	  Will convert from input dtype to float for correction
+	  Will convert to output dtype for output
+	*/
+	__global__ void correct(const {{input_data_dtype}}* data,
+	                        const unsigned short* cell_table,
+	                        const unsigned char corr_flags,
+	                        // default_gain can be 0, 1, or 2, and is relevant for fixed gain mode (no THRESHOLD)
+	                        const unsigned char default_gain,
+	                        const float* threshold_map,
+	                        const float* offset_map,
+	                        const float* rel_gain_pc_map,
+	                        const float* md_additional_offset,
+	                        const float* rel_gain_xray_map,
+	                        const float g_gain_value,
+	                        const unsigned int* bad_pixel_map,
+	                        const float bad_pixel_mask_value,
+	                        float* gain_map, // TODO: more compact yet plottable representation
+	                        {{output_data_dtype}}* output) {
+		const size_t X = {{pixels_x}};
+		const size_t Y = {{pixels_y}};
+		const size_t input_cells = {{data_memory_cells}};
+		const size_t map_cells = {{constant_memory_cells}};
+
+		const size_t cell = blockIdx.x * blockDim.x + threadIdx.x;
+		const size_t x = blockIdx.y * blockDim.y + threadIdx.y;
+		const size_t y = blockIdx.z * blockDim.z + threadIdx.z;
+
+		if (cell >= input_cells || y >= Y || x >= X) {
+			return;
+		}
+
+		// data shape: memory cell, data/raw_gain (dim size 2), x, y
+		const size_t data_stride_y = 1;
+		const size_t data_stride_x = Y * data_stride_y;
+		const size_t data_stride_raw_gain = X * data_stride_x;
+		const size_t data_stride_cell = 2 * data_stride_raw_gain;
+		const size_t data_index = cell * data_stride_cell +
+			0 * data_stride_raw_gain +
+			y * data_stride_y +
+			x * data_stride_x;
+		const size_t raw_gain_index = cell * data_stride_cell +
+			1 * data_stride_raw_gain +
+			y * data_stride_y +
+			x * data_stride_x;
+		float corrected = (float)data[data_index];
+		const float raw_gain_val = (float)data[raw_gain_index];
+
+		const size_t output_stride_y = 1;
+		const size_t output_stride_x = output_stride_y * Y;
+		const size_t output_stride_cell = output_stride_x * X;
+		const size_t output_index = cell * output_stride_cell + x * output_stride_x + y * output_stride_y;
+
+		// per-pixel only constant: cell, x, y
+		const size_t map_stride_y = 1;
+		const size_t map_stride_x = Y * map_stride_y;
+		const size_t map_stride_cell = X * map_stride_x;
+
+		// threshold constant shape: cell, x, y, threshold (dim size 2)
+		const size_t threshold_map_stride_threshold = 1;
+		const size_t threshold_map_stride_y = 2 * threshold_map_stride_threshold;
+		const size_t threshold_map_stride_x = Y * threshold_map_stride_y;
+		const size_t threshold_map_stride_cell = X * threshold_map_stride_x;
+
+		// gain mapped constant shape: cell, x, y, gain_level (dim size 3)
+		const size_t gm_map_stride_gain = 1;
+		const size_t gm_map_stride_y = 3 * gm_map_stride_gain;
+		const size_t gm_map_stride_x = Y * gm_map_stride_y;
+		const size_t gm_map_stride_cell = X * gm_map_stride_x;
+		// note: assuming all maps have same shape (in terms of cells / x / y)
+
+		const size_t map_cell = cell_table[cell];
+
+		if (map_cell < map_cells) {
+			unsigned char gain = default_gain;
+			if (corr_flags & THRESHOLD) {
+				const float threshold_0 = threshold_map[0 * threshold_map_stride_threshold +
+				                                        map_cell * threshold_map_stride_cell +
+				                                        y * threshold_map_stride_y +
+				                                        x * threshold_map_stride_x];
+				const float threshold_1 = threshold_map[1 * threshold_map_stride_threshold +
+				                                        map_cell * threshold_map_stride_cell +
+				                                        y * threshold_map_stride_y +
+				                                        x * threshold_map_stride_x];
+				// could consider making this const using ternaries / tiny function
+				if (raw_gain_val <= threshold_0) {
+					gain = 0;
+				} else if (raw_gain_val <= threshold_1) {
+					gain = 1;
+				} else {
+					gain = 2;
+				}
+			}
+			gain_map[output_index] = (float)gain;
+
+			const size_t map_index = map_cell * map_stride_cell +
+				y * map_stride_y +
+				x * map_stride_x;
+
+			const size_t gm_map_index = gain * gm_map_stride_gain +
+				map_cell * gm_map_stride_cell +
+				y * gm_map_stride_y +
+				x * gm_map_stride_x;
+
+			if ((corr_flags & BPMASK) && bad_pixel_map[gm_map_index]) {
+				corrected = bad_pixel_mask_value;
+				gain_map[output_index] = bad_pixel_mask_value;
+			} else {
+				if (corr_flags & OFFSET) {
+					corrected -= offset_map[gm_map_index];
+					// TODO: optionally reassign gain stage for this pixel based on new value
+				}
+				// TODO: baseline shift
+				if (corr_flags & REL_GAIN_PC) {
+					corrected *= rel_gain_pc_map[gm_map_index];
+					if (gain == 1) {
+						corrected += md_additional_offset[map_index];
+					}
+				}
+				if (corr_flags & GAIN_XRAY) {
+					corrected = (corrected / rel_gain_xray_map[map_index]) * g_gain_value;
+				}
+			}
+			{% if output_data_dtype == "half" %}
+			output[output_index] = __float2half(corrected);
+			{% else %}
+			output[output_index] = ({{output_data_dtype}})corrected;
+			{% endif %}
+		} else {
+			// TODO: decide what to do when we cannot threshold
+			{% if output_data_dtype == "half" %}
+			output[data_index] = __float2half(corrected);
+			{% else %}
+			output[data_index] = ({{output_data_dtype}})corrected;
+			{% endif %}
+
+			gain_map[data_index] = 255;
+		}
+	}
+}