diff --git a/src/calng/base_correction.py b/src/calng/base_correction.py
index aed5cba3b00fb750f87d6a2fe9f27c5bb6a9309b..7f2aa23d448fc9137cdd8c854d44c0c142f4e1ce 100644
--- a/src/calng/base_correction.py
+++ b/src/calng/base_correction.py
@@ -893,6 +893,10 @@ class BaseCorrection(PythonDevice):
         corrections, processed_buffer, previews = self.kernel_runner.correct(
             image_data, cell_table, *additional_data
         )
+        # hack: get extra data out from AGIPD until addon API is updated
+        if isinstance(corrections, list):
+            corrections, extra_stuff = corrections
+            data_hash.merge(extra_stuff)
         data_hash["corrections"] = corrections
 
         # write previews first so addons cannot mess with them
diff --git a/src/calng/corrections/AgipdCorrection.py b/src/calng/corrections/AgipdCorrection.py
index 75a2032c8f77c218b789ec97c7acc2b4dc3b8639..7568df56e0b4e1fe925b5a240c92c81e861087dd 100644
--- a/src/calng/corrections/AgipdCorrection.py
+++ b/src/calng/corrections/AgipdCorrection.py
@@ -6,10 +6,12 @@ from karabo.bound import (
     DOUBLE_ELEMENT,
     FLOAT_ELEMENT,
     KARABO_CLASSINFO,
+    NDARRAY_ELEMENT,
     OUTPUT_CHANNEL,
     OVERWRITE_ELEMENT,
     STRING_ELEMENT,
     UINT32_ELEMENT,
+    Hash,
     MetricPrefix,
     Unit,
 )
@@ -606,6 +608,15 @@ class AgipdGpuRunner(AgipdBaseRunner):
             ),
             name="common_mode_asic",
         )
+        self.gain_count_kernel = self._xp.RawKernel(
+            code=base_kernel_runner.get_kernel_template("gaincount.cu").render(
+                ss_dim=self.num_pixels_ss,
+                fs_dim=self.num_pixels_fs,
+                block_size=128,  # TODO: tune
+                num_gain_stages=3,
+            ),
+            name="count_pixels_per_gain_stage",
+        )
 
     def _correct(self, flags, image_data, cell_table, processed_data, gain_map):
         num_frames = self._xp.uint16(image_data.shape[0])
@@ -653,6 +664,62 @@ class AgipdGpuRunner(AgipdBaseRunner):
             ),
         )
 
+    def correct(self, image_data, cell_table, *additional_data):
+        """Temporary override to utilize gain map for monitoring"""
+        num_frames = image_data.shape[0]
+        processed_buffers = self._make_output_buffers(
+            num_frames, self._correction_flag_preview
+        )
+        image_data = self._xp.asarray(image_data)
+        additional_data = [self._xp.asarray(data) for data in additional_data]
+        cell_table = self._xp.asarray(cell_table)
+        self._correct(
+            self._correction_flag_preview,
+            image_data,
+            cell_table,
+            *additional_data,
+            *processed_buffers,
+        )
+        num_frames = image_data.shape[0]
+
+        preview_buffers = self._preview_data_views(
+            image_data, *additional_data, *processed_buffers
+        )
+
+        if self._correction_flag_preview != self._correction_flag_enabled:
+            processed_buffers = self._make_output_buffers(
+                num_frames, self._correction_flag_enabled
+            )
+            self._correct(
+                self._correction_flag_enabled,
+                image_data,
+                cell_table,
+                *additional_data,
+                *processed_buffers,
+            )
+
+        # this buffer only used for this, not preview
+        gain_count_buffer = self._xp.empty(
+            (num_frames, 3), dtype=np.uint32  # one count per gain stage
+        )
+        self.gain_count_kernel(
+            (num_frames, 1, 1),
+            (1, 128, 1),  # must match block_size given to template
+            (
+                processed_buffers[1],
+                self._xp.uint16(num_frames),  # kernel declares unsigned short
+                gain_count_buffer,
+            )
+        )
+        extra = Hash("numPixelsPerGainStage", gain_count_buffer.get())
+        return (
+            [
+                self._correction_applied_hash,
+                extra,
+            ],
+            processed_buffers[0],
+            preview_buffers,
+        )
+
 
 class AgipdCalcatFriend(base_calcat.BaseCalcatFriend):
     _constant_enum_class = Constants
@@ -781,10 +848,20 @@ class AgipdCalcatFriend(base_calcat.BaseCalcatFriend):
     return res
 
 
+def custom_output_schema(use_shmem_handles):
+    res = schemas.xtdf_output_schema(use_shmem_handles)
+    (
+        NDARRAY_ELEMENT(res)
+        .key("numPixelsPerGainStage")
+        .commit(),
+    )
+    return res
+
+
 @KARABO_CLASSINFO("AgipdCorrection", deviceVersion)
 class AgipdCorrection(base_correction.BaseCorrection):
     # subclass *must* set these attributes
-    _base_output_schema = schemas.xtdf_output_schema
+    _base_output_schema = custom_output_schema
     _calcat_friend_class = AgipdCalcatFriend
     _constant_enum_class = Constants
     _correction_steps = correction_steps
diff --git a/src/calng/kernels/gaincount.cu b/src/calng/kernels/gaincount.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f88e84a3ece29074eabd48e566fc1f606057803d
--- /dev/null
+++ b/src/calng/kernels/gaincount.cu
@@ -0,0 +1,54 @@
+extern "C" {
+    __global__ void count_pixels_per_gain_stage(const float* gain_map, // num_frames x ss_dim x fs_dim
+                                                const unsigned short num_frames,
+                                                unsigned int* counts) { // output: num_frames x num_gain_stages
+        if (blockIdx.x >= num_frames) {
+            return;
+        }
+        const size_t ss_dim = {{ss_dim}};
+        const size_t fs_dim = {{fs_dim}};
+        const float* frame_start = gain_map + ss_dim * fs_dim * blockIdx.x;
+        // block x: handle frame x
+        // block y: block within frame x (merge slow scan / fast scan, it doesn't matter)
+        // so blockDim y is size of group for parallel reduction
+        assert(blockDim.x == 1);
+        assert(blockDim.y == {{block_size}});
+
+        // first: grid-stride loop to compute local part
+        unsigned int my_res[{{num_gain_stages}}] = {0};
+        for (int i = blockIdx.y * blockDim.y + threadIdx.y; i < ss_dim * fs_dim; i += blockDim.y * gridDim.y) {
+            const float gain_value = frame_start[i];
+            if (isnan(gain_value) || gain_value < 0.0f || gain_value >= {{num_gain_stages}}) {
+                continue;  // skip values that would index my_res out of bounds
+            }
+            my_res[static_cast<int>(gain_value)] += 1;
+        }
+
+        // share partial counts with the rest of the block
+        // each thread will write num_gain_stages adjacent values
+        __shared__ unsigned int group_res[{{block_size * num_gain_stages}}];
+        for (int i=0; i<{{num_gain_stages}}; ++i) {
+            group_res[threadIdx.y * {{num_gain_stages}} + i] = my_res[i];
+        }
+
+        // now for parallel (halving) reduction; requires block_size to be a power of two
+        __syncthreads();
+        int active_threads = blockDim.y;
+        while (active_threads > 1) {
+            active_threads /= 2;
+            if (threadIdx.y < active_threads) {
+                for (int i=0; i<{{num_gain_stages}}; ++i) {
+                    group_res[threadIdx.y * {{num_gain_stages}} + i] += group_res[(threadIdx.y + active_threads) * {{num_gain_stages}} + i];
+                }
+            }
+            __syncthreads();
+        }
+
+        // and write out the result (assumes gridDim.y == 1, as launched from host)
+        if (threadIdx.y == 0) {
+            for (int i=0; i<{{num_gain_stages}}; ++i) {
+                counts[blockIdx.x * {{num_gain_stages}} + i] = group_res[i];
+            }
+        }
+    }
+}