diff --git a/src/calng/kernels/gaincount.cu b/src/calng/kernels/gaincount.cu index a6176a1da7d55996dd3cf615023eada901910e75..f88e84a3ece29074eabd48e566fc1f606057803d 100644 --- a/src/calng/kernels/gaincount.cu +++ b/src/calng/kernels/gaincount.cu @@ -1,5 +1,5 @@ extern "C" { - __global__ void count_pixels_per_gain_stage(const char* gain_map, // num_frames x ss_dim x fs_dim + __global__ void count_pixels_per_gain_stage(const float* gain_map, // num_frames x ss_dim x fs_dim const unsigned short num_frames, unsigned int* counts) { // output: num_frames x num_gain_stages if (blockIdx.x >= num_frames) { @@ -7,7 +7,7 @@ extern "C" { } const size_t ss_dim = {{ss_dim}}; const size_t fs_dim = {{fs_dim}}; - const char* frame_start = gain_map + ss_dim * fs_dim * blockIdx.x; + const float* frame_start = gain_map + ss_dim * fs_dim * blockIdx.x; // block x: handle frame x // block y: block within frame x (merge slow scan / fast scan, it doesn't matter) // so blockDim y is size of group for parallel reduction @@ -17,11 +17,11 @@ extern "C" { // first: grid-stride loop to compute local part unsigned int my_res[{{num_gain_stages}}] = {0}; for (int i = blockIdx.y * blockDim.y + threadIdx.y; i < ss_dim * fs_dim; i += blockDim.y * gridDim.y) { - const char gain_value = frame_start[i]; - if (gain_value < 0 || gain_value >= {{num_gain_stages}}) { + const float gain_value = frame_start[i]; + if (isnan(gain_value) || gain_value < 0.0f || gain_value >= {{num_gain_stages}}) { continue; } - my_res[gain_value] += 1; + my_res[static_cast<int>(gain_value)] += 1; }  // share with the class