Skip to content
Snippets Groups Projects
Commit df209b62 authored by David Hammer's avatar David Hammer
Browse files

JF: Add CPU runner version

Still to do: speed up preview generation; single-threaded numpy reduction
functions are a bit of an issue.
parent d234558a
No related branches found
No related tags found
1 merge request!12Snapshot: field test deployed version as of end of run 202201
...@@ -49,8 +49,18 @@ setup(name='calng', ...@@ -49,8 +49,18 @@ setup(name='calng',
[ [
Extension( Extension(
'calng.kernels.gotthard2_cython', 'calng.kernels.gotthard2_cython',
['src/calng/kernels/gotthard2_cpu.pyx'] ['src/calng/kernels/gotthard2_cpu.pyx'],
) extra_compile_args = ['-O3', '-march=native'],
] ),
Extension(
'calng.kernels.jungfrau_cython',
['src/calng/kernels/jungfrau_cpu.pyx'],
extra_compile_args = ['-O3', '-march=native', '-fopenmp' ],
extra_link_args=['-fopenmp'],
),
],
compiler_directives={
'language_level': 3
},
), ),
) )
...@@ -14,7 +14,7 @@ from karabo.bound import ( ...@@ -14,7 +14,7 @@ from karabo.bound import (
VECTOR_STRING_ELEMENT, VECTOR_STRING_ELEMENT,
) )
from . import base_calcat, base_gpu, utils from . import base_calcat, base_kernel_runner, utils
from ._version import version as deviceVersion from ._version import version as deviceVersion
from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema
...@@ -47,7 +47,7 @@ class CorrectionFlags(enum.IntFlag): ...@@ -47,7 +47,7 @@ class CorrectionFlags(enum.IntFlag):
BPMASK = 32 BPMASK = 32
class AgipdGpuRunner(base_gpu.BaseGpuRunner): class AgipdGpuRunner(base_kernel_runner.BaseGpuRunner):
_kernel_source_filename = "agipd_gpu.cu" _kernel_source_filename = "agipd_gpu.cu"
_corrected_axis_order = "cxy" _corrected_axis_order = "cxy"
......
...@@ -9,7 +9,7 @@ from karabo.bound import ( ...@@ -9,7 +9,7 @@ from karabo.bound import (
VECTOR_STRING_ELEMENT, VECTOR_STRING_ELEMENT,
) )
from . import base_calcat, base_gpu, utils from . import base_calcat, base_kernel_runner, utils
from ._version import version as deviceVersion from ._version import version as deviceVersion
from .base_correction import BaseCorrection, add_correction_step_schema from .base_correction import BaseCorrection, add_correction_step_schema
...@@ -23,7 +23,7 @@ class DsscConstants(enum.Enum): ...@@ -23,7 +23,7 @@ class DsscConstants(enum.Enum):
Offset = enum.auto() Offset = enum.auto()
class DsscGpuRunner(base_gpu.BaseGpuRunner): class DsscGpuRunner(base_kernel_runner.BaseGpuRunner):
_kernel_source_filename = "dssc_gpu.cu" _kernel_source_filename = "dssc_gpu.cu"
_corrected_axis_order = "cyx" _corrected_axis_order = "cyx"
......
...@@ -24,6 +24,7 @@ from .base_correction import ( ...@@ -24,6 +24,7 @@ from .base_correction import (
add_correction_step_schema, add_correction_step_schema,
preview_schema, preview_schema,
) )
from .base_kernel_runner import BaseKernelRunner
_pretend_pulse_table = np.arange(2720, dtype=np.uint8) _pretend_pulse_table = np.arange(2720, dtype=np.uint8)
...@@ -52,7 +53,7 @@ class CorrectionFlags(enum.IntFlag): ...@@ -52,7 +53,7 @@ class CorrectionFlags(enum.IntFlag):
GAIN = 4 GAIN = 4
class Gotthard2CpuRunner: class Gotthard2CpuRunner(BaseKernelRunner):
def __init__( def __init__(
self, self,
pixels_x, pixels_x,
...@@ -63,12 +64,18 @@ class Gotthard2CpuRunner: ...@@ -63,12 +64,18 @@ class Gotthard2CpuRunner:
output_data_dtype=np.float32, output_data_dtype=np.float32,
bad_pixel_mask_value=np.nan, bad_pixel_mask_value=np.nan,
): ):
from .kernels import gotthard2_cython # TODO super().__init__(
pixels_x,
pixels_y,
memory_cells,
constant_memory_cells,
input_data_dtype,
output_data_dtype,
)
from .kernels import gotthard2_cython
self.correction_kernel = gotthard2_cython.correct self.correction_kernel = gotthard2_cython.correct
self.pixels_x = pixels_x
self.memory_cells = memory_cells
self.constant_memory_cells = constant_memory_cells
self.input_shape = (memory_cells, pixels_x) self.input_shape = (memory_cells, pixels_x)
self.processed_shape = self.input_shape self.processed_shape = self.input_shape
# model: 2 buffers (corresponding to actual memory cells), 2720 frames # model: 2 buffers (corresponding to actual memory cells), 2720 frames
...@@ -79,13 +86,10 @@ class Gotthard2CpuRunner: ...@@ -79,13 +86,10 @@ class Gotthard2CpuRunner:
self.offset_map = np.empty(self.map_shape, dtype=np.float32) self.offset_map = np.empty(self.map_shape, dtype=np.float32)
self.rel_gain_map = np.empty(self.map_shape, dtype=np.float32) self.rel_gain_map = np.empty(self.map_shape, dtype=np.float32)
self.flush_buffers() self.flush_buffers()
self.input_data = None # will just point to data coming in self.input_data = None # will just point to data coming in
self.input_gain_stage = None # will just point to data coming in self.input_gain_stage = None # will just point to data coming in
self.processed_data = None # will just point to buffer we're given self.processed_data = None # will just point to buffer we're given
self.preview_buffer_getters = [
self._get_raw_for_preview,
self._get_corrected_for_preview,
]
def _get_raw_for_preview(self): def _get_raw_for_preview(self):
return self.input_data return self.input_data
...@@ -94,7 +98,7 @@ class Gotthard2CpuRunner: ...@@ -94,7 +98,7 @@ class Gotthard2CpuRunner:
return self.processed_data return self.processed_data
def load_data(self, image_data, input_gain_stage): def load_data(self, image_data, input_gain_stage):
"""Experiment: loading all three in one function as they are tied""" """Experiment: loading both in one function as they are tied"""
self.input_data = image_data.astype(np.uint16, copy=False) self.input_data = image_data.astype(np.uint16, copy=False)
self.input_gain_stage = input_gain_stage.astype(np.uint8, copy=False) self.input_gain_stage = input_gain_stage.astype(np.uint8, copy=False)
...@@ -122,33 +126,6 @@ class Gotthard2CpuRunner: ...@@ -122,33 +126,6 @@ class Gotthard2CpuRunner:
self.processed_data = out self.processed_data = out
return out return out
def compute_previews(self, preview_index):
"""See BaseGpuRunner.compute_previews"""
if preview_index < -4:
raise ValueError(f"No statistic with code {preview_index} defined")
elif preview_index >= self.memory_cells:
raise ValueError(f"Memory cell index {preview_index} out of range")
# TODO: enum around reduction type
return tuple(
self._compute_a_preview(image_data=getter(), preview_index=preview_index)
for getter in self.preview_buffer_getters
)
def _compute_a_preview(self, image_data, preview_index):
if preview_index >= 0:
return image_data[preview_index].astype(np.float32, copy=False)
elif preview_index == -1:
return np.nanmax(image_data, axis=0).astype(np.float32, copy=False)
elif preview_index in (-2, -3, -4):
stat_fun = {
-2: np.nanmean,
-3: np.nansum,
-4: np.nanstd,
}[preview_index]
return stat_fun(image_data, axis=0, dtype=np.float32)
class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend): class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend):
_constant_enum_class = Gotthard2Constants _constant_enum_class = Gotthard2Constants
...@@ -184,11 +161,6 @@ class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend): ...@@ -184,11 +161,6 @@ class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend):
.key(f"{param_prefix}.memoryCells") .key(f"{param_prefix}.memoryCells")
.setNewDefaultValue(2) .setNewDefaultValue(2)
.commit(), .commit(),
OVERWRITE_ELEMENT(expected)
.key("outputShmemBufferSize")
.setNewDefaultValue(2)
.commit(),
) )
base_calcat.add_status_schema_from_enum( base_calcat.add_status_schema_from_enum(
...@@ -236,6 +208,11 @@ class Gotthard2Correction(BaseCorrection): ...@@ -236,6 +208,11 @@ class Gotthard2Correction(BaseCorrection):
.key("preview.selectionMode") .key("preview.selectionMode")
.setNewDefaultValue("frame") .setNewDefaultValue("frame")
.commit(), .commit(),
OVERWRITE_ELEMENT(expected)
.key("outputShmemBufferSize")
.setNewDefaultValue(2)
.commit(),
) )
( (
...@@ -327,7 +304,6 @@ class Gotthard2Correction(BaseCorrection): ...@@ -327,7 +304,6 @@ class Gotthard2Correction(BaseCorrection):
buffer_handle, buffer_array = self._shmem_buffer.next_slot() buffer_handle, buffer_array = self._shmem_buffer.next_slot()
self.kernel_runner.correct(self._correction_flag_enabled, out=buffer_array) self.kernel_runner.correct(self._correction_flag_enabled, out=buffer_array)
if do_generate_preview: if do_generate_preview:
if self._correction_flag_enabled != self._correction_flag_preview: if self._correction_flag_enabled != self._correction_flag_preview:
self.kernel_runner.correct(self._correction_flag_preview) self.kernel_runner.correct(self._correction_flag_preview)
......
...@@ -11,9 +11,10 @@ from karabo.bound import ( ...@@ -11,9 +11,10 @@ from karabo.bound import (
VECTOR_STRING_ELEMENT, VECTOR_STRING_ELEMENT,
) )
from . import base_calcat, base_gpu, utils from . import base_calcat, utils
from ._version import version as deviceVersion from ._version import version as deviceVersion
from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema
from .base_kernel_runner import BaseGpuRunner, BaseKernelRunner
_pretend_pulse_table = np.arange(16, dtype=np.uint8) _pretend_pulse_table = np.arange(16, dtype=np.uint8)
...@@ -33,7 +34,12 @@ class CorrectionFlags(enum.IntFlag): ...@@ -33,7 +34,12 @@ class CorrectionFlags(enum.IntFlag):
BPMASK = 4 BPMASK = 4
class JungfrauGpuRunner(base_gpu.BaseGpuRunner): class KernelRunnerVersions(enum.Enum):
CPU = enum.auto()
GPU = enum.auto()
class JungfrauGpuRunner(BaseGpuRunner):
_kernel_source_filename = "jungfrau_gpu.cu" _kernel_source_filename = "jungfrau_gpu.cu"
_corrected_axis_order = "cyx" _corrected_axis_order = "cyx"
...@@ -59,8 +65,8 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner): ...@@ -59,8 +65,8 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
) )
# TODO: avoid superclass creating cell table with wrong dtype first # TODO: avoid superclass creating cell table with wrong dtype first
self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=cupy.uint8) self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=cupy.uint8)
self.input_gain_map_gpu = cupy.empty(self.input_shape, dtype=cupy.uint8) self.input_gain_stage_gpu = cupy.empty(self.input_shape, dtype=cupy.uint8)
self.preview_buffer_getters.append(self._get_gain_map_for_preview) self.preview_buffer_getters.append(self._get_gain_stage_for_preview)
self.map_shape = (self.constant_memory_cells, self.pixels_y, self.pixels_x, 3) self.map_shape = (self.constant_memory_cells, self.pixels_y, self.pixels_x, 3)
self.offset_map_gpu = cupy.zeros(self.map_shape, dtype=cupy.float32) self.offset_map_gpu = cupy.zeros(self.map_shape, dtype=cupy.float32)
self.rel_gain_map_gpu = cupy.ones(self.map_shape, dtype=cupy.float32) self.rel_gain_map_gpu = cupy.ones(self.map_shape, dtype=cupy.float32)
...@@ -95,13 +101,13 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner): ...@@ -95,13 +101,13 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
def _get_corrected_for_preview(self): def _get_corrected_for_preview(self):
return self.processed_data_gpu return self.processed_data_gpu
def _get_gain_map_for_preview(self): def _get_gain_stage_for_preview(self):
return self.input_gain_map_gpu return self.input_gain_stage_gpu
def load_data(self, image_data, input_gain_map, cell_table): def load_data(self, image_data, input_gain_stage, cell_table):
"""Experiment: loading all three in one function as they are tied""" """Experiment: loading all three in one function as they are tied"""
self.input_data_gpu.set(image_data) self.input_data_gpu.set(image_data)
self.input_gain_map_gpu.set(input_gain_map) self.input_gain_stage_gpu.set(input_gain_stage)
if self.burst_mode: if self.burst_mode:
self.cell_table_gpu.set(cell_table) self.cell_table_gpu.set(cell_table)
...@@ -116,7 +122,7 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner): ...@@ -116,7 +122,7 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
self.full_block, self.full_block,
( (
self.input_data_gpu, self.input_data_gpu,
self.input_gain_map_gpu, self.input_gain_stage_gpu,
self.cell_table_gpu, self.cell_table_gpu,
cupy.uint8(flags), cupy.uint8(flags),
self.offset_map_gpu, self.offset_map_gpu,
...@@ -128,6 +134,98 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner): ...@@ -128,6 +134,98 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
) )
class JungfrauCpuRunner(BaseKernelRunner):
_corrected_axis_order = "cyx"
def __init__(
self,
pixels_x,
pixels_y,
memory_cells,
constant_memory_cells,
input_data_dtype=np.uint16,
output_data_dtype=np.float32, # TODO: configurable
bad_pixel_mask_value=np.nan,
):
super().__init__(
pixels_x,
pixels_y,
memory_cells,
constant_memory_cells,
input_data_dtype,
output_data_dtype,
)
from .kernels import jungfrau_cython
self.correction_kernel_single = jungfrau_cython.correct_single
self.correction_kernel_burst = jungfrau_cython.correct_burst
self.input_shape = (memory_cells, pixels_y, pixels_x)
self.preview_buffer_getters.append(self._get_gain_stage_for_preview)
self.processed_shape = self.input_shape
self.map_shape = (self.constant_memory_cells, self.pixels_y, self.pixels_x, 3)
self.offset_map = np.empty(self.map_shape, dtype=np.float32)
self.rel_gain_map = np.empty(self.map_shape, dtype=np.float32)
self.bad_pixel_map = np.empty(self.map_shape, dtype=np.uint32)
self.bad_pixel_mask_value = bad_pixel_mask_value
self.flush_buffers()
self.input_data = None # will just point to data coming in
self.input_gain_stage = None # will just point to data coming in
self.processed_data = None # will just point to buffer we're given
def _get_raw_for_preview(self):
return self.input_data
def _get_corrected_for_preview(self):
return self.processed_data
def _get_gain_stage_for_preview(self):
return self.input_gain_stage
@property
def burst_mode(self):
return self.memory_cells > 1
def load_data(self, image_data, input_gain_stage, cell_table):
self.input_data = image_data.astype(np.uint16, copy=False)
self.input_gain_stage = input_gain_stage.astype(np.uint8, copy=False)
self.input_cell_table = cell_table.astype(np.uint8, copy=False)
def flush_buffers(self):
self.offset_map.fill(0)
self.rel_gain_map.fill(1)
self.bad_pixel_map.fill(0)
def correct(self, flags, out=None):
if out is None:
out = np.empty(self.processed_shape, dtype=np.float32)
if self.burst_mode:
self.correction_kernel_burst(
self.input_data,
self.input_gain_stage,
self.input_cell_table,
flags,
self.offset_map,
self.rel_gain_map,
self.bad_pixel_map,
self.bad_pixel_mask_value,
out,
)
else:
self.correction_kernel_single(
self.input_data,
self.input_gain_stage,
flags,
self.offset_map,
self.rel_gain_map,
self.bad_pixel_map,
self.bad_pixel_mask_value,
out,
)
self.processed_data = out
class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend): class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend):
_constant_enum_class = JungfrauConstants _constant_enum_class = JungfrauConstants
...@@ -172,12 +270,6 @@ class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend): ...@@ -172,12 +270,6 @@ class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend):
.key(f"{param_prefix}.biasVoltage") .key(f"{param_prefix}.biasVoltage")
.setNewDefaultValue(90) .setNewDefaultValue(90)
.commit(), .commit(),
# JUNGFRAU data is small, can fit plenty of trains in here
OVERWRITE_ELEMENT(expected)
.key("outputShmemBufferSize")
.setNewDefaultValue(2)
.commit(),
) )
# add extra parameters # add extra parameters
...@@ -255,7 +347,7 @@ class JungfrauCorrection(BaseCorrection): ...@@ -255,7 +347,7 @@ class JungfrauCorrection(BaseCorrection):
("relGain", CorrectionFlags.REL_GAIN), ("relGain", CorrectionFlags.REL_GAIN),
("badPixels", CorrectionFlags.BPMASK), ("badPixels", CorrectionFlags.BPMASK),
) )
_kernel_runner_class = JungfrauGpuRunner _kernel_runner_class = None # note: set in __init__ based on config
_calcat_friend_class = JungfrauCalcatFriend _calcat_friend_class = JungfrauCalcatFriend
_constant_enum_class = JungfrauConstants _constant_enum_class = JungfrauConstants
_managed_keys = BaseCorrection._managed_keys.copy() _managed_keys = BaseCorrection._managed_keys.copy()
...@@ -264,7 +356,6 @@ class JungfrauCorrection(BaseCorrection): ...@@ -264,7 +356,6 @@ class JungfrauCorrection(BaseCorrection):
@staticmethod @staticmethod
def expectedParameters(expected): def expectedParameters(expected):
super(JungfrauCorrection, JungfrauCorrection).expectedParameters(expected)
( (
OVERWRITE_ELEMENT(expected) OVERWRITE_ELEMENT(expected)
.key("dataFormat.pixelsX") .key("dataFormat.pixelsX")
...@@ -285,8 +376,26 @@ class JungfrauCorrection(BaseCorrection): ...@@ -285,8 +376,26 @@ class JungfrauCorrection(BaseCorrection):
.key("preview.selectionMode") .key("preview.selectionMode")
.setNewDefaultValue("frame") .setNewDefaultValue("frame")
.commit(), .commit(),
# JUNGFRAU data is small, can fit plenty of trains in here
OVERWRITE_ELEMENT(expected)
.key("outputShmemBufferSize")
.setNewDefaultValue(2)
.commit(),
) )
# support both CPU and GPU kernels
(
STRING_ELEMENT(expected)
.key("kernelType")
.assignmentOptional()
.defaultValue(KernelRunnerVersions.CPU.name)
.options(",".join(kernel_type.name for kernel_type in KernelRunnerVersions))
.reconfigurable()
.commit(),
)
JungfrauCorrection._managed_keys.add("kernelType")
( (
OUTPUT_CHANNEL(expected) OUTPUT_CHANNEL(expected)
.key("preview.outputGainMap") .key("preview.outputGainMap")
...@@ -320,6 +429,11 @@ class JungfrauCorrection(BaseCorrection): ...@@ -320,6 +429,11 @@ class JungfrauCorrection(BaseCorrection):
def __init__(self, config): def __init__(self, config):
super().__init__(config) super().__init__(config)
kernel_type = KernelRunnerVersions[config["kernelType"]]
if kernel_type is KernelRunnerVersions.CPU:
self._kernel_runner_class = JungfrauCpuRunner
else:
self._kernel_runner_class = JungfrauGpuRunner
# TODO: gain mode as constant parameter and / or device configuration # TODO: gain mode as constant parameter and / or device configuration
try: try:
...@@ -406,20 +520,34 @@ class JungfrauCorrection(BaseCorrection): ...@@ -406,20 +520,34 @@ class JungfrauCorrection(BaseCorrection):
constant_data = np.transpose(constant_data, (2, 1, 0, 3)) constant_data = np.transpose(constant_data, (2, 1, 0, 3))
else: else:
constant_data = np.transpose(constant_data, (2, 0, 1, 3)) constant_data = np.transpose(constant_data, (2, 0, 1, 3))
kernel_type = KernelRunnerVersions[self.get("kernelType")]
if constant is JungfrauConstants.Offset10Hz: if constant is JungfrauConstants.Offset10Hz:
self.kernel_runner.offset_map_gpu.set(constant_data.astype(np.float32)) if kernel_type is KernelRunnerVersions.CPU:
self.kernel_runner.offset_map[:] = constant_data.astype(np.float32)
else:
self.kernel_runner.offset_map_gpu.set(constant_data.astype(np.float32))
if not self.get("corrections.offset.available"): if not self.get("corrections.offset.available"):
self.set("corrections.offset.available", True) self.set("corrections.offset.available", True)
elif constant is JungfrauConstants.RelativeGain10Hz: elif constant is JungfrauConstants.RelativeGain10Hz:
self.kernel_runner.rel_gain_map_gpu.set(constant_data.astype(np.float32)) if kernel_type is KernelRunnerVersions.CPU:
self.kernel_runner.rel_gain_map[:] = constant_data.astype(np.float32)
else:
self.kernel_runner.rel_gain_map_gpu.set(
constant_data.astype(np.float32)
)
if not self.get("corrections.relGain.available"): if not self.get("corrections.relGain.available"):
self.set("corrections.relGain.available", True) self.set("corrections.relGain.available", True)
elif constant in ( elif constant in (
JungfrauConstants.BadPixelsDark10Hz, JungfrauConstants.BadPixelsFF10Hz JungfrauConstants.BadPixelsDark10Hz,
JungfrauConstants.BadPixelsFF10Hz,
): ):
self.kernel_runner.bad_pixel_map_gpu |= cupy.asarray(constant_data) if kernel_type is KernelRunnerVersions.CPU:
self.kernel_runner.bad_pixel_map |= constant_data
else:
self.kernel_runner.bad_pixel_map_gpu |= cupy.asarray(constant_data)
if not self.get("corrections.badPixels.available"): if not self.get("corrections.badPixels.available"):
self.set("corrections.badPixels.available", True) self.set("corrections.badPixels.available", True)
self._update_correction_flags() self._update_correction_flags()
self.log_status_info(f"Done loading {constant.name} to GPU") self.log_status_info(f"Done loading {constant.name} to runner")
...@@ -11,7 +11,7 @@ from karabo.bound import ( ...@@ -11,7 +11,7 @@ from karabo.bound import (
VECTOR_STRING_ELEMENT, VECTOR_STRING_ELEMENT,
) )
from . import base_gpu, base_calcat, utils from . import base_kernel_runner, base_calcat, utils
from ._version import version as deviceVersion from ._version import version as deviceVersion
from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema from .base_correction import BaseCorrection, add_correction_step_schema, preview_schema
...@@ -34,7 +34,7 @@ class CorrectionFlags(enum.IntFlag): ...@@ -34,7 +34,7 @@ class CorrectionFlags(enum.IntFlag):
BPMASK = 16 BPMASK = 16
class LpdGpuRunner(base_gpu.BaseGpuRunner): class LpdGpuRunner(base_kernel_runner.BaseGpuRunner):
_kernel_source_filename = "lpd_gpu.cu" _kernel_source_filename = "lpd_gpu.cu"
_corrected_axis_order = "cxy" _corrected_axis_order = "cxy"
......
...@@ -43,7 +43,7 @@ class BaseKernelRunner: ...@@ -43,7 +43,7 @@ class BaseKernelRunner:
Detector-specific subclass must define this method. It should assume that image Detector-specific subclass must define this method. It should assume that image
data, cell table, and other data (including constants) has already been loaded. data, cell table, and other data (including constants) has already been loaded.
It should probably run some GPU kernel and output should go into It should probably run some GPU kernel and output should go into
self.processed_data_gpu. self.processed_data(_gpu).
Keep in mind that user only gets output from compute_preview or reshape Keep in mind that user only gets output from compute_preview or reshape
(either of these should come after correct). (either of these should come after correct).
...@@ -62,7 +62,7 @@ class BaseKernelRunner: ...@@ -62,7 +62,7 @@ class BaseKernelRunner:
raise NotImplementedError() raise NotImplementedError()
def _get_corrected_for_preview(self): def _get_corrected_for_preview(self):
"""Should return view of self.processed_data_gpu with shape (cell, x/y, x/y)""" """Should return view of self.processed_data(_gpu) with shape (cell, x/y, x/y)"""
raise NotImplementedError() raise NotImplementedError()
def flush_buffers(self): def flush_buffers(self):
...@@ -99,21 +99,37 @@ class BaseKernelRunner: ...@@ -99,21 +99,37 @@ class BaseKernelRunner:
here for now (and can differ between AGIPD and DSSC)""" here for now (and can differ between AGIPD and DSSC)"""
if preview_index >= 0: if preview_index >= 0:
# TODO: reuse pinned buffers for this # TODO: reuse pinned buffers for this
return image_data[preview_index].astype(np.float32).get() return image_data[preview_index].astype(np.float32)
elif preview_index == -1: elif preview_index == -1:
# TODO: confirm that max is pixel and not integrated intensity # TODO: confirm that max is pixel and not integrated intensity
# separate from next case because dtype not applicable here # separate from next case because dtype not applicable here
return cupy.nanmax(image_data, axis=0).astype(cupy.float32).get() return np.nanmax(image_data, axis=0).astype(np.float32)
elif preview_index in (-2, -3, -4): elif preview_index in (-2, -3, -4):
stat_fun = { stat_fun = {
-2: cupy.nanmean, -2: np.nanmean,
-3: cupy.nansum, -3: np.nansum,
-4: cupy.nanstd, -4: np.nanstd,
}[preview_index] }[preview_index]
return stat_fun(image_data, axis=0, dtype=cupy.float32).get() return stat_fun(image_data, axis=0, dtype=np.float32)
def reshape(self, output_order, out=None):
"""Move axes to desired output order"""
# TODO: avoid copy
if output_order == self._corrected_axis_order:
self.reshaped_data = self.processed_data
else:
self.reshaped_data = np.transpose(
self.processed_data,
utils.transpose_order(self._corrected_axis_order, output_order),
)
class BaseGpuRunner(base_kernel_runner): if out is None:
return self.reshaped_data
else:
out[:] = self.reshaped_data
class BaseGpuRunner(BaseKernelRunner):
"""Class to handle GPU buffers and execution of CUDA kernels on image data """Class to handle GPU buffers and execution of CUDA kernels on image data
All GPU buffers are kept within this class and it is intentionally very stateful. All GPU buffers are kept within this class and it is intentionally very stateful.
...@@ -214,3 +230,6 @@ class BaseGpuRunner(base_kernel_runner): ...@@ -214,3 +230,6 @@ class BaseGpuRunner(base_kernel_runner):
utils.ceil_div(a_length, block_length) utils.ceil_div(a_length, block_length)
for (a_length, block_length) in zip(self.processed_shape, full_block) for (a_length, block_length) in zip(self.processed_shape, full_block)
) )
def _compute_a_preview(self, image_data, preview_index):
return super()._compute_a_preview(image_data, preview_index).get()
# cython: boundscheck=False
# cython: cdivision=True
# cython: wrapararound=False
cdef unsigned char NONE = 0
cdef unsigned char OFFSET = 1
cdef unsigned char REL_GAIN = 2
cdef unsigned char BPMASK = 4
from cython.parallel import prange
def correct_burst(
unsigned short[:, :, :] image_data,
unsigned char[:, :, :] gain_stage,
unsigned char[:] cell_table,
unsigned char flags,
float[:, :, :, :] offset_map,
float[:, :, :, :] relgain_map,
unsigned[:, :, :, :] badpixel_mask,
float badpixel_fill_value,
float[:, :, :] output,
):
cdef int input_cell, map_cell, x, y
cdef unsigned char gain
cdef float corrected
for input_cell in prange(image_data.shape[0], nogil=True):
map_cell = cell_table[input_cell]
if map_cell >= offset_map.shape[0]:
for y in range(image_data.shape[1]):
for x in range(image_data.shape[2]):
output[input_cell, y, x] = <float>image_data[input_cell, y, x]
continue
for y in range(image_data.shape[1]):
for x in range(image_data.shape[2]):
corrected = image_data[input_cell, y, x]
gain = gain_stage[input_cell, y, x]
# legal values: 0, 1, or 3
if gain == 2:
corrected = badpixel_fill_value
else:
if gain == 3:
gain = 2
if (flags & BPMASK) and badpixel_mask[map_cell, y, x, gain] != 0:
corrected = badpixel_fill_value
else:
if (flags & OFFSET):
corrected = corrected - offset_map[map_cell, y, x, gain]
if (flags & REL_GAIN):
corrected = corrected / relgain_map[map_cell, y, x, gain]
output[input_cell, y, x] = corrected
def correct_single(
unsigned short[:, :, :] image_data,
unsigned char[:, :, :] gain_stage,
unsigned char flags,
float[:, :, :] offset_map,
float[:, :, :] relgain_map,
unsigned[:, :, :] badpixel_mask,
float badpixel_fill_value,
float[:, :, :] output,
):
# ignore "cell table", constant pretends cell 0
cdef int x, y
cdef unsigned char gain
cdef float corrected
for y in range(image_data.shape[1]):
for x in range(image_data.shape[2]):
corrected = image_data[0, y, x]
gain = gain_stage[0, y, x]
# legal values: 0, 1, or 3
if gain == 2:
corrected = badpixel_fill_value
else:
if gain == 3:
gain = 2
if (flags & BPMASK) and badpixel_mask[y, x, gain] != 0:
corrected = badpixel_fill_value
else:
if (flags & OFFSET):
corrected = corrected - offset_map[y, x, gain]
if (flags & REL_GAIN):
corrected = corrected / relgain_map[y, x, gain]
output[0, y, x] = corrected
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment