From 876fbbe82c9fe5cbc67237253616e78cd86a74b3 Mon Sep 17 00:00:00 2001 From: David Hammer <dhammer@mailbox.org> Date: Thu, 2 Jun 2022 15:31:20 +0200 Subject: [PATCH] GH/JF: Decrease default shmem buffer size --- src/calng/Gotthard2Correction.py | 5 + src/calng/JungfrauCorrection.py | 6 + .../{base_gpu.py => base_kernel_runner.py} | 175 ++++++++++-------- 3 files changed, 109 insertions(+), 77 deletions(-) rename src/calng/{base_gpu.py => base_kernel_runner.py} (90%) diff --git a/src/calng/Gotthard2Correction.py b/src/calng/Gotthard2Correction.py index d5ac7722..39c6c46e 100644 --- a/src/calng/Gotthard2Correction.py +++ b/src/calng/Gotthard2Correction.py @@ -184,6 +184,11 @@ class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend): .key(f"{param_prefix}.memoryCells") .setNewDefaultValue(2) .commit(), + + OVERWRITE_ELEMENT(expected) + .key("outputShmemBufferSize") + .setNewDefaultValue(2) + .commit(), ) base_calcat.add_status_schema_from_enum( diff --git a/src/calng/JungfrauCorrection.py b/src/calng/JungfrauCorrection.py index 7be0e149..553f86db 100644 --- a/src/calng/JungfrauCorrection.py +++ b/src/calng/JungfrauCorrection.py @@ -172,6 +172,12 @@ class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend): .key(f"{param_prefix}.biasVoltage") .setNewDefaultValue(90) .commit(), + + # JUNGFRAU data is small, can fit plenty of trains in here + OVERWRITE_ELEMENT(expected) + .key("outputShmemBufferSize") + .setNewDefaultValue(2) + .commit(), ) # add extra parameters diff --git a/src/calng/base_gpu.py b/src/calng/base_kernel_runner.py similarity index 90% rename from src/calng/base_gpu.py rename to src/calng/base_kernel_runner.py index 333eb053..38127f06 100644 --- a/src/calng/base_gpu.py +++ b/src/calng/base_kernel_runner.py @@ -7,32 +7,7 @@ import numpy as np from . import utils -class BaseGpuRunner: - """Class to handle GPU buffers and execution of CUDA kernels on image data - - All GPU buffers are kept within this class and it is intentionally very stateful. 
- This generally means that you will want to load data into it and then do something. - Typical usage in correct order: - - 1. instantiate - 2. load constants - 3. load_data - 4. load_cell_table - 5. correct - 6a. reshape (only here does data transfer back to host) - 6b. compute_preview (optional) - - repeat from 2. or 3. - - In case no constants are available / correction is not desired, can skip 3 and 4 and - pass CorrectionFlags.NONE to correct(...). Generally, user must handle which - correction steps are appropriate given the constants loaded so far. - """ - - # These must be set by subclass - _kernel_source_filename = None - _corrected_axis_order = None - +class BaseKernelRunner: def __init__( self, pixels_x, @@ -42,11 +17,6 @@ class BaseGpuRunner: input_data_dtype=np.uint16, output_data_dtype=np.float32, ): - _src_dir = pathlib.Path(__file__).absolute().parent - # subclass must define _kernel_source_filename - with (_src_dir / "kernels" / self._kernel_source_filename).open("r") as fd: - self._kernel_template = jinja2.Template(fd.read()) - self.pixels_x = pixels_x self.pixels_y = pixels_y self.memory_cells = memory_cells @@ -60,41 +30,20 @@ class BaseGpuRunner: self.input_data_dtype = input_data_dtype self.output_data_dtype = output_data_dtype - self._init_kernels() - - # reuse buffers for input / output - self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=np.uint16) - self.input_data_gpu = cupy.empty(self.input_shape, dtype=input_data_dtype) - self.processed_data_gpu = cupy.empty( - self.processed_shape, dtype=output_data_dtype - ) - self.reshaped_data_gpu = None # currently not reusing buffer - # default preview layers: raw and corrected (subclass can extend) self.preview_buffer_getters = [ self._get_raw_for_preview, self._get_corrected_for_preview, ] - # to get data from respective buffers to cell, x, y shape for preview computation - def _get_raw_for_preview(self): - """Should return view of self.input_data_gpu with shape (cell, x/y, x/y)""" - 
raise NotImplementedError() - - def _get_corrected_for_preview(self): - """Should return view of self.processed_data_gpu with shape (cell, x/y, x/y)""" - raise NotImplementedError() - - def flush_buffers(self): - """Optional reset GPU buffers (implement in subclasses which need this)""" - pass def correct(self, flags): """Correct (already loaded) image data according to flags - Subclass must define this method. It should assume that image data, cell table, - and other data (including constants) has already been loaded. It should - probably run some GPU kernel and output should go into self.processed_data_gpu. + Detector-specific subclass must define this method. It should assume that image + data, cell table, and other data (including constants) has already been loaded. + It should probably run some GPU kernel and output should go into + self.processed_data_gpu. Keep in mind that user only gets output from compute_preview or reshape (either of these should come after correct). @@ -107,29 +56,18 @@ class BaseGpuRunner: """ raise NotImplementedError() - def reshape(self, output_order, out=None): - """Move axes to desired output order and copy to host memory - - The out parameter is passed directly to the get function of GPU array: if - None, then a new ndarray (in host memory) is returned. If not None, then data - will be loaded into the provided array, which must match shape / dtype. 
- """ - # TODO: avoid copy - if output_order == self._corrected_axis_order: - self.reshaped_data_gpu = self.processed_data_gpu - else: - self.reshaped_data_gpu = cupy.transpose( - self.processed_data_gpu, - utils.transpose_order(self._corrected_axis_order, output_order), - ) - - return self.reshaped_data_gpu.get(out=out) + # to get data from respective buffers to cell, x, y shape for preview computation + def _get_raw_for_preview(self): + """Should return view of self.input_data_gpu with shape (cell, x/y, x/y)""" + raise NotImplementedError() - def load_data(self, raw_data): - self.input_data_gpu.set(raw_data) + def _get_corrected_for_preview(self): + """Should return view of self.processed_data_gpu with shape (cell, x/y, x/y)""" + raise NotImplementedError() - def load_cell_table(self, cell_table): - self.cell_table_gpu.set(cell_table) + def flush_buffers(self): + """Optional reset GPU buffers (implement in subclasses which need this)""" + pass def compute_previews(self, preview_index): """Generate single slice or reduction preview of raw and corrected data @@ -174,6 +112,89 @@ class BaseGpuRunner: }[preview_index] return stat_fun(image_data, axis=0, dtype=cupy.float32).get() + +class BaseGpuRunner(BaseKernelRunner): + """Class to handle GPU buffers and execution of CUDA kernels on image data + + All GPU buffers are kept within this class and it is intentionally very stateful. + This generally means that you will want to load data into it and then do something. + Typical usage in correct order: + + 1. instantiate + 2. load constants + 3. load_data + 4. load_cell_table + 5. correct + 6a. reshape (only here does data transfer back to host) + 6b. compute_previews (optional) + + repeat from 2. or 3. + + In case no constants are available / correction is not desired, can skip 3 and 4 and + pass CorrectionFlags.NONE to correct(...). Generally, user must handle which + correction steps are appropriate given the constants loaded so far.
+ """ + + # These must be set by subclass + _kernel_source_filename = None + _corrected_axis_order = None + + def __init__( + self, + pixels_x, + pixels_y, + memory_cells, + constant_memory_cells, + input_data_dtype=np.uint16, + output_data_dtype=np.float32, + ): + super().__init__( + pixels_x, + pixels_y, + memory_cells, + constant_memory_cells, + input_data_dtype, + output_data_dtype, + ) + _src_dir = pathlib.Path(__file__).absolute().parent + # subclass must define _kernel_source_filename + with (_src_dir / "kernels" / self._kernel_source_filename).open("r") as fd: + self._kernel_template = jinja2.Template(fd.read()) + + self._init_kernels() + + # reuse buffers for input / output + self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=np.uint16) + self.input_data_gpu = cupy.empty(self.input_shape, dtype=input_data_dtype) + self.processed_data_gpu = cupy.empty( + self.processed_shape, dtype=output_data_dtype + ) + self.reshaped_data_gpu = None # currently not reusing buffer + + def reshape(self, output_order, out=None): + """Move axes to desired output order and copy to host memory + + The out parameter is passed directly to the get function of GPU array: if + None, then a new ndarray (in host memory) is returned. If not None, then data + will be loaded into the provided array, which must match shape / dtype. + """ + # TODO: avoid copy + if output_order == self._corrected_axis_order: + self.reshaped_data_gpu = self.processed_data_gpu + else: + self.reshaped_data_gpu = cupy.transpose( + self.processed_data_gpu, + utils.transpose_order(self._corrected_axis_order, output_order), + ) + + return self.reshaped_data_gpu.get(out=out) + + def load_data(self, raw_data): + self.input_data_gpu.set(raw_data) + + def load_cell_table(self, cell_table): + self.cell_table_gpu.set(cell_table) + def update_block_size(self, full_block): """Set execution grid such that it covers processed_shape with full_blocks -- GitLab