GH/JF: Decrease default shmem buffer size

876fbbe8 · David Hammer · 87feec34 · 876fbbe8 · 876fbbe8 · 876fbbe8
Commit 876fbbe8 authored 2 years ago by David Hammer
--- a/src/calng/Gotthard2Correction.py
+++ b/src/calng/Gotthard2Correction.py
@@ -184,6 +184,11 @@ class Gotthard2CalcatFriend(base_calcat.BaseCalcatFriend):
            .key(f"{param_prefix}.memoryCells")
            .setNewDefaultValue(2)
            .commit(),
+
+            OVERWRITE_ELEMENT(expected)
+            .key("outputShmemBufferSize")
+            .setNewDefaultValue(2)
+            .commit(),
        )

        base_calcat.add_status_schema_from_enum(

--- a/src/calng/JungfrauCorrection.py
+++ b/src/calng/JungfrauCorrection.py
@@ -172,6 +172,12 @@ class JungfrauCalcatFriend(base_calcat.BaseCalcatFriend):
            .key(f"{param_prefix}.biasVoltage")
            .setNewDefaultValue(90)
            .commit(),
+
+            # JUNGFRAU data is small, can fit plenty of trains in here
+            OVERWRITE_ELEMENT(expected)
+            .key("outputShmemBufferSize")
+            .setNewDefaultValue(2)
+            .commit(),
        )

        # add extra parameters

--- a/src/calng/base_gpu.py
+++ b/src/calng/base_gpu.py
@@ -7,32 +7,7 @@ import numpy as np
 from . import utils


-class BaseGpuRunner:
-    """Class to handle GPU buffers and execution of CUDA kernels on image data
-
-    All GPU buffers are kept within this class and it is intentionally very stateful.
-    This generally means that you will want to load data into it and then do something.
-    Typical usage in correct order:
-
-    1. instantiate
-    2. load constants
-    3. load_data
-    4. load_cell_table
-    5. correct
-    6a. reshape (only here does data transfer back to host)
-    6b. compute_preview (optional)
-
-    repeat from 2. or 3.
-
-    In case no constants are available / correction is not desired, can skip 3 and 4 and
-    pass CorrectionFlags.NONE to correct(...). Generally, user must handle which
-    correction steps are appropriate given the constants loaded so far.
-    """
-
-    # These must be set by subclass
-    _kernel_source_filename = None
-    _corrected_axis_order = None
-
+class BaseKernelRunner:
    def __init__(
        self,
        pixels_x,
@@ -42,11 +17,6 @@ class BaseGpuRunner:
        input_data_dtype=np.uint16,
        output_data_dtype=np.float32,
    ):
-        _src_dir = pathlib.Path(__file__).absolute().parent
-        # subclass must define _kernel_source_filename
-        with (_src_dir / "kernels" / self._kernel_source_filename).open("r") as fd:
-            self._kernel_template = jinja2.Template(fd.read())
-
        self.pixels_x = pixels_x
        self.pixels_y = pixels_y
        self.memory_cells = memory_cells
@@ -60,41 +30,20 @@ class BaseGpuRunner:
        self.input_data_dtype = input_data_dtype
        self.output_data_dtype = output_data_dtype

-        self._init_kernels()
-
-        # reuse buffers for input / output
-        self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=np.uint16)
-        self.input_data_gpu = cupy.empty(self.input_shape, dtype=input_data_dtype)
-        self.processed_data_gpu = cupy.empty(
-            self.processed_shape, dtype=output_data_dtype
-        )
-        self.reshaped_data_gpu = None  # currently not reusing buffer
-
        # default preview layers: raw and corrected (subclass can extend)
        self.preview_buffer_getters = [
            self._get_raw_for_preview,
            self._get_corrected_for_preview,
        ]

-    # to get data from respective buffers to cell, x, y shape for preview computation
-    def _get_raw_for_preview(self):
-        """Should return view of self.input_data_gpu with shape (cell, x/y, x/y)"""
-        raise NotImplementedError()
-
-    def _get_corrected_for_preview(self):
-        """Should return view of self.processed_data_gpu with shape (cell, x/y, x/y)"""
-        raise NotImplementedError()
-
-    def flush_buffers(self):
-        """Optional reset GPU buffers (implement in subclasses which need this)"""
-        pass

    def correct(self, flags):
        """Correct (already loaded) image data according to flags

-        Subclass must define this method. It should assume that image data, cell table,
-        and other data (including constants) has already been loaded. It should
-        probably run some GPU kernel and output should go into self.processed_data_gpu.
+        Detector-specific subclass must define this method. It should assume that image
+        data, cell table, and other data (including constants) has already been loaded.
+        It should probably run some GPU kernel and output should go into
+        self.processed_data_gpu.

        Keep in mind that user only gets output from compute_preview or reshape
        (either of these should come after correct).
@@ -107,29 +56,18 @@ class BaseGpuRunner:
        """
        raise NotImplementedError()

-    def reshape(self, output_order, out=None):
-        """Move axes to desired output order and copy to host memory
-
-        The out parameter is passed directly to the get function of GPU array: if
-        None, then a new ndarray (in host memory) is returned. If not None, then data
-        will be loaded into the provided array, which must match shape / dtype.
-        """
-        # TODO: avoid copy
-        if output_order == self._corrected_axis_order:
-            self.reshaped_data_gpu = self.processed_data_gpu
-        else:
-            self.reshaped_data_gpu = cupy.transpose(
-                self.processed_data_gpu,
-                utils.transpose_order(self._corrected_axis_order, output_order),
-            )
-
-        return self.reshaped_data_gpu.get(out=out)
+    # to get data from respective buffers to cell, x, y shape for preview computation
+    def _get_raw_for_preview(self):
+        """Should return view of self.input_data_gpu with shape (cell, x/y, x/y)
+
+        Subclass hook: this base implementation only raises NotImplementedError.
+        """
+        raise NotImplementedError()

-    def load_data(self, raw_data):
-        self.input_data_gpu.set(raw_data)
+    def _get_corrected_for_preview(self):
+        """Should return view of self.processed_data_gpu with shape (cell, x/y, x/y)
+
+        Subclass hook: this base implementation only raises NotImplementedError.
+        """
+        raise NotImplementedError()

-    def load_cell_table(self, cell_table):
-        self.cell_table_gpu.set(cell_table)
+    def flush_buffers(self):
+        """Optionally reset GPU buffers (implement in subclasses which need this)
+
+        The default implementation does nothing.
+        """
+        pass

    def compute_previews(self, preview_index):
        """Generate single slice or reduction preview of raw and corrected data
@@ -174,6 +112,89 @@ class BaseGpuRunner:
            }[preview_index]
            return stat_fun(image_data, axis=0, dtype=cupy.float32).get()

+
+class BaseGpuRunner(BaseKernelRunner):
+    """Class to handle GPU buffers and execution of CUDA kernels on image data
+
+    All GPU buffers are kept within this class and it is intentionally very stateful.
+    This generally means that you will want to load data into it and then do something.
+    Typical usage in correct order:
+
+    1. instantiate
+    2. load constants
+    3. load_data
+    4. load_cell_table
+    5. correct
+    6a. reshape (only here does data transfer back to host)
+    6b. compute_previews (optional)
+
+    repeat from 2. or 3.
+
+    In case no constants are available / correction is not desired, can skip 3 and 4 and
+    pass CorrectionFlags.NONE to correct(...). Generally, user must handle which
+    correction steps are appropriate given the constants loaded so far.
+    """
+
+    # These must be set by subclass
+    # name of the kernel template file inside the "kernels" directory (read in __init__)
+    _kernel_source_filename = None
+    # axis order of processed_data_gpu; reshape compares the requested order against it
+    _corrected_axis_order = None
+
+    def __init__(
+        self,
+        pixels_x,
+        pixels_y,
+        memory_cells,
+        constant_memory_cells,
+        input_data_dtype=np.uint16,
+        output_data_dtype=np.float32,
+    ):
+        """Load the kernel template, set up kernels, and allocate GPU buffers
+
+        All arguments are forwarded unchanged to the parent constructor. The
+        subclass must have set _kernel_source_filename before this runs.
+
+        NOTE(review): self.input_shape and self.processed_shape are read below
+        but not assigned in the visible code - presumably the parent constructor
+        or the subclass provides them; confirm.
+        """
+        super().__init__(
+            pixels_x,
+            pixels_y,
+            memory_cells,
+            constant_memory_cells,
+            input_data_dtype,
+            output_data_dtype,
+        )
+        # kernel sources live in the "kernels" directory next to this module
+        _src_dir = pathlib.Path(__file__).absolute().parent
+        # subclass must define _kernel_source_filename
+        with (_src_dir / "kernels" / self._kernel_source_filename).open("r") as fd:
+            self._kernel_template = jinja2.Template(fd.read())
+
+        # not defined in this class; presumably the subclass builds its kernels
+        # from self._kernel_template here - confirm
+        self._init_kernels()
+
+        # reuse buffers for input / output
+        self.cell_table_gpu = cupy.empty(self.memory_cells, dtype=np.uint16)
+        self.input_data_gpu = cupy.empty(self.input_shape, dtype=input_data_dtype)
+        self.processed_data_gpu = cupy.empty(
+            self.processed_shape, dtype=output_data_dtype
+        )
+        self.reshaped_data_gpu = None  # currently not reusing buffer
+
+    def reshape(self, output_order, out=None):
+        """Bring corrected data into output_order and fetch it to host memory
+
+        If output_order differs from self._corrected_axis_order, the corrected
+        buffer is transposed accordingly; otherwise it is used as is. The result
+        is remembered in self.reshaped_data_gpu.
+
+        out is handed straight to the get function of the GPU array: with None, a
+        new ndarray (in host memory) is returned; otherwise the data is written
+        into the given array, which must match shape / dtype.
+        """
+        # TODO: avoid copy
+        result_gpu = self.processed_data_gpu
+        if output_order != self._corrected_axis_order:
+            axes = utils.transpose_order(self._corrected_axis_order, output_order)
+            result_gpu = cupy.transpose(result_gpu, axes)
+        self.reshaped_data_gpu = result_gpu
+
+        return self.reshaped_data_gpu.get(out=out)
+
+    def load_data(self, raw_data):
+        """Copy raw_data into the preallocated input buffer self.input_data_gpu"""
+        self.input_data_gpu.set(raw_data)
+
+    def load_cell_table(self, cell_table):
+        """Copy cell_table into the preallocated buffer self.cell_table_gpu"""
+        self.cell_table_gpu.set(cell_table)
+
    def update_block_size(self, full_block):
        """Set execution grid such that it covers processed_shape with full_blocks