diff --git a/src/calng/Gotthard2Correction.py b/src/calng/Gotthard2Correction.py index 49cd9e01a70e22280b9add36f1aee19a279bb052..d03eeb83a52f003045414503d365cbcac6424b6f 100644 --- a/src/calng/Gotthard2Correction.py +++ b/src/calng/Gotthard2Correction.py @@ -200,6 +200,7 @@ class Gotthard2Correction(BaseCorrection): _image_data_path = "data.adc" _cell_table_path = "data.memoryCell" _warn_memory_cell_range = False # for now, receiver always writes 255 + _cuda_pin_buffers = False @staticmethod def expectedParameters(expected): diff --git a/src/calng/base_correction.py b/src/calng/base_correction.py index 111abe78e80c50cf3eaeb8fa77eca63d9a72ba42..ec2648279a96463bf78a3d8452089530a85b5348 100644 --- a/src/calng/base_correction.py +++ b/src/calng/base_correction.py @@ -197,6 +197,7 @@ class BaseCorrection(PythonDevice): _image_data_path = "image.data" # customize for *some* subclasses _cell_table_path = "image.cellId" _warn_memory_cell_range = True # can be disabled for some detectors + _cuda_pin_buffers = True def _load_constant_to_runner(self, constant_name, constant_data): """Subclass must define how to process constants into correction maps and store @@ -894,8 +895,9 @@ class BaseCorrection(PythonDevice): self.output_data_dtype, shmem_buffer_name, ) - self.log.INFO("Trying to pin the shmem buffer memory") - self._shmem_buffer.cuda_pin() + if self._cuda_pin_buffers: + self.log.INFO("Trying to pin the shmem buffer memory") + self._shmem_buffer.cuda_pin() self.log.INFO("Done, shmem buffer is ready") else: self._shmem_buffer.change_shape(self.output_data_shape) diff --git a/src/calng/shmem_utils.py b/src/calng/shmem_utils.py index 0a82cdf6eaa6a5e0bf065e6eda1812597db1f3d0..4c4838e21fbb6df786d83508fd3be7630f110d08 100644 --- a/src/calng/shmem_utils.py +++ b/src/calng/shmem_utils.py @@ -67,8 +67,6 @@ class ShmemCircularBuffer: self._buffer_ary = None self._update_shape(array_shape, dtype) self._cuda_pinned = False - # important for performance and pinning: touch memory to actually allocate - self._buffer_ary.fill(0) def _update_shape(self, array_shape, dtype): array_shape = tuple(array_shape) @@ -101,16 +99,16 @@ class ShmemCircularBuffer: def cuda_pin(self): import cupy + self._memory_pointer = self._buffer_ary.ctypes.get_data() cupy.cuda.runtime.hostRegister( - self._memory_pointer, - self._shared_memory.size, - 0 + self._memory_pointer, self._shared_memory.size, 0 ) def __del__(self): if self._cuda_pinned: import cupy + cupy.cuda.runtime.hostUnregister(self._memory_pointer) del self._buffer_ary del self._shared_memory