diff --git a/src/calng/DetectorAssembler.py b/src/calng/DetectorAssembler.py index 0259846f4f7a09e16457bbbf26233b7cccbdad09..9fc0dc68ca430f0fd08e2bd66dbfa19cd995b063 100644 --- a/src/calng/DetectorAssembler.py +++ b/src/calng/DetectorAssembler.py @@ -200,6 +200,8 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): self._path_to_stack = self.get("pathToStack") self._geometry = None self._stack_input_buffer = None + self._position_output_buffer = None + self._extra_shape = () self.KARABO_SLOT(self.requestScene) @@ -256,9 +258,6 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): return self._geometry = geom_utils.deserialize_geometry(serialized_geometry) # TODO: allow multiple memory cells (extra geom notion of extra dimensions) - self._stack_input_buffer = np.zeros( - self._geometry.expected_data_shape, dtype=np.float32 - ) def on_matched_data(self, train_id, sources): if self._geometry is None: @@ -271,7 +270,26 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): self.unsafe_get("outputForBridgeOutput") ) - module_indices_unfilled = set(range(self._stack_input_buffer.shape[0])) + # check and maybe update stacking, output buffers + input_shape = next(iter(sources.values()))[0].get(self._path_to_stack).shape + input_extra_shape = input_shape[:-2] + if self._stack_input_buffer is None or input_extra_shape != self._extra_shape: + self._extra_shape = input_extra_shape + self._stack_input_buffer = np.zeros( + self._extra_shape + self._geometry.expected_data_shape, + dtype=np.float32 + ) + self._position_output_buffer = self._geometry.output_array_for_position_fast( + extra_shape=self._extra_shape, dtype=np.float32 + ) + self.log.INFO( + f"Updating stacking buffer to shape: {self._stack_input_buffer.shape}" + ) + self.log.INFO( + f"Updating output buffer to shape: {self._position_output_buffer.shape}" + ) + + module_indices_unfilled = set(range(self._geometry.n_modules)) earliest_source_timestamp = float("inf") for source, (data, source_timestamp) in sources.items(): # regular TrainMatcher output @@ -282,7 +300,7 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): # prepare for assembly # TODO: handle failure to "parse" source, get data out module_index = self._source_to_index(source) - self._stack_input_buffer[module_index] = data.get( + self._stack_input_buffer[..., module_index, :, :] = data.get( self._path_to_stack ).astype(np.float32, copy=False) # TODO: set dtype based on input? module_indices_unfilled.discard(module_index) @@ -299,7 +317,9 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): # TODO: configurable treatment of missing modules # TODO: reusable output buffer to save on allocation - assembled, _ = self._geometry.position_modules_fast(self._stack_input_buffer) + assembled, _ = self._geometry.position_modules_fast( + self._stack_input_buffer, out=self._position_output_buffer + ) # TODO: optionally include control data output_hash = Hash( @@ -330,7 +350,7 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): "image.data", ImageData( # TODO: get around this being mirrored... - assembled[::-1, ::-1], + assembled[..., ::-1, ::-1], Dims(*assembled.shape), Encoding.GRAY, bitsPerPixel=32, @@ -390,22 +410,22 @@ class DetectorAssembler(TrainMatcher.TrainMatcher): def downsample_2d(arr, factor, reduction_fun=np.nanmax): """Generalization of downsampling from FemDataAssembler - Expects first two dimensions of arr to be multiple of 2 ** factor + Expects last two dimensions of arr to be multiple of 2 ** factor Useful if you're sitting at home and ssh connection is slow to get full-resolution previews.""" for i in range(factor // 2): arr = reduction_fun( ( - arr[:-1:2], - arr[1::2], + arr[..., :-1:2, :], + arr[..., 1::2, :], ), axis=0, ) arr = reduction_fun( ( - arr[:, :-1:2], - arr[:, 1::2], + arr[..., :-1:2], + arr[..., 1::2], ), axis=0, ) diff --git a/src/calng/base_correction.py b/src/calng/base_correction.py index 3f58ca800c5e2a5c1837a2fbb0bd51711db4b43d..31b5bf3369f67201f600efd180c2ea8d31bfd26e 100644 --- a/src/calng/base_correction.py +++ b/src/calng/base_correction.py @@ -496,13 +496,15 @@ class BaseCorrection(PythonDevice): .displayedName("Index (or stat) for preview") .description( "If this value is ≥ 0, the corresponding index (frame, cell, or pulse) " - "will be sliced for the preview output. If this value is < 0, preview " - "will be one of the following stats: -1: max, -2: mean, -3: sum, -4: " - "stdev. These stats are computed across memory cells." + "will be sliced for the preview output. If -4 ≤ this value ≤ -1, " + "preview will be one of the following stats: -1: max, -2: mean, -3: " + "sum, -4: stdev. These stats are computed across memory cells. " + "Finally, -5 will cause all memory cells to be sent. Be aware that " + "this can cause severe performance and bandwidth issues." ) .assignmentOptional() .defaultValue(0) - .minInc(-4) + .minInc(-5) .reconfigurable() .commit(), diff --git a/src/calng/base_gpu.py b/src/calng/base_gpu.py index 333eb053815cd643540cca94e3b1aa0c457adc37..f41d06a0846fd594768a2cd08effb8271aa3861a 100644 --- a/src/calng/base_gpu.py +++ b/src/calng/base_gpu.py @@ -145,7 +145,7 @@ class BaseGpuRunner: been called with the appropriate flags before compute_preview(...). """ - if preview_index < -4: + if preview_index < -5: raise ValueError(f"No statistic with code {preview_index} defined") elif preview_index >= self.memory_cells: raise ValueError(f"Memory cell index {preview_index} out of range") @@ -173,6 +173,8 @@ class BaseGpuRunner: -4: cupy.nanstd, }[preview_index] return stat_fun(image_data, axis=0, dtype=cupy.float32).get() + elif preview_index == -5: + return image_data.get() def update_block_size(self, full_block): """Set execution grid such that it covers processed_shape with full_blocks