From 4e18965cce056221785dc838624f7aa7274dbe3b Mon Sep 17 00:00:00 2001
From: David Hammer <dhammer@mailbox.org>
Date: Tue, 7 Dec 2021 20:08:10 +0100
Subject: [PATCH] Remove small redundancies

---
 src/calng/base_gpu.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/calng/base_gpu.py b/src/calng/base_gpu.py
index 903cb1e3..c0619d0a 100644
--- a/src/calng/base_gpu.py
+++ b/src/calng/base_gpu.py
@@ -168,31 +168,28 @@ class BaseGpuRunner:
             return cupy.nanmax(image_data, axis=0).astype(cupy.float32).get()
         elif preview_index in (-2, -3, -4):
             stat_fun = {
-                -1: cupy.nanmax,
                 -2: cupy.nanmean,
                 -3: cupy.nansum,
                 -4: cupy.nanstd,
             }[preview_index]
             return stat_fun(image_data, axis=0, dtype=cupy.float32).get()

-    def update_block_size(self, full_block, target_shape=None):
-        """Compute grid such that thread block grid covers target shape
+    def update_block_size(self, full_block):
+        """Set execution grid such that it covers processed_shape with full_blocks

-        Execution is scheduled with 3d "blocks" of CUDA threads, tuning can affect
+        Execution is scheduled with 3d "blocks" of CUDA threads. Tuning can affect
         performance. Correction kernels are "monolithic" for simplicity (i.e. each
         logical thread handles one entry in output data), so in each dimension we
-        parallelize, grid * block >= length.
+        parallelize, grid * block >= length to cover all entries.

         Note that individual kernels must themselves check whether they go out of
         bounds; grid dimensions get rounded up in case ndarray size is not multiple
         of block size.
         """
-        if target_shape is None:
-            target_shape = self.processed_shape
         assert len(full_block) == 3
         self.full_block = tuple(full_block)
         self.full_grid = tuple(
             utils.ceil_div(a_length, block_length)
-            for (a_length, block_length) in zip(target_shape, full_block)
+            for (a_length, block_length) in zip(self.processed_shape, full_block)
         )
--
GitLab
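
As a side note on the grid computation touched by this patch: the docstring's rule "grid * block >= length" with rounding up is what forces kernels to bounds-check. The minimal sketch below illustrates it with a stand-in for utils.ceil_div; the processed_shape and full_block values here are hypothetical examples, not values from calng.

```python
def ceil_div(num, denom):
    """Integer ceiling division (stand-in for utils.ceil_div assumed by the patch)."""
    return (num + denom - 1) // denom

# Hypothetical shapes for illustration only.
processed_shape = (352, 512, 128)  # e.g. (memory cells, slow scan, fast scan)
full_block = (4, 24, 16)           # an arbitrary 3d CUDA block size

full_grid = tuple(
    ceil_div(a_length, block_length)
    for (a_length, block_length) in zip(processed_shape, full_block)
)
print(full_grid)  # (88, 22, 8)

# grid * block = (352, 528, 128) >= processed_shape in every dimension, so the
# monolithic kernel covers all entries; the 528 > 512 overshoot in the middle
# axis is why each kernel must check whether its thread index is out of bounds.
```

With the patch applied, only self.processed_shape is ever covered, so dropping the target_shape parameter removes an unused degree of freedom without changing this arithmetic.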