diff --git a/src/calng/AgipdCorrection.py b/src/calng/AgipdCorrection.py
index 2c1c5537e2b51d175e7da5f85dbc8bfb6d7f6d95..a2a0779799adc84a861113fc8ede9db9dc92ae50 100644
--- a/src/calng/AgipdCorrection.py
+++ b/src/calng/AgipdCorrection.py
@@ -8,7 +8,7 @@ from karabo.common.states import State
 from . import shmem_utils, utils
 from ._version import version as deviceVersion
 from .base_correction import BaseCorrection
-from .agipd_gpu import AgipdGpuRunner
+from .agipd_gpu import AgipdGpuRunner, CorrectionFlags
 
 
 @KARABO_CLASSINFO("AgipdCorrection", deviceVersion)
@@ -54,7 +54,7 @@ class AgipdCorrection(BaseCorrection):
         super().__init__(config)
         output_axis_order = config.get("dataFormat.outputAxisOrder")
         if output_axis_order == "pixels-fast":
-            self._output_transpose = (0, 2, 1)
+            self._output_transpose = None
         elif output_axis_order == "memorycells-fast":
             self._output_transpose = (2, 1, 0)
         else:
@@ -68,6 +68,7 @@ class AgipdCorrection(BaseCorrection):
             self.pulse_filter,
             self._output_transpose,
         )
+        self._cached_constants = {}
 
         self.updateState(State.ON)
 
@@ -103,8 +104,6 @@ class AgipdCorrection(BaseCorrection):
         # original shape: memory_cell, data/raw_gain, x, y
         # TODO: consider making paths configurable
         image_data = data.get("image.data")
-        self.log.INFO(f"Image data had shape: {image_data.shape}")
-        return
         if image_data.shape[0] != self.get("dataFormat.memoryCells"):
             self.set(
                 "status", f"Updating input shapes based on received {image_data.shape}"
@@ -127,7 +126,7 @@ class AgipdCorrection(BaseCorrection):
         do_generate_preview = train_id % self.get(
             "preview.trainIdModulo"
         ) == 0 and self.get("preview.enable")
-        can_apply_correction = correction_cell_num > 0
+        can_apply_correction = True
         do_apply_correction = self.get("applyCorrection")
 
         if not self.get("state") is State.PROCESSING:
@@ -165,7 +164,12 @@ class AgipdCorrection(BaseCorrection):
             buffer_handle, buffer_array = self._shmem_buffer.next_slot()
             if do_apply_correction:
                 self.gpu_runner.load_cell_table(cell_table)
-                self.gpu_runner.correct()
+                self.gpu_runner.correct(
+                    CorrectionFlags.THRESHOLD
+                    | CorrectionFlags.OFFSET
+                    | CorrectionFlags.REL_GAIN_PC
+                    | CorrectionFlags.REL_GAIN_XRAY
+                )
             else:
                 self.gpu_runner.only_cast()
             self.gpu_runner.reshape(out=buffer_array)
@@ -221,19 +225,21 @@ class AgipdCorrection(BaseCorrection):
         if self.get("performance.rateUpdateOnEachInput"):
             self._update_actual_rate()
 
-    def constantLoaded(self):
-        """Hook from CalibrationReceiverBaseDevice called after each getConstant
-
-        Here, used to load the received constants (or correction maps derived
-        fromt them) onto GPU.
-
-        TODO: call after receiving *all* constants instead of calling once per
-        new constant (will cause some overhead for bigger devices)
-
-        """
-
-        self.log.WARN("Not ready to handle constants yet")
-        ...
+    def requestConstant(self, name, mostRecent=False, tryRemote=True):
+        """Request constant `name`; once available, cache it and, for ThresholdsDark or
+        Offset, load it into the GPU runner (constantLoaded would not name the constant)."""
+        # TODO: clear from device, too
+        # TODO: update correction capability flag
+        self._cached_constants.pop(name, None)  # forget any stale copy first
+        super().requestConstant(name, mostRecent, tryRemote)
+        constant = self.getConstant(name)
+        if constant is None:
+            return
+        self._cached_constants[name] = constant
+        if name == "ThresholdsDark":
+            self.gpu_runner.load_thresholds(constant)
+        elif name == "Offset":
+            self.gpu_runner.load_offset_map(constant)
 
     def _update_pulse_filter(self, filter_string):
         """Called whenever the pulse filter changes, typically followed by
@@ -251,7 +257,8 @@ class AgipdCorrection(BaseCorrection):
     ):
         """(Re)initialize (GPU) buffers according to expected data shapes"""
 
-        input_data_shape = (memory_cells, 1, pixels_y, pixels_x)
+        # TODO: report "actual" input shape (incl. raw gain)
+        input_data_shape = (memory_cells, pixels_x, pixels_y)
         # reflect the axis reordering in the expected output shape
         output_data_shape = utils.shape_after_transpose(
             input_data_shape, output_transpose
@@ -276,6 +283,7 @@ class AgipdCorrection(BaseCorrection):
             pixels_x,
             pixels_y,
             memory_cells,
+            constant_memory_cells=250,
             output_transpose=output_transpose,
             input_data_dtype=self.input_data_dtype,
             output_data_dtype=self.output_data_dtype,
diff --git a/src/calng/agipd_gpu.py b/src/calng/agipd_gpu.py
index 1d8d2d150ce3e22ac32f4166b42411fba1b1ba31..3c6f010c60261bfee8c9eef7fe7eb327d76d8abc 100644
--- a/src/calng/agipd_gpu.py
+++ b/src/calng/agipd_gpu.py
@@ -56,6 +56,12 @@ class AgipdGpuRunner(base_gpu.BaseGpuRunner):
 
         self.update_block_size((1, 1, 64))
 
+    def _preview_preprocess_raw(self):
+        return self.input_data_gpu[:, 0]  # index 0 on axis 1; presumably image data rather than raw gain - TODO confirm
+
+    def _preview_preprocess_corr(self):
+        return self.processed_data_gpu  # corrected buffer is returned unchanged
+
     def load_thresholds(self, threshold_map):
         # shape: y, x, memory cell, threshold 0 / threshold 1 / 3 gain values
         # TODO: do we need the gain values (in the constant) for anything?
diff --git a/src/calng/base_gpu.py b/src/calng/base_gpu.py
index ef8d3d3cf5609fc066dc27d2c91dccf31954271d..486043499683a6e7b5a352bb3f44286d759d3aa2 100644
--- a/src/calng/base_gpu.py
+++ b/src/calng/base_gpu.py
@@ -72,6 +72,13 @@ class BaseGpuRunner:
             self.preview_shape, dtype=np.float32
         )
 
+    # Subclass hooks returning the raw / corrected buffer in (cell, x, y) shape for previews
+    def _preview_preprocess_raw(self):
+        raise NotImplementedError()  # abstract hook: subclasses must override
+
+    def _preview_preprocess_corr(self):
+        raise NotImplementedError()  # abstract hook: subclasses must override
+
     def only_cast(self):
         """Like correct without the correction
 
@@ -137,13 +144,14 @@ class BaseGpuRunner:
             # if not have_corrected and not can_correct, assume only_cast already done
 
         # TODO: enum around reduction type
-        for (image_data, output_buffer) in (
-            (self.input_data_gpu, self.preview_raw),
-            (self.processed_data_gpu, self.preview_corrected),
+        for (preprocces, output_buffer) in (
+            (self._preview_preprocess_raw, self.preview_raw),
+            (self._preview_preprocess_corr, self.preview_corrected),
         ):
+            image_data = preprocces()
             if preview_index >= 0:
                 # TODO: change axis order when moving reshape to after correction
-                image_data[preview_index].astype(np.float32).transpose().get(
+                image_data[preview_index].astype(np.float32).get(
                     out=output_buffer
                 )
             elif preview_index == -1:
@@ -152,12 +160,12 @@ class BaseGpuRunner:
                 max_index = cupy.argmax(
                     cupy.sum(image_data, axis=(1, 2), dtype=cupy.float32)
                 )
-                image_data[max_index].astype(np.float32).transpose().get(
+                image_data[max_index].astype(np.float32).get(
                     out=output_buffer
                 )
             elif preview_index in (-2, -3, -4):
                 stat_fun = {-2: cupy.mean, -3: cupy.sum, -4: cupy.std}[preview_index]
-                stat_fun(image_data, axis=0, dtype=cupy.float32).transpose().get(
+                stat_fun(image_data, axis=0, dtype=cupy.float32).get(
                     out=output_buffer
                 )
         return self.preview_raw, self.preview_corrected
diff --git a/src/calng/dssc_gpu.py b/src/calng/dssc_gpu.py
index 38980e78235dd07ea8d652105e57403ba55362ab..cd7e2d80595fe6c252cc497b1b4c86d6f8aaa007 100644
--- a/src/calng/dssc_gpu.py
+++ b/src/calng/dssc_gpu.py
@@ -37,6 +37,12 @@ class DsscGpuRunner(base_gpu.BaseGpuRunner):
 
         self.update_block_size((1, 1, 64))
 
+    def _preview_preprocess_raw(self):
+        return cupy.transpose(self.input_data_gpu, (0, 2, 1))  # swap the last two axes
+
+    def _preview_preprocess_corr(self):
+        return cupy.transpose(self.processed_data_gpu, (0, 2, 1))  # swap the last two axes
+
     def load_constants(self, offset_map):
         constant_memory_cells = offset_map.shape[-1]
         if constant_memory_cells != self.constant_memory_cells: