diff --git a/src/calng/JungfrauCorrection.py b/src/calng/JungfrauCorrection.py
index b2c8b8cd797dfd6d92e8ac6157b5c4595aada9e0..1a0b20c9fefa733c947862e953fa35dcdbe23eba 100644
--- a/src/calng/JungfrauCorrection.py
+++ b/src/calng/JungfrauCorrection.py
@@ -36,11 +36,12 @@ class CorrectionFlags(enum.IntFlag):
     NONE = 0
     OFFSET = 1
     REL_GAIN = 2
+    BPMASK = 4
 
 
 class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
-    _kernel_source_filename = "jungfrau_gpu_kernels.cpp"
-    _corrected_axis_order = ...  # TODO: get specs for jungfrau data
+    _kernel_source_filename = "jungfrau_gpu.cu"
+    _corrected_axis_order = "cyx"
 
     def __init__(
         self,
@@ -50,10 +51,13 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
         constant_memory_cells,
         input_data_dtype=cupy.uint16,
         output_data_dtype=cupy.float32,
+        bad_pixel_mask_value=cupy.nan,
+        burst_mode=False,
     ):
-        self.input_shape = ...
-        self.processed_shape = ...
-        super().__ini__(
+        self.burst_mode = burst_mode
+        self.input_shape = (memory_cells, pixels_y, pixels_x)
+        self.processed_shape = self.input_shape
+        super().__init__(
             pixels_x,
             pixels_y,
             memory_cells,
@@ -61,10 +65,28 @@ class JungfrauGpuRunner(base_gpu.BaseGpuRunner):
             input_data_dtype,
             output_data_dtype,
         )
-        self.map_shape = ...
+        self.map_shape = self.input_shape + (3,)
         # is jungfrau stuff gain mapped?
-        self.offset_map_gpu = cupy.zeros(..., dtype=cupy.float32)
-        self.rel_gain_map_gpu = cupy.ones(..., dtype=cupy.float32)
+        self.offset_map_gpu = cupy.zeros(self.map_shape, dtype=cupy.float32)
+        self.rel_gain_map_gpu = cupy.ones(self.map_shape, dtype=cupy.float32)
+        self.bad_pixel_map_gpu = cupy.zeros(self.map_shape, dtype=cupy.uint32)
+
+    def _init_kernels(self):
+        kernel_source = self._kernel_template.render(
+            {
+                "pixels_x": self.pixels_x,
+                "pixels_y": self.pixels_y,
+                "data_memory_cells": self.memory_cells,
+                "constant_memory_cells": self.constant_memory_cells,
+                "input_data_dtype": utils.np_dtype_to_c_type(self.input_data_dtype),
+                "output_data_dtype": utils.np_dtype_to_c_type(self.output_data_dtype),
+                "corr_enum": utils.enum_to_c_template(CorrectionFlags),
+                "burst_mode": self.burst_mode,
+            }
+        )
+        print(kernel_source)
+        self.source_module = cupy.RawModule(code=kernel_source)
+        self.correction_kernel = self.source_module.get_function("correct")
 
 
 class JungfrauCalcatFriend(calcat_utils.BaseCalcatFriend):
@@ -94,14 +116,17 @@ class JungfrauCalcatFriend(calcat_utils.BaseCalcatFriend):
             .key(f"{param_prefix}.pixelsX")
             .setNewDefaultValue(1024)
             .commit(),
+
             OVERWRITE_ELEMENT(schema)
             .key(f"{param_prefix}.pixelsY")
             .setNewDefaultValue(512)
             .commit(),
+
             OVERWRITE_ELEMENT(schema)
             .key(f"{param_prefix}.memoryCells")
             .setNewDefaultValue(1)
             .commit(),
+
             OVERWRITE_ELEMENT(schema)
             .key(f"{param_prefix}.biasVoltage")
             .setNewDefaultValue(90)
@@ -118,6 +143,7 @@ class JungfrauCalcatFriend(calcat_utils.BaseCalcatFriend):
             .defaultValue(350)
             .reconfigurable()
             .commit(),
+
             DOUBLE_ELEMENT(schema)
             .key(f"{param_prefix}.sensorTemperature")
             .displayedName("Sensor temperature")
@@ -126,6 +152,7 @@ class JungfrauCalcatFriend(calcat_utils.BaseCalcatFriend):
             .defaultValue(291)
             .reconfigurable()
             .commit(),
+
             DOUBLE_ELEMENT(schema)
             .key(f"{param_prefix}.gainSetting")
             .displayedName("Gain setting")
@@ -134,6 +161,14 @@ class JungfrauCalcatFriend(calcat_utils.BaseCalcatFriend):
             .defaultValue(0)
             .reconfigurable()
             .commit(),
+
+            STRING_ELEMENT(schema)
+            .key(f"{param_prefix}.gainMode")
+            .description("Gain mode (WIP)")
+            .assignmentOptional()
+            .defaultValue("dynamicgain")
+            .options("dynamicgain,fixgain1,fixgain2")
+            .commit(),
         )
         managed_keys.add(f"{param_prefix}.integrationTime")
         managed_keys.add(f"{param_prefix}.sensorTemperature")
@@ -161,10 +196,12 @@ class JungfrauCorrection(BaseCorrection):
     _correction_field_names = (
         ("offset", CorrectionFlags.OFFSET),
         ("relGain", CorrectionFlags.REL_GAIN),
+        ("badPixels", CorrectionFlags.BPMASK),
     )
     _kernel_runner_class = JungfrauGpuRunner
     _calcat_friend_class = JungfrauCalcatFriend
     _constant_enum_class = JungfrauConstants
+    _managed_keys = BaseCorrection._managed_keys.copy()
 
     @staticmethod
     def expectedParameters(expected):
@@ -174,6 +211,7 @@ class JungfrauCorrection(BaseCorrection):
             .key("dataFormat.memoryCells")
             .setNewDefaultValue(1)
             .commit(),
+
             OVERWRITE_ELEMENT(expected)
             .key("preview.selectionMode")
             .setNewDefaultValue("frame")
@@ -198,10 +236,8 @@ class JungfrauCorrection(BaseCorrection):
 
     @property
     def input_data_shape(self):
-        # TODO: check up on this
         return (
             self._schema_cache["dataFormat.memoryCells"],
-            1,
             self._schema_cache["dataFormat.pixelsX"],
             self._schema_cache["dataFormat.pixelsY"],
         )
diff --git a/src/calng/base_correction.py b/src/calng/base_correction.py
index 14a1b6b16ad8452042136cd72a9cbd05e7f5ac41..eadf0a815b46debe4dd7297c43b96bbc63bd86b8 100644
--- a/src/calng/base_correction.py
+++ b/src/calng/base_correction.py
@@ -213,6 +213,8 @@ class BaseCorrection(PythonDevice):
         "processingStateTimeout",
         "state",
     }  # subclass should be aware of cache, but does not need to extend
+    _image_data_path = "image.data"  # customize for *some* subclasses
+    _cell_table_path = "image.cellId"
 
     def _load_constant_to_runner(constant_name, constant_data):
         """Subclass must define how to process constants into correction maps and store
@@ -808,7 +810,7 @@ class BaseCorrection(PythonDevice):
         timestamp = Timestamp(Epochstamp(), Trainstamp(train_id))
         metadata = ChannelMetaData(source, timestamp)
         for channel_name, data in channel_data_pairs:
-            preview_hash.set("image.data", data)
+            preview_hash.set(self._image_data_path, data)
             channel = self.signalSlotable.getOutputChannel(channel_name)
             channel.write(preview_hash, metadata, False)
             channel.update()
@@ -954,7 +956,7 @@ class BaseCorrection(PythonDevice):
                 return
 
             train_id = metadata.getAttribute("timestamp", "tid")
-            cell_table = np.squeeze(data_hash.get("image.cellId"))
+            cell_table = np.squeeze(data_hash.get(self._cell_table_path))
             if len(cell_table.shape) == 0:
                 self.log_status_warn(
                     "cellId had 0 dimensions. DAQ may not be sending data."
@@ -975,7 +977,7 @@ class BaseCorrection(PythonDevice):
                     "corrected."
                 )
 
-            image_data = data_hash.get("image.data")
+            image_data = data_hash.get(self._image_data_path)
             if image_data.shape[0] != self._schema_cache["dataFormat.memoryCells"]:
                 self.log_status_info(
                     f"Updating new input shape {image_data.shape}, updating buffers"
diff --git a/src/calng/kernels/jungfrau_gpu.cu b/src/calng/kernels/jungfrau_gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..2c84628f376c711331e63cbdbd630382f998c664
--- /dev/null
+++ b/src/calng/kernels/jungfrau_gpu.cu
@@ -0,0 +1,91 @@
+#include <cuda_fp16.h>
+
+{{corr_enum}}
+
+extern "C" {
+	/*
+	  TODO
+	  Shape of input data: memory cell, y, x
+	  Shape of offset constant: x, y, memory cell
+	*/
+	__global__ void correct(const {{input_data_dtype}}* data, // shape: memory cell, y, x
+	                        const unsigned char* gain_stage, // same shape
+	                        const unsigned char* cell_table,
+	                        const unsigned char corr_flags,
+	                        const float* offset_map,
+	                        const float* rel_gain_map,
+	                        const unsigned int bad_pixel_map,
+	                        const float bad_pixel_mask_value,
+	                        {{output_data_dtype}}* output) {
+		const size_t X = {{pixels_x}};
+		const size_t Y = {{pixels_y}};
+		const size_t memory_cells = {{data_memory_cells}};
+		const size_t map_memory_cells = {{constant_memory_cells}};
+
+		const size_t memory_cell = blockIdx.x * blockDim.x + threadIdx.x;
+		const size_t y = blockIdx.y * blockDim.y + threadIdx.y;
+		const size_t x = blockIdx.z * blockDim.z + threadIdx.z;
+
+		if (memory_cell >= memory_cells || y >= Y || x >= X) {
+			return;
+		}
+
+		// note: strides differ from numpy strides because unit here is sizeof(...), not byte
+		const size_t data_stride_x = 1;
+		const size_t data_stride_y = X * data_stride_x;
+		const size_t data_stride_cell = Y * data_stride_y;
+		const size_t data_index = memory_cell * data_stride_cell +
+			y * data_stride_y +
+			x * data_stride_x;
+		float res = (float)data[data_index];
+
+		// gain mapped constant shape: cell, y, x, gain_level (dim size 3)
+		const size_t map_stride_gain = 1;
+		const size_t map_stride_x = 3 * map_stride_gain;
+		const size_t map_stride_y = X * map_stride_x;
+		const size_t map_stride_cell = Y * map_stride_y;
+
+		{% if burst_mode %}
+		const size_t map_cell = cell_table[memory_cell];
+		{% else %}
+		const size_t map_cell = 0;
+		{% endif %}
+
+		if (map_cell < map_memory_cells) {
+			unsigned char gain = gain_stage[data_index];
+
+			{% if burst_mode %}
+			if (gain == 2) {
+				gain = 1;
+			} else if (gain == 3) {
+				gain = 2;
+			}
+			{% else %}
+			if (gain == 3) {
+				gain = 2;
+			}
+			{% endif %}
+
+			const size_t map_index = map_cell * map_stride_cell +
+				y * map_stride_y +
+				x * map_stride_x +
+				gain * map_stride_gain;
+			if ((corr_flags & BPMASK) && bad_pixel_map[map_index]) {
+				res = bad_pixel_mask_value;
+			} else {
+				if (corr_flags & OFFSET) {
+					res -= offset_map[map_index];
+				}
+				if (corr_flags & GAIN) {
+					res /= gain_map[map_index];
+				}
+			}
+		}
+
+		{% if output_data_dtype == "half" %}
+		output[data_index] = __float2half(res);
+		{% else %}
+		output[data_index] = ({{output_data_dtype}})res;
+		{% endif %}
+	}
+}