diff --git a/src/calng/agipd_gpu.py b/src/calng/agipd_gpu.py
index df1bc811e4f804cde296ba578d9023ee6968a659..98d26c547e49f4a6bf30d12b937b74d17601d517 100644
--- a/src/calng/agipd_gpu.py
+++ b/src/calng/agipd_gpu.py
@@ -25,7 +25,7 @@ class AgipdGainMode(enum.IntEnum):
 
 
 class AgipdGpuRunner(base_gpu.BaseGpuRunner):
-    _kernel_source_filename = "agipd_gpu_kernels.cpp"
+    _kernel_source_filename = "agipd_gpu.cu"
     _corrected_axis_order = "cxy"
 
     def __init__(
diff --git a/src/calng/base_gpu.py b/src/calng/base_gpu.py
index 5a4821a0f954166587b5a1543efbf94570f5eb64..903cb1e35e46f03013b627d1a47032b30c969984 100644
--- a/src/calng/base_gpu.py
+++ b/src/calng/base_gpu.py
@@ -44,7 +44,7 @@ class BaseGpuRunner:
     ):
         _src_dir = pathlib.Path(__file__).absolute().parent
         # subclass must define _kernel_source_filename
-        with (_src_dir / self._kernel_source_filename).open("r") as fd:
+        with (_src_dir / "kernels" / self._kernel_source_filename).open("r") as fd:
             self._kernel_template = jinja2.Template(fd.read())
 
         self.pixels_x = pixels_x
diff --git a/src/calng/dssc_gpu.py b/src/calng/dssc_gpu.py
index 138e301647f3ed138422eeadc42cfc4438a27d6d..b9dc3b091c6e14374adddbb08a9ad4d04340e9f3 100644
--- a/src/calng/dssc_gpu.py
+++ b/src/calng/dssc_gpu.py
@@ -12,7 +12,7 @@ class CorrectionFlags(enum.IntFlag):
 
 
 class DsscGpuRunner(base_gpu.BaseGpuRunner):
-    _kernel_source_filename = "dssc_gpu_kernels.cpp"
+    _kernel_source_filename = "dssc_gpu.cu"
     _corrected_axis_order = "cyx"
 
     def __init__(
diff --git a/src/calng/agipd_gpu_kernels.cpp b/src/calng/kernels/agipd_gpu.cu
similarity index 100%
rename from src/calng/agipd_gpu_kernels.cpp
rename to src/calng/kernels/agipd_gpu.cu
diff --git a/src/calng/dssc_gpu_kernels.cpp b/src/calng/kernels/dssc_gpu.cu
similarity index 100%
rename from src/calng/dssc_gpu_kernels.cpp
rename to src/calng/kernels/dssc_gpu.cu