From 276be77dea7703a5919056081b188be756519c40 Mon Sep 17 00:00:00 2001
From: Philipp Schmidt <philipp.schmidt@xfel.eu>
Date: Tue, 19 Oct 2021 10:07:13 +0200
Subject: [PATCH] Add native implementation for transposition of AGIPD
 constants

---
 setup.py                    |  4 +--
 src/cal_tools/agipdalgs.pyx | 66 +++++++++++++++++++++++++++++--------
 src/cal_tools/agipdlib.py   | 19 +++++++----
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/setup.py b/setup.py
index 956173615..080642fa8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,10 @@
 from distutils.command.build import build
-from distutils.extension import Extension
 from subprocess import check_output
 
 import numpy
 from Cython.Build import cythonize
 from Cython.Distutils import build_ext
+from setuptools.extension import Extension
 from setuptools import find_packages, setup
 
 from src.xfel_calibrate.notebooks import notebooks
@@ -71,7 +71,7 @@ setup(
         "build": PreInstallCommand,
         "build_ext": build_ext,
     },
-    ext_modules=cythonize(ext_modules),
+    ext_modules=cythonize(ext_modules, language_level=3),
     install_requires=[
         "iCalibrationDB @ git+ssh://git@git.xfel.eu:10022/detectors/cal_db_interactive.git@2.0.9",  # noqa
         "XFELDetectorAnalysis @ git+ssh://git@git.xfel.eu:10022/karaboDevices/pyDetLib.git@2.5.6-2.10.0#subdirectory=lib",  # noqa
diff --git a/src/cal_tools/agipdalgs.pyx b/src/cal_tools/agipdalgs.pyx
index cacc0f549..8100ec9f5 100644
--- a/src/cal_tools/agipdalgs.pyx
+++ b/src/cal_tools/agipdalgs.pyx
@@ -1,11 +1,51 @@
 import numpy as np
 
-cimport cython
+from cython cimport boundscheck, wraparound
+from cython.view cimport contiguous
+from cython.parallel import prange
 cimport numpy as cnp
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+# Separate fused types for input and output to allow for the
+# combination to occur.
+ctypedef fused inp_t:
+    int
+    unsigned int
+    float
+    double
+
+ctypedef fused outp_t:
+    int
+    unsigned int
+    float
+    double
+
+
+@wraparound(False)
+@boundscheck(False)
+def transpose_constant(outp_t[:, :, :, ::contiguous] dst,
+                       inp_t[:, :, :, ::contiguous] src,
+                       int num_threads=0):
+    """
+    Optimized and parallelized constant transposition.
+
+    Will work for any 4D array, but the iteration order is handpicked
+    to be fast for AGIPD constants.
+    """
+
+    cdef int y, x, c, g
+
+    # Optimized iteration (on INTEL) order via profiling, do not change
+    # and don't ask me why!
+    for c in prange(src.shape[2], nogil=True, num_threads=num_threads):
+        for y in range(src.shape[0]):
+            for x in range(src.shape[1]):
+                for g in range(src.shape[3]):
+                    dst[g, c, x, y] = <outp_t>src[y, x, c, g]
+
+
+@boundscheck(False)
+@wraparound(False)
 def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20,
            cnp.ndarray[cnp.float32_t, ndim=2] weights=None):
     """
@@ -38,8 +78,8 @@ def histogram(cnp.ndarray[cnp.float32_t, ndim=2] data, range=(0,1), int bins=20,
     return ret, np.linspace(min, max, bins+1)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@boundscheck(False)
+@wraparound(False)
 def histogram2d(cnp.ndarray[float, ndim=1] data_x, cnp.ndarray[float, ndim=1] data_y,
                 range=((0,1), (0,1)), bins=(20, 20)):
     """
@@ -75,8 +115,8 @@ def histogram2d(cnp.ndarray[float, ndim=1] data_x, cnp.ndarray[float, ndim=1] da
     return ret, np.linspace(min_x, max_x, bins_x+1), np.linspace(min_y, max_y, bins_y+1)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@boundscheck(False)
+@wraparound(False)
 def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, ndim=4] choices):
     """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage"""
     cdef int i, j, k
@@ -96,8 +136,8 @@ def gain_choose(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.float32_t, n
     return out
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@boundscheck(False)
+@wraparound(False)
 def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t, ndim=4] choices):
     """Specialised fast equivalent of np.choose(), to select data for a per-pixel gain stage"""
     cdef int i, j, k
@@ -117,8 +157,8 @@ def gain_choose_int(cnp.ndarray[cnp.uint8_t, ndim=3] a, cnp.ndarray[cnp.int32_t,
     return out
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@boundscheck(False)
+@wraparound(False)
 def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] arr, float lower, float upper):
     """
     Return the sum & count of values where lower <= x <= upper,
@@ -148,8 +188,8 @@ def sum_and_count_in_range_asic(cnp.ndarray[float, ndim=4] arr, float lower, flo
     return sum_, count
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@boundscheck(False)
+@wraparound(False)
 def sum_and_count_in_range_cell(cnp.ndarray[float, ndim=4] arr, float lower, float upper):
     """
     Return the sum & count of values where lower <= x <= upper,
diff --git a/src/cal_tools/agipdlib.py b/src/cal_tools/agipdlib.py
index 3d9f17a7e..fb6f6b6ac 100644
--- a/src/cal_tools/agipdlib.py
+++ b/src/cal_tools/agipdlib.py
@@ -1,3 +1,4 @@
+import os
 import posixpath
 import traceback
 import zlib
@@ -1143,10 +1144,16 @@ class AgipdCorrections:
         :return:
         """
 
-        self.offset[module_idx][...] = cons_data["Offset"].transpose()[...]
-        self.noise[module_idx][...] = cons_data["Noise"].transpose()[...]
+        # Distribute threads for transposition evenly across all modules
+        # assuming this method runs in parallel.
+        calgs_opts = dict(num_threads=os.cpu_count() // len(self.offset))
+
+        calgs.transpose_constant(self.offset[module_idx], cons_data['Offset'], **calgs_opts)
+        calgs.transpose_constant(self.noise[module_idx], cons_data['Noise'], **calgs_opts)
         if self.gain_mode is AgipdGainMode.ADAPTIVE_GAIN:
-            self.thresholds[module_idx][...] = cons_data["ThresholdsDark"].transpose()[:3,...]  # noqa
+            calgs.transpose_constant(self.thresholds[module_idx],
+                                     cons_data['ThresholdsDark'][..., :3],
+                                     **calgs_opts)
 
         if self.corr_bools.get("low_medium_gap"):
             t0 = self.thresholds[module_idx][0]
@@ -1208,7 +1215,7 @@ class AgipdCorrections:
             rel_gain = np.ones((128, 512, self.max_cells, 3), np.float32)
 
             if when["SlopesPC"]:
-                slopesPC = cons_data["SlopesPC"].astype(np.float32)
+                slopesPC = cons_data["SlopesPC"].astype(np.float32, copy=False)
 
                 # This will handle some historical data in a different format
                 # constant dimension injected first
@@ -1283,10 +1290,10 @@ class AgipdCorrections:
                 frac_high_med = np.ones((self.max_cells,), np.float32)
 
             self.md_additional_offset[module_idx][...] = md_additional_offset.transpose()[...]  # noqa
-            self.rel_gain[module_idx][...] = rel_gain[...].transpose()
+            calgs.transpose_constant(self.rel_gain[module_idx], rel_gain, **calgs_opts)
             self.frac_high_med[module_idx][...] = frac_high_med
 
-        self.mask[module_idx][...] = bpixels.transpose()[...]
+        calgs.transpose_constant(self.mask[module_idx], bpixels, **calgs_opts)
 
         return
 
-- 
GitLab