From b5d0586c9880afeeea60ddc06d93ade7e2d41e2f Mon Sep 17 00:00:00 2001
From: Thomas Kluyver <thomas@kluyver.me.uk>
Date: Thu, 28 Apr 2022 13:39:46 +0100
Subject: [PATCH] Hopefully speed up baseline correction by stripe method

---
 src/cal_tools/agipdutils.py | 84 ++++++++++++++++++++++---------------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/src/cal_tools/agipdutils.py b/src/cal_tools/agipdutils.py
index a5d7cb628..916e5c868 100644
--- a/src/cal_tools/agipdutils.py
+++ b/src/cal_tools/agipdutils.py
@@ -82,13 +82,42 @@ def assemble_constant_dict(
     return const_dict
 
 
+# contiguous_regions() by Joe Kington on Stackoverflow
+# https://stackoverflow.com/a/4495197/434217
+# Used here under Stackoverflow's default CC-BY-SA 3.0 license.
+def contiguous_regions(condition):
+    """Finds contiguous True regions of the boolean array "condition". Returns
+    a 2D array where the first column is the start index of the region and the
+    second column is the end index."""
+
+    # Find the indices of changes in "condition"
+    d = np.diff(condition)
+    idx, = d.nonzero()
+
+    # We need to start things after the change in "condition". Therefore,
+    # we'll shift the index by 1 to the right.
+    idx += 1
+
+    if condition[0]:
+        # If the start of condition is True prepend a 0
+        idx = np.r_[0, idx]
+
+    if condition[-1]:
+        # If the end of condition is True, append the length of the array
+        idx = np.r_[idx, condition.size] # Edit
+
+    # Reshape the result into two columns
+    idx.shape = (-1,2)
+    return idx
+
+
 def get_shadowed_stripe(data, threshold, fraction):
     """
-    Return list of shadowed regions.
+    Return 1D bool array of which pixel rows are shadowed.
     Shadowed region is presented by the list of lines of pixels along
     numpy axis 1. Regions are defined with respect of threshold
     and fraction. Margin of one pixel is used.
-    Double-pixels are stored in separate regions
+    Double-pixels are excluded, so only regular square pixels are used.
 
     :param data: (numpy.ndarray, rank=2) offset corrected single image
     :param threshold: (float) a threshold, below which
@@ -96,34 +125,27 @@ def get_shadowed_stripe(data, threshold, fraction):
     :param fraction: (float) a fraction of pixels in a line along axis 1
     below which a full line is considered as dark
     """
-
+    # Identify rows with > fraction of their pixels < threshold
     npx_all = np.count_nonzero(~np.isnan(data), axis=1)
     npx_sh = np.count_nonzero(data < threshold, axis=1)
+    rows_to_use = npx_sh / npx_all > fraction
 
-    A = np.array(np.where(npx_sh / npx_all > fraction)[0])
+    # Ignore rows with double-width pixels
+    rows_to_use[64:512:64] = False
+    rows_to_use[63:511:64] = False
 
-    dp_idx = np.arange(64, 512, 64)
-    dp_idx = np.sort(np.hstack((dp_idx, dp_idx - 1)))
+    # TODO: is this necessary?
+    # The previous code here and in baseline_correct_via_stripe ignored any
+    # stripes <= 2 pixels width. Is this by design, or just an accident of
+    # ignoring the double-width pixels (which are in stripes of 2 together)?
+    # For now, I'll preserve the behaviour. -TK
+    shadow_bounds = contiguous_regions(rows_to_use)
+    stripe_widths = shadow_bounds[:, 1] - shadow_bounds[:, 0]
+    stripes_too_narrow = shadow_bounds[stripe_widths <= 2]
+    for start, end in stripes_too_narrow:
+        rows_to_use[start:end] = False
 
-    # grouping indices
-    sh_idx = []
-    tmp_idx = []
-    for idx, i in enumerate(A[1:-1]):
-        if i - 1 not in A:
-            continue
-        if not tmp_idx:
-            tmp_idx.append(i)
-            continue
-        if tmp_idx[-1] + 1 == i and (
-                (tmp_idx[-1] in dp_idx) == (i in dp_idx)) and (i + 1 in A):
-            tmp_idx.append(i)
-            if i != A[-2]:
-                continue
-        if len(tmp_idx) > 1:
-            sh_idx.append(list(tmp_idx))
-        tmp_idx = [i]
-
-    return sh_idx
+    return rows_to_use
 
 
 def baseline_correct_via_stripe(d, g, m, frac_high_med):
@@ -149,18 +171,12 @@ def baseline_correct_via_stripe(d, g, m, frac_high_med):
     dd[g != 0] = np.nan  # only high gain data
     dd[m != 0] = np.nan  # only good pixels
 
-    sh_idxs = get_shadowed_stripe(dd, 30, 0.95)
-
-    # collect all shadowed regions excluding double pixels
-    idx = []
-    for sh_idx in sh_idxs:
-        if len(sh_idx) > 2:
-            idx += sh_idx
+    sh_rows = get_shadowed_stripe(dd, 30, 0.95)
 
-    if len(idx) < 3:
+    if sh_rows.sum() < 3:
         return d, 0
 
-    shift = np.nanmedian(dd[idx, :])
+    shift = np.nanmedian(dd[sh_rows, :])
     d[g == 0] -= shift
     d[g == 1] -= shift / frac_high_med
     return d, shift
-- 
GitLab