From cf627845e69fbee844540ef3ebb8fdb681d3eba8 Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Wed, 30 Aug 2023 15:59:05 +0200
Subject: [PATCH] Many bug fixes.

---
 src/calng/CrystfelRunner.py | 166 ++++++++++++++++++++++++------------
 1 file changed, 111 insertions(+), 55 deletions(-)

diff --git a/src/calng/CrystfelRunner.py b/src/calng/CrystfelRunner.py
index 9950f88e..04064293 100644
--- a/src/calng/CrystfelRunner.py
+++ b/src/calng/CrystfelRunner.py
@@ -39,8 +39,8 @@ def reparse_fix_comma_mess(l):
     in the GUI actually allows a line to contain commas. Unfortunately, the string we
     are given when we get the property has these commas replaced with \\. I guess some
     escaping is broken. Anyway, TODO: report bug"""
-    res = []
-    acc = None
+    res = list()
+    acc = list()
     next_is_continuation = False
 
     for thing in l:
@@ -56,12 +56,28 @@ def reparse_fix_comma_mess(l):
             else:
                 if acc:
                     res.append(",".join(acc))
-                    acc = None
+                    acc = list()
                 res.append(thing)
             next_is_continuation = False
     if acc:
         res.append(",".join(acc))
-    return res
+
+    # Danilo: that is not sufficient! This bug is persistent!
+    # If an isolated number is found in the list, concatenate it with the
+    # previous element.
+    final = list()
+    current_group = list()
+    for current in res:
+        if re.match(r"(\d+)", current):
+            current_group += [current]
+        else:
+            # flush what was there first
+            if len(current_group) > 0:
+                final += [",".join(current_group)]
+            current_group = [current]
+    if len(current_group) > 0:
+        final += [",".join(current_group)]
+    return final
 
 
 @KARABO_CLASSINFO("CrystfelRunner", "0.0")
@@ -110,6 +126,11 @@ class CrystfelRunner(PythonDevice):
             .assignmentMandatory()
             .commit(),
 
+            STRING_ELEMENT(expected)
+            .key("dataFormat.dataKey")
+            .assignmentMandatory()
+            .commit(),
+
             NODE_ELEMENT(expected)
             .key("crystfelStats")
             .commit(),
@@ -281,11 +302,20 @@ class CrystfelRunner(PythonDevice):
             .assignmentOptional()
             .defaultValue(
                 [
-                    "--indexing=mosflm",
-                    "--int-radius=3,5,7",
+                    "--pdb=/gpfs/exfel/exp/XMPL/201750/p700000/proc/r0030/hewl.cell",
+                    "--highres=1.6",
+                    "--peaks=peakfinder8",
+                    "--threshold=200",
+                    "--min-snr=5",
+                    "--min-pix-count=1",
+                    "--max-pix-count=20",
+                    "--indexing=xgandalf",
+                    "--multi",
                     "--local-bg-radius=3",
-                    "--max-res=1600",
-                    "--highres=0.4",
+                    "--int-radius=2,3,5",
+                    "--max-res=1200",
+                    "--min-peaks=6",
+                    "--no-non-hits-in-stream",
                 ]
             )
             .commit(),
@@ -378,7 +408,8 @@ class CrystfelRunner(PythonDevice):
 
         if self.unsafe_get("crystfelArgs.doPeakfinding"):
             # TODO: maybe support passing data even when there are already peaks
-            images = data_hash.get("image.data")
+            key = self.get("dataFormat.dataKey")
+            images = data_hash.get(key)
             num_frames = images.shape[0]
             num_modules = images.shape[1]
             # TODO: apply mask to data
@@ -474,9 +505,8 @@ class CrystfelRunner(PythonDevice):
         if len(streamOutputPath) == 0:
             # TODO: Is /tmp ok?
             # the goal is only to prevent
-            user = env["USER"]
             name = self.get("deviceId").replace("/", "_")
-            streamOutputPath = f"/tmp/{user}/out_{name}.stream"
+            streamOutputPath = f"/tmp/out_{name}.stream"
             deleteOutput = True
         args = [
             "indexamajig",
@@ -487,8 +517,22 @@ class CrystfelRunner(PythonDevice):
             streamOutputPath,
             "-g",
             self.get("crystfelArgs.geometryPath"),
-            "--no-mask-data",
         ]
+        # The user-supplied misc options need to be added here, early on.
+        # For some reason, the --pdb option needs to come very early in the
+        # argument list; moving it later may cause a mess.
+        # Adding them here allows the user to set such options early on.
+
+        # The option --int-radius must be given in the format --int-radius=2,3,4.
+        # Therefore we cannot substitute the commas in this argument with spaces.
+        # Somehow Karabo turns ["--int-radius=3,4,5"] into ["--int-radius=3", "4", "5"] for Danilo
+        # and it adds \\ escape sequences for David H.
+        # Both of those "effects" are handled by the hack function called below.
+        fixed_misc = reparse_fix_comma_mess(self.get("crystfelArgs.misc"))
+        args += fixed_misc
+
+        # this should override the --peaks option set by the user, if they
+        # want to use an external peakfinder
         if not self.get("crystfelArgs.doPeakfinding"):
             args.extend(
                 [
@@ -497,7 +541,11 @@ class CrystfelRunner(PythonDevice):
                 ]
             )
 
-        args.extend(reparse_fix_comma_mess(self.get("crystfelArgs.misc")))
+        # Danilo: Is this the only way?
+        args += [
+            "--no-mask-data",
+            ]
+
         self.set("crystfelArgs.commandline", " ".join(args))
 
         # TODO: cd somewhere for the indexamajig folder?
@@ -512,15 +560,15 @@ class CrystfelRunner(PythonDevice):
             env=env,
         )
 
-        # do something with stream file
-        self._tail_proc = subprocess.Popen(
-            ["tail", "-F", "-n", "+0", streamOutputPath],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            shell=False,
-            text=True,
-            bufsize=1,
-        )
+        ## do something with stream file
+        #self._tail_proc = subprocess.Popen(
+        #    ["tail", "-F", "-n", "+0", streamOutputPath],
+        #    stdout=subprocess.PIPE,
+        #    stderr=subprocess.PIPE,
+        #    shell=False,
+        #    text=True,
+        #    bufsize=1,
+        #)
 
         def collect_details():
             """This goes deeper in the output file structure to collect more information.
@@ -529,37 +577,6 @@ class CrystfelRunner(PythonDevice):
             results = dict()
 
             try:
-                # handle output stream
-                reading_chunk = False
-                n_reflections = list()
-                cell = list()
-                det_centre = list()
-                for line in self._tail_proc.stdout:
-                    if reading_chunk:
-                        if line.startswith('----- End chunk -----'):
-                            reading_chunk = False
-                        elif line.startswith('num_reflections'):
-                            n_reflections[-1] = int(line.split(" = ")[1])
-                        if "Cell parameters" in line:
-                            # Cell parameters 7.96703 7.95613 3.81936 nm, \
-                            # 90.15754 90.52371 90.09970 deg
-                            lsplit = line.split()
-                            a, b, c = [float(i) for i in lsplit[2:5]]
-                            alpha, beta, gamma = [float(i) for i in lsplit[6:9]]
-                            cell.append(np.array([a, b, c, alpha, beta, gamma]))
-                        if "predict_refine/det_shift" in line:
-                            # predict_refine/det_shift x = -0.013 y = -0.113 mm
-                            lsplit = line.split()
-                            det_x = float(lsplit[-5])*1e-3
-                            det_y = float(lsplit[-2])*1e-3
-                            det_centre += [np.array([det_x, det_y])]
-                    elif line.startswith('----- Begin chunk -----'):
-                        reading_chunk = True
-                        n_reflections.append(0)
-                n_reflections = np.array(n_reflections)
-                results['reflections'] = np.sum(n_reflections)
-                results['cell'] = np.stack(cell, axis=0)
-
                 # handle std. error output
                 stderr_re = re.compile(
                     r"(?P<images>\d+) images processed, (?P<hits>\d+) hits"
@@ -569,6 +586,42 @@ class CrystfelRunner(PythonDevice):
                 for line in self._crystfel_proc.stderr:
                     if (match := stderr_re.match(line)) is not None:
                         results.update(match.groupdict())
+                    print(line.strip())
+                for line in self._crystfel_proc.stdout:
+                    print(line.strip())
+
+                # handle output stream
+                reading_chunk = False
+                n_reflections = list()
+                cell = list()
+                det_centre = list()
+                with open(streamOutputPath, "r") as outFile:
+                    #for line in self._tail_proc.stdout:
+                    for line in outFile:
+                        if reading_chunk:
+                            if line.startswith('----- End chunk -----'):
+                                reading_chunk = False
+                            elif line.startswith('num_reflections'):
+                                n_reflections[-1] = int(line.split(" = ")[1])
+                            if "Cell parameters" in line:
+                                # Cell parameters 7.96703 7.95613 3.81936 nm, \
+                                # 90.15754 90.52371 90.09970 deg
+                                lsplit = line.split()
+                                a, b, c = [float(i) for i in lsplit[2:5]]
+                                alpha, beta, gamma = [float(i) for i in lsplit[6:9]]
+                                cell.append(np.array([a, b, c, alpha, beta, gamma]))
+                            if "predict_refine/det_shift" in line:
+                                # predict_refine/det_shift x = -0.013 y = -0.113 mm
+                                lsplit = line.split()
+                                det_x = float(lsplit[-5])*1e-3
+                                det_y = float(lsplit[-2])*1e-3
+                                det_centre += [np.array([det_x, det_y])]
+                        elif line.startswith('----- Begin chunk -----'):
+                            reading_chunk = True
+                            n_reflections.append(0)
+                n_reflections = np.array(n_reflections)
+                results['reflections'] = np.sum(n_reflections)
+                results['cell'] = np.stack(cell, axis=0)
 
                 n_data = results["images"]
                 if n_data <= 0:
@@ -592,9 +645,12 @@ class CrystfelRunner(PythonDevice):
                     f"crystfelStats", Hash(*itertools.chain.from_iterable(results.items()))
                 )
 
-                # ... and elete output stream file if requested:
+                # ... and delete output stream file if requested:
                 if deleteOutput:
-                    os.remove(streamOutputPath)
+                    try:
+                        os.remove(streamOutputPath)
+                    except:
+                        pass
 
         threading.Thread(target=collect_details, daemon=True).start()
 
-- 
GitLab