From cde3eebfa829caa0eec6758ac9824d0882d816de Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Fri, 20 Jan 2023 17:28:12 +0100
Subject: [PATCH] Added debugging messages, using parallelization by default,
 disabled slow and probably useless refined peak finding, added function to
 find match between two train IDs.

---
 pes_to_spec/model.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index 96e8756..ff640e1 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -32,6 +32,11 @@ def matching_ids(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.ndarray:
     unique_ids = list(set(a).intersection(b).intersection(c))
     return np.array(unique_ids)
 
+def matching_two_ids(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Returns list of train IDs common to sets a and b."""
+    unique_ids = list(set(a).intersection(b))
+    return np.array(unique_ids)
+
 class PromptNotFoundError(Exception):
     """
     Exception representing the error condition generated by not finding the prompt peak.
@@ -66,6 +71,7 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator):
 
         Returns: The object itself.
         """
+        print("Storing high resolution energy")
         self.energy = np.copy(fit_params["energy"])
         if len(self.energy.shape) == 2:
             self.energy = self.energy[0,:]
@@ -80,6 +86,7 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator):
 
         Returns: Smoothened out spectrum.
         """
+        print("Smoothing high-resolution spectrum")
         if self.high_res_sigma <= 0:
             return X
         # use a default energy axis is none is given
@@ -191,6 +198,7 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
 
         Returns: Concatenated and pre-processed low-resolution data of shape (train_id, features).
         """
+        print("Selecting area close to the peak")
         if self.tof_start is None:
            raise NotImplementedError("The low-resolution data cannot be transformed before the prompt has been identified. Call the fit function first.")
         items = [X[k] for k in self.channels]
@@ -220,11 +228,11 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
         best_guess = int(peak_idx[0])
         # look around this estimate for the maximum
         # this is probably not necessary
-        min_search = max(best_guess - 10, 0)
-        max_search = min(best_guess + 10, len(sum_low_res))
-        restricted_arr = sum_low_res[min_search:max_search]
-        improved_guess = min_search + int(np.argmax(restricted_arr))
-        return improved_guess
+        #min_search = max(best_guess - 10, 0)
+        #max_search = min(best_guess + 10, len(sum_low_res))
+        #restricted_arr = sum_low_res[min_search:max_search]
+        #improved_guess = min_search + int(np.argmax(restricted_arr))
+        return best_guess
 
     def fit(self, X: Dict[str, np.ndarray], y: Optional[Any]=None) -> TransformerMixin:
         """
@@ -236,6 +244,7 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
 
         Returns: The object itself.
         """
+        print("Estimating peak position")
         self.tof_start = self.estimate_prompt_peak(X)
         return self
 
@@ -427,7 +436,7 @@ def _fit_estimator(estimator, X: np.ndarray, y: np.ndarray):
 
 
 class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
-    def __init__(self, estimator, *, n_jobs=None):
+    def __init__(self, estimator, *, n_jobs=8):
         self.estimator = estimator
         self.n_jobs = n_jobs
 
@@ -449,12 +458,14 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
                     "multi-output regression but has only one."
                 )
 
+        print(f"Fitting multiple regressors with n_jobs={self.n_jobs}")
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_fit_estimator)(
                 self.estimator, X, y[:, i]
             )
             for i in range(y.shape[1])
         )
+        print("End of fit")
 
         return self
 
@@ -469,6 +480,7 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
             Multi-output targets predicted across multiple predictors.
             Note: Separate models are generated for each predictor.
         """
+        print("Inferring ...")
         y = Parallel(n_jobs=self.n_jobs)(
             delayed(e.predict)(X, return_std) for e in self.estimators_
             #delayed(e.predict)(X) for e in self.estimators_
--
GitLab
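
As a quick illustration of the new matching_two_ids helper: a minimal usage
sketch, assuming NumPy is imported as in model.py. The sample train IDs below
are invented for illustration; the function body is the one added by the patch.

    import numpy as np

    def matching_two_ids(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Returns list of train IDs common to sets a and b."""
        unique_ids = list(set(a).intersection(b))
        return np.array(unique_ids)

    pes_ids = np.array([1001, 1002, 1003, 1005])
    spec_ids = np.array([1002, 1003, 1004, 1005])
    common = matching_two_ids(pes_ids, spec_ids)
    # common holds 1002, 1003 and 1005; the order is not guaranteed,
    # because the intersection goes through an unordered Python set.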
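
For context on the "using parallelization by default" part: the patch only changes
the default n_jobs of MultiOutputWithStd from None (effectively sequential) to 8,
so the existing Parallel/delayed fitting of one regressor per output column runs
on several workers by default. Below is a self-contained sketch of that per-column
fitting pattern, written here with joblib; the estimator choice (Ridge) and the
random data are illustrative assumptions, not taken from the patch.

    import numpy as np
    from joblib import Parallel, delayed
    from sklearn.base import clone
    from sklearn.linear_model import Ridge

    def fit_one(estimator, X, y_col):
        # fit an independent clone of the base estimator on a single output column
        e = clone(estimator)
        e.fit(X, y_col)
        return e

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    Y = rng.normal(size=(100, 3))  # three targets -> three independent regressors

    # n_jobs=8 mirrors the new default; n_jobs=None would fit the columns one by one
    estimators = Parallel(n_jobs=8)(
        delayed(fit_one)(Ridge(), X, Y[:, i]) for i in range(Y.shape[1])
    )
    predictions = np.stack([e.predict(X) for e in estimators], axis=1)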