From cde3eebfa829caa0eec6758ac9824d0882d816de Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Fri, 20 Jan 2023 17:28:12 +0100
Subject: [PATCH] Added debugging messages, using parallelization by default,
 disabled slow and probably useless refined peak finding, added function to
 find match between two train IDs.

---
 pes_to_spec/model.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index 96e8756..ff640e1 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -32,6 +32,11 @@ def matching_ids(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.ndarray:
     unique_ids = list(set(a).intersection(b).intersection(c))
     return np.array(unique_ids)
 
+def matching_two_ids(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Returns list of train IDs common to sets a and b."""
+    unique_ids = list(set(a).intersection(b))
+    return np.array(unique_ids)
+
 class PromptNotFoundError(Exception):
     """
     Exception representing the error condition generated by not finding the prompt peak.
@@ -66,6 +71,7 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator):
 
         Returns: The object itself.
         """
+        print("Storing high resolution energy")
         self.energy = np.copy(fit_params["energy"])
         if len(self.energy.shape) == 2:
             self.energy = self.energy[0,:]
@@ -80,6 +86,7 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator):
 
         Returns: Smoothened out spectrum.
         """
+        print("Smoothing high-resolution spectrum")
         if self.high_res_sigma <= 0:
             return X
         # use a default energy axis is none is given
@@ -191,6 +198,7 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
 
         Returns: Concatenated and pre-processed low-resolution data of shape (train_id, features).
         """
+        print("Selecting area close to the peak")
         if self.tof_start is None:
            raise NotImplementedError("The low-resolution data cannot be transformed before the prompt has been identified. Call the fit function first.")
         items = [X[k] for k in self.channels]
@@ -220,11 +228,11 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
         best_guess = int(peak_idx[0])
         # look around this estimate for the maximum
         # this is probably not necessary
-        min_search = max(best_guess - 10, 0)
-        max_search = min(best_guess + 10, len(sum_low_res))
-        restricted_arr = sum_low_res[min_search:max_search]
-        improved_guess = min_search + int(np.argmax(restricted_arr))
-        return improved_guess
+        #min_search = max(best_guess - 10, 0)
+        #max_search = min(best_guess + 10, len(sum_low_res))
+        #restricted_arr = sum_low_res[min_search:max_search]
+        #improved_guess = min_search + int(np.argmax(restricted_arr))
+        return best_guess
 
     def fit(self, X: Dict[str, np.ndarray], y: Optional[Any]=None) -> TransformerMixin:
         """
@@ -236,6 +244,7 @@ class SelectRelevantLowResolution(TransformerMixin, BaseEstimator):
 
         Returns: The object itself.
         """
+        print("Estimating peak position")
         self.tof_start = self.estimate_prompt_peak(X)
         return self
 
@@ -427,7 +436,7 @@ def _fit_estimator(estimator, X: np.ndarray, y: np.ndarray):
 
 
 class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
-    def __init__(self, estimator, *, n_jobs=None):
+    def __init__(self, estimator, *, n_jobs=8):
         self.estimator = estimator
         self.n_jobs = n_jobs
 
@@ -449,12 +458,14 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
                     "multi-output regression but has only one."
                 )
 
+        print(f"Fitting multiple regressors with n_jobs={self.n_jobs}")
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_fit_estimator)(
                 self.estimator, X, y[:, i]
             )
             for i in range(y.shape[1])
         )
+        print("End of fit")
 
         return self
 
@@ -469,6 +480,7 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
             Multi-output targets predicted across multiple predictors.
             Note: Separate models are generated for each predictor.
         """
+        print("Inferring ...")
         y = Parallel(n_jobs=self.n_jobs)(
             delayed(e.predict)(X, return_std) for e in self.estimators_
             #delayed(e.predict)(X) for e in self.estimators_
--
GitLab
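
As a quick illustration of the new matching_two_ids helper: a minimal usage
sketch, assuming NumPy is imported as in model.py. The sample train IDs below
are invented for illustration; the function body is the one added by the patch.

    import numpy as np

    def matching_two_ids(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Returns list of train IDs common to sets a and b."""
        unique_ids = list(set(a).intersection(b))
        return np.array(unique_ids)

    pes_ids = np.array([1001, 1002, 1003, 1005])
    spec_ids = np.array([1002, 1003, 1004, 1005])
    common = matching_two_ids(pes_ids, spec_ids)
    # common holds 1002, 1003 and 1005; the order is not guaranteed,
    # because the intersection goes through an unordered Python set.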
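
For context on the "using parallelization by default" part: the patch only changes
the default n_jobs of MultiOutputWithStd from None (effectively sequential) to 8,
so the existing Parallel/delayed fitting of one regressor per output column runs
on several workers by default. Below is a self-contained sketch of that per-column
fitting pattern, written here with joblib; the estimator choice (Ridge) and the
random data are illustrative assumptions, not taken from the patch.

    import numpy as np
    from joblib import Parallel, delayed
    from sklearn.base import clone
    from sklearn.linear_model import Ridge

    def fit_one(estimator, X, y_col):
        # fit an independent clone of the base estimator on a single output column
        e = clone(estimator)
        e.fit(X, y_col)
        return e

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    Y = rng.normal(size=(100, 3))  # three targets -> three independent regressors

    # n_jobs=8 mirrors the new default; n_jobs=None would fit the columns one by one
    estimators = Parallel(n_jobs=8)(
        delayed(fit_one)(Ridge(), X, Y[:, i]) for i in range(Y.shape[1])
    )
    predictions = np.stack([e.predict(X) for e in estimators], axis=1)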