From eed28bdc3056a21bdc9f4ac1fabb51dd628ad265 Mon Sep 17 00:00:00 2001 From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de> Date: Tue, 10 Jan 2023 16:18:07 +0100 Subject: [PATCH] Allow for no smoothing. --- pes_to_spec/model.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index ffc01bb..7e49e89 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -8,7 +8,6 @@ from autograd import grad from scipy.signal import fftconvolve from scipy.signal import find_peaks_cwt from scipy.optimize import fmin_l_bfgs_b -from sklearn.preprocessing import StandardScaler from sklearn.decomposition import KernelPCA, PCA from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.base import TransformerMixin, BaseEstimator @@ -34,6 +33,7 @@ class PromptNotFoundError(Exception): def __str__(self) -> str: return "No prompt peak has been detected." + class HighResolutionSmoother(TransformerMixin, BaseEstimator): """ Smoothens out the high resolution data. @@ -72,6 +72,8 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator): Returns: Smoothened out spectrum. """ + if self.high_res_sigma <= 0: + return X # use a default energy axis is none is given # assume only the energy step energy = np.broadcast_to(self.energy, X.shape) @@ -449,6 +451,8 @@ class Model(TransformerMixin, BaseEstimator): Set to None to perform no selection. validation_size: Fraction (number between 0 and 1) of the data to take for validation and systematic uncertainty estimate. + n_pca_nonlinear: Number of nonlinear PCA components added at the preprocessing stage + to obtain nonlinearities as an input and improve the prediction. """ def __init__(self, @@ -465,9 +469,13 @@ class Model(TransformerMixin, BaseEstimator): if n_pca_nonlinear <= 0: x_pca_model = PCA(n_pca_lr, whiten=True) else: - x_pca_model = FeatureUnion([('linear', PCA(n_pca_lr, whiten=True)), + x_pca_model = FeatureUnion([ + ('linear', PCA(n_pca_lr, whiten=True)), ('nonlinear', Pipeline([('prep', PCA(n_pca_lr, whiten=True)), - ('kpca', KernelPCA(n_pca_nonlinear, kernel='rbf', n_jobs=-1)), + ('kpca', KernelPCA(n_pca_nonlinear, + kernel='rbf', + gamma=0.1, + n_jobs=-1)), ])), ]) self.x_model = Pipeline([ @@ -475,7 +483,8 @@ class Model(TransformerMixin, BaseEstimator): ('pca', x_pca_model), ('unc', UncertaintyHolder()), ]) - self.y_model = Pipeline([('smoothen', HighResolutionSmoother(high_res_sigma)), + self.y_model = Pipeline([ + ('smoothen', HighResolutionSmoother(high_res_sigma)), ('pca', PCA(n_pca_hr, whiten=False)), ('unc', UncertaintyHolder()), ]) -- GitLab