diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index 31c891d7c80442e2a5e2fd1b946679b1a7dfadb5..7e49e89e508d2e3c28af5d0ee70ab0ed81d771d2 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -8,14 +8,13 @@ from autograd import grad from scipy.signal import fftconvolve from scipy.signal import find_peaks_cwt from scipy.optimize import fmin_l_bfgs_b -from sklearn.decomposition import PCA -from sklearn.pipeline import Pipeline +from sklearn.decomposition import KernelPCA, PCA +from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.base import TransformerMixin, BaseEstimator from sklearn.base import RegressorMixin from itertools import product from sklearn.model_selection import train_test_split - import matplotlib.pyplot as plt from typing import Any, Dict, List, Optional @@ -34,6 +33,7 @@ class PromptNotFoundError(Exception): def __str__(self) -> str: return "No prompt peak has been detected." + class HighResolutionSmoother(TransformerMixin, BaseEstimator): """ Smoothens out the high resolution data. @@ -72,6 +72,8 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator): Returns: Smoothened out spectrum. """ + if self.high_res_sigma <= 0: + return X # use a default energy axis is none is given # assume only the energy step energy = np.broadcast_to(self.energy, X.shape) @@ -369,7 +371,7 @@ class FitModel(RegressorMixin, BaseEstimator): grad_loss, disp=True, factr=1e7, - maxiter=100, + maxiter=1000, iprint=0) # Inference @@ -449,6 +451,8 @@ class Model(TransformerMixin, BaseEstimator): Set to None to perform no selection. validation_size: Fraction (number between 0 and 1) of the data to take for validation and systematic uncertainty estimate. + n_pca_nonlinear: Number of nonlinear PCA components added at the preprocessing stage + to obtain nonlinearities as an input and improve the prediction. """ def __init__(self, @@ -459,15 +463,29 @@ class Model(TransformerMixin, BaseEstimator): high_res_sigma: float=0.2, tof_start: Optional[int]=None, delta_tof: Optional[int]=300, - validation_size: float=0.05): + validation_size: float=0.05, + n_pca_nonlinear: int=0): # models + if n_pca_nonlinear <= 0: + x_pca_model = PCA(n_pca_lr, whiten=True) + else: + x_pca_model = FeatureUnion([ + ('linear', PCA(n_pca_lr, whiten=True)), + ('nonlinear', Pipeline([('prep', PCA(n_pca_lr, whiten=True)), + ('kpca', KernelPCA(n_pca_nonlinear, + kernel='rbf', + gamma=0.1, + n_jobs=-1)), + ])), + ]) self.x_model = Pipeline([ ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)), - ('pca', PCA(n_pca_lr, whiten=True)), + ('pca', x_pca_model), ('unc', UncertaintyHolder()), ]) - self.y_model = Pipeline([('smoothen', HighResolutionSmoother(high_res_sigma)), - ('pca', PCA(n_pca_hr, whiten=True)), + self.y_model = Pipeline([ + ('smoothen', HighResolutionSmoother(high_res_sigma)), + ('pca', PCA(n_pca_hr, whiten=False)), ('unc', UncertaintyHolder()), ]) self.fit_model = FitModel() @@ -525,7 +543,11 @@ class Model(TransformerMixin, BaseEstimator): low_res = self.x_model['select'].transform(low_res_data) low_pca = self.x_model['pca'].transform(low_res) - low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) + if isinstance(self.x_model['pca'], FeatureUnion): + n = self.x_model['pca'].transformer_list[0][1].n_components + low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n]) + else: + low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) low_pca_unc = np.mean(np.sqrt(np.mean((low_res - low_pca_rec)**2, axis=1, keepdims=True)), axis=0, keepdims=True) self.x_model['unc'].set_uncertainty(low_pca_unc) @@ -543,7 +565,11 @@ class Model(TransformerMixin, BaseEstimator): """ low_res = self.x_model['select'].transform(low_res_data) low_pca = self.x_model['pca'].transform(low_res) - low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) + if isinstance(self.x_model['pca'], FeatureUnion): + n = self.x_model['pca'].transformer_list[0][1].n_components + low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n]) + else: + low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) low_pca_unc = self.x_model['unc'].uncertainty() #fig = plt.figure(figsize=(8, 16))