Skip to content
Snippets Groups Projects
Commit f3d98448 authored by Danilo Enoque Ferreira de Lima's avatar Danilo Enoque Ferreira de Lima
Browse files

Merge branch 'kernelPCA' into 'main'

Add Kernel PCA as an alternative preprocessing step

See merge request !1
parents dde729ab eed28bdc
No related branches found
No related tags found
1 merge request!1Add Kernel PCA as an alternative preprocessing step
...@@ -8,14 +8,13 @@ from autograd import grad ...@@ -8,14 +8,13 @@ from autograd import grad
from scipy.signal import fftconvolve from scipy.signal import fftconvolve
from scipy.signal import find_peaks_cwt from scipy.signal import find_peaks_cwt
from scipy.optimize import fmin_l_bfgs_b from scipy.optimize import fmin_l_bfgs_b
from sklearn.decomposition import PCA from sklearn.decomposition import KernelPCA, PCA
from sklearn.pipeline import Pipeline from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import TransformerMixin, BaseEstimator from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.base import RegressorMixin from sklearn.base import RegressorMixin
from itertools import product from itertools import product
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
...@@ -34,6 +33,7 @@ class PromptNotFoundError(Exception): ...@@ -34,6 +33,7 @@ class PromptNotFoundError(Exception):
def __str__(self) -> str: def __str__(self) -> str:
return "No prompt peak has been detected." return "No prompt peak has been detected."
class HighResolutionSmoother(TransformerMixin, BaseEstimator): class HighResolutionSmoother(TransformerMixin, BaseEstimator):
""" """
Smoothens out the high resolution data. Smoothens out the high resolution data.
...@@ -72,6 +72,8 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator): ...@@ -72,6 +72,8 @@ class HighResolutionSmoother(TransformerMixin, BaseEstimator):
Returns: Smoothened out spectrum. Returns: Smoothened out spectrum.
""" """
if self.high_res_sigma <= 0:
return X
# use a default energy axis is none is given # use a default energy axis is none is given
# assume only the energy step # assume only the energy step
energy = np.broadcast_to(self.energy, X.shape) energy = np.broadcast_to(self.energy, X.shape)
...@@ -369,7 +371,7 @@ class FitModel(RegressorMixin, BaseEstimator): ...@@ -369,7 +371,7 @@ class FitModel(RegressorMixin, BaseEstimator):
grad_loss, grad_loss,
disp=True, disp=True,
factr=1e7, factr=1e7,
maxiter=100, maxiter=1000,
iprint=0) iprint=0)
# Inference # Inference
...@@ -449,6 +451,8 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -449,6 +451,8 @@ class Model(TransformerMixin, BaseEstimator):
Set to None to perform no selection. Set to None to perform no selection.
validation_size: Fraction (number between 0 and 1) of the data to take for validation_size: Fraction (number between 0 and 1) of the data to take for
validation and systematic uncertainty estimate. validation and systematic uncertainty estimate.
n_pca_nonlinear: Number of nonlinear PCA components added at the preprocessing stage
to obtain nonlinearities as an input and improve the prediction.
""" """
def __init__(self, def __init__(self,
...@@ -459,15 +463,29 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -459,15 +463,29 @@ class Model(TransformerMixin, BaseEstimator):
high_res_sigma: float=0.2, high_res_sigma: float=0.2,
tof_start: Optional[int]=None, tof_start: Optional[int]=None,
delta_tof: Optional[int]=300, delta_tof: Optional[int]=300,
validation_size: float=0.05): validation_size: float=0.05,
n_pca_nonlinear: int=0):
# models # models
if n_pca_nonlinear <= 0:
x_pca_model = PCA(n_pca_lr, whiten=True)
else:
x_pca_model = FeatureUnion([
('linear', PCA(n_pca_lr, whiten=True)),
('nonlinear', Pipeline([('prep', PCA(n_pca_lr, whiten=True)),
('kpca', KernelPCA(n_pca_nonlinear,
kernel='rbf',
gamma=0.1,
n_jobs=-1)),
])),
])
self.x_model = Pipeline([ self.x_model = Pipeline([
('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)), ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)),
('pca', PCA(n_pca_lr, whiten=True)), ('pca', x_pca_model),
('unc', UncertaintyHolder()), ('unc', UncertaintyHolder()),
]) ])
self.y_model = Pipeline([('smoothen', HighResolutionSmoother(high_res_sigma)), self.y_model = Pipeline([
('pca', PCA(n_pca_hr, whiten=True)), ('smoothen', HighResolutionSmoother(high_res_sigma)),
('pca', PCA(n_pca_hr, whiten=False)),
('unc', UncertaintyHolder()), ('unc', UncertaintyHolder()),
]) ])
self.fit_model = FitModel() self.fit_model = FitModel()
...@@ -525,7 +543,11 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -525,7 +543,11 @@ class Model(TransformerMixin, BaseEstimator):
low_res = self.x_model['select'].transform(low_res_data) low_res = self.x_model['select'].transform(low_res_data)
low_pca = self.x_model['pca'].transform(low_res) low_pca = self.x_model['pca'].transform(low_res)
low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) if isinstance(self.x_model['pca'], FeatureUnion):
n = self.x_model['pca'].transformer_list[0][1].n_components
low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n])
else:
low_pca_rec = self.x_model['pca'].inverse_transform(low_pca)
low_pca_unc = np.mean(np.sqrt(np.mean((low_res - low_pca_rec)**2, axis=1, keepdims=True)), axis=0, keepdims=True) low_pca_unc = np.mean(np.sqrt(np.mean((low_res - low_pca_rec)**2, axis=1, keepdims=True)), axis=0, keepdims=True)
self.x_model['unc'].set_uncertainty(low_pca_unc) self.x_model['unc'].set_uncertainty(low_pca_unc)
...@@ -543,7 +565,11 @@ class Model(TransformerMixin, BaseEstimator): ...@@ -543,7 +565,11 @@ class Model(TransformerMixin, BaseEstimator):
""" """
low_res = self.x_model['select'].transform(low_res_data) low_res = self.x_model['select'].transform(low_res_data)
low_pca = self.x_model['pca'].transform(low_res) low_pca = self.x_model['pca'].transform(low_res)
low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) if isinstance(self.x_model['pca'], FeatureUnion):
n = self.x_model['pca'].transformer_list[0][1].n_components
low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n])
else:
low_pca_rec = self.x_model['pca'].inverse_transform(low_pca)
low_pca_unc = self.x_model['unc'].uncertainty() low_pca_unc = self.x_model['unc'].uncertainty()
#fig = plt.figure(figsize=(8, 16)) #fig = plt.figure(figsize=(8, 16))
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.