From 3b1b5aa359614d4810f162c3e9de0b502362ac99 Mon Sep 17 00:00:00 2001 From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de> Date: Fri, 13 Jan 2023 16:39:47 +0100 Subject: [PATCH] Added pre-PCA step before Nystroem to reduce the dimensionality in advance. --- pes_to_spec/model.py | 53 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index 761e086..5200c68 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -502,31 +502,32 @@ class Model(TransformerMixin, BaseEstimator): def __init__(self, channels:List[str]=[f"channel_{j}_{k}" for j, k in product(range(1, 5), ["A", "B", "C", "D"])], - n_pca_lr: int=1000, - n_pca_hr: int=40, + n_pca_lr: int=600, + n_pca_hr: int=20, high_res_sigma: float=0.2, tof_start: Optional[int]=None, delta_tof: Optional[int]=300, validation_size: float=0.05, - n_nonlinear_kernel: int=10000): + n_nonlinear_kernel: int=5000): # models - self.x_model = Pipeline([ - ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)), - ('pca', PCA(n_pca_lr, whiten=True)), - ('unc', UncertaintyHolder()), - ]) + x_model_steps = list() + x_model_steps += [('select', SelectRelevantLowResolution(channels, tof_start, delta_tof))] + if n_nonlinear_kernel > 0: + x_model_steps += [('fex', Pipeline([('prepca', PCA(n_pca_lr, whiten=True)), + ('nystroem', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1)), + ]))] + x_model_steps += [ + ('pca', PCA(n_pca_lr, whiten=True)), + ('unc', UncertaintyHolder()), + ] + self.x_model = Pipeline(x_model_steps) self.y_model = Pipeline([ ('smoothen', HighResolutionSmoother(high_res_sigma)), ('pca', PCA(n_pca_hr, whiten=True)), ('unc', UncertaintyHolder()), ]) - fit_steps = list() - if n_nonlinear_kernel > 0: - fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1))] - #fit_steps += [('regression', FitModel())] - fit_steps += [('regression', MultiOutputWithStd(ARDRegression(n_iter=30, verbose=True)))] - #fit_steps += [('regression', MultiOutputWithStd(LinearSVR(verbose=10, max_iter=2000, tol=1e-5)))] - self.fit_model = Pipeline(fit_steps) + #self.fit_model = FitModel() + self.fit_model = MultiOutputWithStd(ARDRegression(n_iter=30, verbose=True)) # size of the test subset self.validation_size = validation_size @@ -581,12 +582,11 @@ class Model(TransformerMixin, BaseEstimator): self.y_model['unc'].set_uncertainty(high_pca_unc) low_res = self.x_model['select'].transform(low_res_data) - low_pca = self.x_model['pca'].transform(low_res) - if isinstance(self.x_model['pca'], FeatureUnion): - n = self.x_model['pca'].transformer_list[0][1].n_components - low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n]) - else: - low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) + pca_model = self.x_model['pca'] + if 'fex' in self.x_model.named_steps: + pca_model = self.x_model['fex'].named_steps['prepca'] + low_pca = pca_model.transform(low_res) + low_pca_rec = pca_model.inverse_transform(low_pca) low_pca_unc = np.mean(np.sqrt(np.mean((low_res - low_pca_rec)**2, axis=1, keepdims=True)), axis=0, keepdims=True) self.x_model['unc'].set_uncertainty(low_pca_unc) @@ -603,12 +603,11 @@ class Model(TransformerMixin, BaseEstimator): Returns: Ratio of root-mean-squared-error of the data reconstruction using the existing PCA model and the one from the original model. """ low_res = self.x_model['select'].transform(low_res_data) - low_pca = self.x_model['pca'].transform(low_res) - if isinstance(self.x_model['pca'], FeatureUnion): - n = self.x_model['pca'].transformer_list[0][1].n_components - low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n]) - else: - low_pca_rec = self.x_model['pca'].inverse_transform(low_pca) + pca_model = self.x_model['pca'] + if 'fex' in self.x_model.named_steps: + pca_model = self.x_model['fex'].named_steps['prepca'] + low_pca = pca_model.transform(low_res) + low_pca_rec = pca_model.inverse_transform(low_pca) low_pca_unc = self.x_model['unc'].uncertainty() #fig = plt.figure(figsize=(8, 16)) -- GitLab