diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index 761e0864aea029d95e48b3ecf597a4553cd15508..5200c68185667faf3fb253515780c82c7829428f 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -502,31 +502,32 @@ class Model(TransformerMixin, BaseEstimator):
     def __init__(self,
                  channels:List[str]=[f"channel_{j}_{k}"
                                      for j, k in product(range(1, 5), ["A", "B", "C", "D"])],
-                 n_pca_lr: int=1000,
-                 n_pca_hr: int=40,
+                 n_pca_lr: int=600,
+                 n_pca_hr: int=20,
                  high_res_sigma: float=0.2,
                  tof_start: Optional[int]=None,
                  delta_tof: Optional[int]=300,
                  validation_size: float=0.05,
-                 n_nonlinear_kernel: int=10000):
+                 n_nonlinear_kernel: int=5000):
         # models
-        self.x_model = Pipeline([
-                        ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)),
-                        ('pca', PCA(n_pca_lr, whiten=True)),
-                        ('unc', UncertaintyHolder()),
-                        ])
+        x_model_steps = list()
+        x_model_steps += [('select', SelectRelevantLowResolution(channels, tof_start, delta_tof))]
+        if n_nonlinear_kernel > 0:
+            x_model_steps += [('fex', Pipeline([('prepca', PCA(n_pca_lr, whiten=True)),
+                                                ('nystroem', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1)),
+                                                ]))]
+        x_model_steps += [
+            ('pca', PCA(n_pca_lr, whiten=True)),
+            ('unc', UncertaintyHolder()),
+        ]
+        self.x_model = Pipeline(x_model_steps)
         self.y_model = Pipeline([
                         ('smoothen', HighResolutionSmoother(high_res_sigma)),
                         ('pca', PCA(n_pca_hr, whiten=True)),
                         ('unc', UncertaintyHolder()),
                         ])
-        fit_steps = list()
-        if n_nonlinear_kernel > 0:
-            fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1))]
-        #fit_steps += [('regression', FitModel())]
-        fit_steps += [('regression', MultiOutputWithStd(ARDRegression(n_iter=30, verbose=True)))]
-        #fit_steps += [('regression', MultiOutputWithStd(LinearSVR(verbose=10, max_iter=2000, tol=1e-5)))]
-        self.fit_model = Pipeline(fit_steps)
+        #self.fit_model = FitModel()
+        self.fit_model = MultiOutputWithStd(ARDRegression(n_iter=30, verbose=True))
         # size of the test subset
         self.validation_size = validation_size
 
@@ -581,12 +582,11 @@ class Model(TransformerMixin, BaseEstimator):
         self.y_model['unc'].set_uncertainty(high_pca_unc)
 
         low_res = self.x_model['select'].transform(low_res_data)
-        low_pca = self.x_model['pca'].transform(low_res)
-        if isinstance(self.x_model['pca'], FeatureUnion):
-            n = self.x_model['pca'].transformer_list[0][1].n_components
-            low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n])
-        else:
-            low_pca_rec = self.x_model['pca'].inverse_transform(low_pca)
+        pca_model = self.x_model['pca']
+        if 'fex' in self.x_model.named_steps:
+            pca_model = self.x_model['fex'].named_steps['prepca']
+        low_pca = pca_model.transform(low_res)
+        low_pca_rec = pca_model.inverse_transform(low_pca)
         low_pca_unc = np.mean(np.sqrt(np.mean((low_res - low_pca_rec)**2, axis=1, keepdims=True)), axis=0, keepdims=True)
         self.x_model['unc'].set_uncertainty(low_pca_unc)
 
@@ -603,12 +603,11 @@ class Model(TransformerMixin, BaseEstimator):
         Returns: Ratio of root-mean-squared-error of the data reconstruction using the existing PCA model and the one from the original model.
         """
         low_res = self.x_model['select'].transform(low_res_data)
-        low_pca = self.x_model['pca'].transform(low_res)
-        if isinstance(self.x_model['pca'], FeatureUnion):
-            n = self.x_model['pca'].transformer_list[0][1].n_components
-            low_pca_rec = self.x_model['pca'].transformer_list[0][1].inverse_transform(low_pca[:, :n])
-        else:
-            low_pca_rec = self.x_model['pca'].inverse_transform(low_pca)
+        pca_model = self.x_model['pca']
+        if 'fex' in self.x_model.named_steps:
+            pca_model = self.x_model['fex'].named_steps['prepca']
+        low_pca = pca_model.transform(low_res)
+        low_pca_rec = pca_model.inverse_transform(low_pca)
         low_pca_unc = self.x_model['unc'].uncertainty()
 
         #fig = plt.figure(figsize=(8, 16))