From 8eb2d8be399cdb716f0a35d3e001910b280cb710 Mon Sep 17 00:00:00 2001 From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de> Date: Mon, 19 Dec 2022 18:39:53 +0100 Subject: [PATCH] Save properties. --- pes_to_spec/model.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index af421a4..fa88929 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -9,7 +9,7 @@ from scipy.optimize import fmin_l_bfgs_b from sklearn.decomposition import PCA, IncrementalPCA from sklearn.model_selection import train_test_split -import logging +from time import time_ns import matplotlib.pyplot as plt @@ -84,6 +84,18 @@ class Model(object): # smoothing of the SPEC data in eV self.high_res_sigma = high_res_sigma + def parameters(self) -> Dict[str, Any]: + """ + Dump parameters as a dictionary. + """ + return dict(channels=self.channels, + n_pca_lr=self.n_pca_lr, + n_pca_hr=self.n_pca_hr, + high_res_sigma=self.high_res_sigma, + tof_start=self.tof_start, + delta_tof=self.delta_tof, + validation_size=self.validation_size) + def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray: """ Get a dictionary with the channel names for the inut low resolution data and output @@ -181,34 +193,23 @@ class Model(object): self.high_res_photon_energy = high_res_photon_energy - print("Find peaks.") # if the prompt peak has not been given, guess it if self.tof_start is None: self.tof_start = self.estimate_prompt_peak(low_res_data) - print("Prompt at", self.tof_start) - print("Pre-processing low") low_res = self.preprocess_low_res(low_res_data) - print("Pre-processing high") high_res = self.preprocess_high_res(high_res_data, high_res_photon_energy) # fit PCA - print("PCA low", low_res.shape) low_pca = self.lr_pca.fit_transform(low_res) - print("PCA high") high_pca = self.hr_pca.fit_transform(high_res) - print("Split") # split in train and test for PCA uncertainty evaluation low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42) # fit the linear model - print("Fit") self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test) - print("PCA unc") high_pca_rec = self.hr_pca.inverse_transform(high_pca) self.high_pca_unc = np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True)) - print("Done") - return high_res def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray: @@ -228,8 +229,13 @@ class Model(object): high_pca = self.fit_model.predict(low_pca) high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"]) n_high_res_features = high_res_predicted.shape[1] - high_res_unc = self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"]) - high_res_predicted - result = np.stack((high_res_predicted, high_res_unc, np.broadcast_to(self.high_pca_unc, (n_trains, n_high_res_features))), axis=2) + high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"]) + - high_res_predicted) + result = np.stack((high_res_predicted, + high_res_unc, + np.broadcast_to(self.high_pca_unc, + (n_trains, n_high_res_features))), + axis=2) return result def save(self, filename: str, lr_pca_filename: str, hr_pca_filename: str): @@ -243,6 +249,7 @@ class Model(object): """ with h5py.File(filename, 'w') as hf: d = self.fit_model.as_dict() + d.update(self.parameters()) for key, value in d.items(): if isinstance(value, int): hf.attrs[key] = value @@ -266,6 +273,8 @@ class Model(object): d = {k: hf[k][()] for k in hf.keys()} d.update({k: hf.attrs[k] for k in hf.attrs}) self.fit_model.from_dict(d) + for key in self.parameters().keys(): + setattr(self, key, d[key]) self.lr_pca = joblib.load(lr_pca_filename) self.hr_pca = joblib.load(hr_pca_filename) -- GitLab