Save properties.

8eb2d8be · Danilo Ferreira de Lima · 4ca967d5 · 8eb2d8be
Commit 8eb2d8be authored 2 years ago by Danilo Ferreira de Lima
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -9,7 +9,7 @@ from scipy.optimize import fmin_l_bfgs_b
 from sklearn.decomposition import PCA, IncrementalPCA
 from sklearn.model_selection import train_test_split
-import logging
+from time import time_ns
 import matplotlib.pyplot as plt
@@ -84,6 +84,18 @@ class Model(object):
        # smoothing of the SPEC data in eV
        self.high_res_sigma = high_res_sigma
+    def parameters(self) -> Dict[str, Any]:
+        """
+        Dump parameters as a dictionary.
+
+        Every entry is read from the attribute of the same name on this
+        object, so each key is also a valid attribute name.
+
+        Returns: A dictionary with the keys `channels`, `n_pca_lr`, `n_pca_hr`,
+                 `high_res_sigma`, `tof_start`, `delta_tof` and `validation_size`.
+        """
+        return dict(channels=self.channels,
+                    n_pca_lr=self.n_pca_lr,
+                    n_pca_hr=self.n_pca_hr,
+                    high_res_sigma=self.high_res_sigma,
+                    tof_start=self.tof_start,
+                    delta_tof=self.delta_tof,
+                    validation_size=self.validation_size)
    def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Get a dictionary with the channel names for the input low resolution data and output
@@ -181,34 +193,23 @@ class Model(object):
        self.high_res_photon_energy = high_res_photon_energy
-        print("Find peaks.")
        # if the prompt peak has not been given, guess it
        if self.tof_start is None:
            self.tof_start = self.estimate_prompt_peak(low_res_data)
-            print("Prompt at", self.tof_start)
-        print("Pre-processing low")
        low_res = self.preprocess_low_res(low_res_data)
-        print("Pre-processing high")
        high_res = self.preprocess_high_res(high_res_data, high_res_photon_energy)
        # fit PCA
-        print("PCA low", low_res.shape)
        low_pca = self.lr_pca.fit_transform(low_res)
-        print("PCA high")
        high_pca = self.hr_pca.fit_transform(high_res)
-        print("Split")
        # split in train and test for PCA uncertainty evaluation
        low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42)
        # fit the linear model
-        print("Fit")
        self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test)
-        print("PCA unc")
        high_pca_rec = self.hr_pca.inverse_transform(high_pca)
        self.high_pca_unc =  np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True))
-        print("Done")
        return high_res
    def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
@@ -228,8 +229,13 @@ class Model(object):
        high_pca = self.fit_model.predict(low_pca)
        high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"])
        n_high_res_features = high_res_predicted.shape[1]
-        high_res_unc = self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"]) - high_res_predicted
+        high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"])
-        result = np.stack((high_res_predicted, high_res_unc, np.broadcast_to(self.high_pca_unc, (n_trains, n_high_res_features))), axis=2)
+                         - high_res_predicted)
+        result = np.stack((high_res_predicted,
+                           high_res_unc,
+                           np.broadcast_to(self.high_pca_unc,
+                                           (n_trains, n_high_res_features))),
+                           axis=2)
        return result
    def save(self, filename: str, lr_pca_filename: str, hr_pca_filename: str):
@@ -243,6 +249,7 @@ class Model(object):
        """
        with h5py.File(filename, 'w') as hf:
            d = self.fit_model.as_dict()
+            d.update(self.parameters())
            for key, value in d.items():
                if isinstance(value, int):
                    hf.attrs[key] = value
@@ -266,6 +273,8 @@ class Model(object):
            d = {k: hf[k][()] for k in hf.keys()}
            d.update({k: hf.attrs[k] for k in hf.attrs})
            self.fit_model.from_dict(d)
+            for key in self.parameters().keys():
+                setattr(self, key, d[key])
        self.lr_pca = joblib.load(lr_pca_filename)
        self.hr_pca = joblib.load(hr_pca_filename)