From 8eb2d8be399cdb716f0a35d3e001910b280cb710 Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Mon, 19 Dec 2022 18:39:53 +0100
Subject: [PATCH] Save properties.

---
 pes_to_spec/model.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index af421a4..fa88929 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -9,7 +9,7 @@ from scipy.optimize import fmin_l_bfgs_b
 from sklearn.decomposition import PCA, IncrementalPCA
 from sklearn.model_selection import train_test_split
 
-import logging
+from time import time_ns
 
 import matplotlib.pyplot as plt
 
@@ -84,6 +84,18 @@ class Model(object):
         # smoothing of the SPEC data in eV
         self.high_res_sigma = high_res_sigma
 
+    def parameters(self) -> Dict[str, Any]:
+        """Return the model settings as a dictionary keyed by attribute name."""
+        return {
+            "channels": self.channels,
+            "n_pca_lr": self.n_pca_lr,
+            "n_pca_hr": self.n_pca_hr,
+            "high_res_sigma": self.high_res_sigma,
+            "tof_start": self.tof_start,
+            "delta_tof": self.delta_tof,
+            "validation_size": self.validation_size,
+        }
+
     def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
         """
         Get a dictionary with the channel names for the inut low resolution data and output
@@ -181,34 +193,23 @@ class Model(object):
 
         self.high_res_photon_energy = high_res_photon_energy
 
-        print("Find peaks.")
         # if the prompt peak has not been given, guess it
         if self.tof_start is None:
             self.tof_start = self.estimate_prompt_peak(low_res_data)
-            print("Prompt at", self.tof_start)
 
-        print("Pre-processing low")
         low_res = self.preprocess_low_res(low_res_data)
-        print("Pre-processing high")
         high_res = self.preprocess_high_res(high_res_data, high_res_photon_energy)
         # fit PCA
-        print("PCA low", low_res.shape)
         low_pca = self.lr_pca.fit_transform(low_res)
-        print("PCA high")
         high_pca = self.hr_pca.fit_transform(high_res)
-        print("Split")
         # split in train and test for PCA uncertainty evaluation
         low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42)
         # fit the linear model
-        print("Fit")
         self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test)
 
-        print("PCA unc")
         high_pca_rec = self.hr_pca.inverse_transform(high_pca)
         self.high_pca_unc =  np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True))
 
-        print("Done")
-
         return high_res
 
     def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
@@ -228,8 +229,13 @@ class Model(object):
         high_pca = self.fit_model.predict(low_pca)
         high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"])
         n_high_res_features = high_res_predicted.shape[1]
-        high_res_unc = self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"]) - high_res_predicted
-        result = np.stack((high_res_predicted, high_res_unc, np.broadcast_to(self.high_pca_unc, (n_trains, n_high_res_features))), axis=2)
+        high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"])
+                         - high_res_predicted)
+        result = np.stack((high_res_predicted,
+                           high_res_unc,
+                           np.broadcast_to(self.high_pca_unc,
+                                           (n_trains, n_high_res_features))),
+                           axis=2)
         return result
 
     def save(self, filename: str, lr_pca_filename: str, hr_pca_filename: str):
@@ -243,6 +249,7 @@ class Model(object):
         """
         with h5py.File(filename, 'w') as hf:
             d = self.fit_model.as_dict()
+            d.update(self.parameters())
             for key, value in d.items():
                 if isinstance(value, int):
                     hf.attrs[key] = value
@@ -266,6 +273,8 @@ class Model(object):
             d = {k: hf[k][()] for k in hf.keys()}
             d.update({k: hf.attrs[k] for k in hf.attrs})
             self.fit_model.from_dict(d)
+            for key in self.parameters().keys():
+                setattr(self, key, d[key])
         self.lr_pca = joblib.load(lr_pca_filename)
         self.hr_pca = joblib.load(hr_pca_filename)
 
-- 
GitLab