From 6b5af7b136b29b7e4b2ee5c974432c6ceb4828c7 Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Wed, 21 Dec 2022 14:18:49 +0100
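Subject: [PATCH] Record time taken for test. Remove unnecessary variables when
 writing model to HDF5 file.

Time the fit, save, load and predict steps in the offline analysis script
and print them in ms. Stop keeping the training and test datasets in
FitModel, so they are no longer written to or read from the saved model.

Also change the default n_pca_hr from 40 to 20, store only the first row
of high_res_photon_energy in the model, and construct Model() with its
default arguments in the offline analysis script.
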
---
 pes_to_spec/model.py                 | 52 +++++++---------------------
 pes_to_spec/test/offline_analysis.py | 31 +++++++++++++----
 2 files changed, 36 insertions(+), 47 deletions(-)

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index 98627fd..767c575 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -128,7 +128,7 @@ class Model(TransformerMixin, BaseEstimator):
                  channels:List[str]=[f"channel_{j}_{k}"
                                      for j, k in product(range(1, 5), ["A", "B", "C", "D"])],
                  n_pca_lr: int=600,
-                 n_pca_hr: int=40,
+                 n_pca_hr: int=20,
                  high_res_sigma: float=0.2,
                  tof_start: Optional[int]=None,
                  delta_tof: Optional[int]=300,
@@ -284,7 +284,7 @@ class Model(TransformerMixin, BaseEstimator):
         Returns: Smoothened high resolution spectrum.
         """
 
-        self.high_res_photon_energy = high_res_photon_energy
+        self.high_res_photon_energy = high_res_photon_energy[0, np.newaxis, :]
 
         # if the prompt peak has not been given, guess it
         if self.tof_start is None:
@@ -403,26 +403,14 @@ class FitModel(object):
     Linear regression model with uncertainties.
     """
     def __init__(self):
-        # training dataset
-        self.X_train: Optional[np.ndarray] = None
-        self.Y_train: Optional[np.ndarray] = None
-
-        # test dataset to evaluate uncertainty
-        self.X_test: Optional[np.ndarray] = None
-        self.Y_test: Optional[np.ndarray] = None
-
-        # normalized target
-        self.Y_train_norm = None
-        self.Y_test_norm = None
-
         # model parameter sizes
         self.Nx: int = 0
         self.Ny: int = 0
 
         # fit result
-        self.A_inf: np.ndarray = None
-        self.b_inf: np.ndarray = None
-        self.u_inf: np.ndarray = None
+        self.A_inf: Optional[np.ndarray] = None
+        self.b_inf: Optional[np.ndarray] = None
+        self.u_inf: Optional[np.ndarray] = None
 
         # fit monitoring
         self.loss_train: List[float] = list()
@@ -435,17 +423,9 @@ class FitModel(object):
         Perform the fit and evaluate uncertainties with the test set.
         """
 
-        # training dataset
-        self.X_train: np.ndarray = X_train
-        self.Y_train: np.ndarray = Y_train
-
-        # test dataset to evaluate uncertainty
-        self.X_test: np.ndarray = X_test
-        self.Y_test: np.ndarray = Y_test
-
         # model parameter sizes
-        self.Nx: int = int(self.X_train.shape[1])
-        self.Ny: int = int(self.Y_train.shape[1])
+        self.Nx: int = int(X_train.shape[1])
+        self.Ny: int = int(Y_train.shape[1])
 
         # initial parameter values
         A0: np.ndarray = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny)
@@ -491,8 +471,8 @@ class FitModel(object):
 
             Returns: The loss value.
             """
-            l_train = loss(x, self.X_train, self.Y_train)
-            l_test = loss(x, self.X_test, self.Y_test)
+            l_train = loss(x, X_train, Y_train)
+            l_test = loss(x, X_test, Y_test)
 
             self.loss_train += [l_train]
             self.loss_test += [l_test]
@@ -507,7 +487,7 @@ class FitModel(object):
 
             Returns: The loss value.
             """
-            l_train = loss(x, self.X_train, self.Y_train)
+            l_train = loss(x, X_train, Y_train)
             return l_train
 
         grad_loss = grad(loss_train)
@@ -534,10 +514,6 @@ class FitModel(object):
         Returns: Dictionary with all relevant variables.
         """
         return dict(
-                    X_train=self.X_train,
-                    X_test=self.X_test,
-                    Y_train=self.Y_train,
-                    Y_test=self.Y_test,
                     A_inf=self.A_inf,
                     b_inf=self.b_inf,
                     u_inf=self.u_inf,
@@ -554,10 +530,6 @@ class FitModel(object):
           in_dict: The input dictionary with relevant variables.
 
         """
-        self.X_train = in_dict["X_train"]
-        self.X_test = in_dict["X_test"]
-        self.Y_train = in_dict["Y_train"]
-        self.Y_test = in_dict["Y_test"]
         self.A_inf = in_dict["A_inf"]
         self.b_inf = in_dict["b_inf"]
         self.u_inf = in_dict["u_inf"]
@@ -587,9 +559,9 @@ class FitModel(object):
         result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"])
 
         #self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space
-        #self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) 
+        #self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
         #self.result["res_unc"] = np.fabs(self.result["res_unc"])
-        #self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) 
+        #self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
         #self.result["res_eps"] = np.fabs(self.result["res_eps"])
         #self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"])
         #self.result["res_unc_specpca"] =  np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0))
diff --git a/pes_to_spec/test/offline_analysis.py b/pes_to_spec/test/offline_analysis.py
index 9ce3272..2a165ce 100755
--- a/pes_to_spec/test/offline_analysis.py
+++ b/pes_to_spec/test/offline_analysis.py
@@ -17,6 +17,9 @@ from matplotlib.gridspec import GridSpec
 
 from typing import Optional
 
+from time import time_ns
+import pandas as pd
+
 def plot_pes(filename: str, pes_raw_int: np.ndarray):
     """
     Plot low-resolution spectrum.
@@ -109,6 +112,9 @@ def main():
     #retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray()
     #retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray()
 
+    t = list()
+    t_names = list()
+
     # these have been manually selected:
     #useful_channels = ["channel_1_D",
     #                  "channel_2_B",
@@ -116,34 +122,45 @@ def main():
     #                  "channel_3_B",
     #                  "channel_4_C",
     #                  "channel_4_D"]
-    model = Model(channels=channels,
-                 n_pca_lr=600,
-                 n_pca_hr=40,
-                 high_res_sigma=0.2,
-                 tof_start=None,
-                 delta_tof=300,
-                 validation_size=0.05)
+    model = Model()
 
     train_idx = np.isin(tids, train_tids)
 
     model.debug_peak_finding(pes_raw, "test_peak_finding.png")
     print("Fitting")
+    start = time_ns()
     model.fit({k: v[train_idx, :]
                for k, v in pes_raw.items()},
               spec_raw_int[train_idx, :],
               spec_raw_pe[train_idx, :])
+    t += [time_ns() - start]
+    t_names += ["Fit"]
     spec_smooth = model.preprocess_high_res(spec_raw_int, spec_raw_pe)
 
     print("Saving the model")
+    start = time_ns()
     model.save("model.h5")
+    t += [time_ns() - start]
+    t_names += ["Save"]
 
     print("Loading the model")
+    start = time_ns()
     model = Model()
     model.load("model.h5")
+    t += [time_ns() - start]
+    t_names += ["Load"]
 
     # test
     print("Predict")
+    start = time_ns()
     spec_pred = model.predict(pes_raw)
+    t += [time_ns() - start]
+    t_names += ["Predict"]
+
+    print("Time taken in ms")
+    df_time = pd.DataFrame(data=dict(time=t, name=t_names))
+    df_time.time *= 1e-6
+    print(df_time)
 
     print("Plotting")
     # plot
-- 
GitLab