From 6b5af7b136b29b7e4b2ee5c974432c6ceb4828c7 Mon Sep 17 00:00:00 2001 From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de> Date: Wed, 21 Dec 2022 14:18:49 +0100 Subject: [PATCH] Record time taken for test. Remove unnecessary variables when writing model to HDF5 file. --- pes_to_spec/model.py | 52 +++++++--------------------- pes_to_spec/test/offline_analysis.py | 31 +++++++++++++---- 2 files changed, 36 insertions(+), 47 deletions(-) diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index 98627fd..767c575 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -128,7 +128,7 @@ class Model(TransformerMixin, BaseEstimator): channels:List[str]=[f"channel_{j}_{k}" for j, k in product(range(1, 5), ["A", "B", "C", "D"])], n_pca_lr: int=600, - n_pca_hr: int=40, + n_pca_hr: int=20, high_res_sigma: float=0.2, tof_start: Optional[int]=None, delta_tof: Optional[int]=300, @@ -284,7 +284,7 @@ class Model(TransformerMixin, BaseEstimator): Returns: Smoothened high resolution spectrum. """ - self.high_res_photon_energy = high_res_photon_energy + self.high_res_photon_energy = high_res_photon_energy[0, np.newaxis, :] # if the prompt peak has not been given, guess it if self.tof_start is None: @@ -403,26 +403,14 @@ class FitModel(object): Linear regression model with uncertainties. """ def __init__(self): - # training dataset - self.X_train: Optional[np.ndarray] = None - self.Y_train: Optional[np.ndarray] = None - - # test dataset to evaluate uncertainty - self.X_test: Optional[np.ndarray] = None - self.Y_test: Optional[np.ndarray] = None - - # normalized target - self.Y_train_norm = None - self.Y_test_norm = None - # model parameter sizes self.Nx: int = 0 self.Ny: int = 0 # fit result - self.A_inf: np.ndarray = None - self.b_inf: np.ndarray = None - self.u_inf: np.ndarray = None + self.A_inf: Optional[np.ndarray] = None + self.b_inf: Optional[np.ndarray] = None + self.u_inf: Optional[np.ndarray] = None # fit monitoring self.loss_train: List[float] = list() @@ -435,17 +423,9 @@ class FitModel(object): Perform the fit and evaluate uncertainties with the test set. """ - # training dataset - self.X_train: np.ndarray = X_train - self.Y_train: np.ndarray = Y_train - - # test dataset to evaluate uncertainty - self.X_test: np.ndarray = X_test - self.Y_test: np.ndarray = Y_test - # model parameter sizes - self.Nx: int = int(self.X_train.shape[1]) - self.Ny: int = int(self.Y_train.shape[1]) + self.Nx: int = int(X_train.shape[1]) + self.Ny: int = int(Y_train.shape[1]) # initial parameter values A0: np.ndarray = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny) @@ -491,8 +471,8 @@ class FitModel(object): Returns: The loss value. """ - l_train = loss(x, self.X_train, self.Y_train) - l_test = loss(x, self.X_test, self.Y_test) + l_train = loss(x, X_train, Y_train) + l_test = loss(x, X_test, Y_test) self.loss_train += [l_train] self.loss_test += [l_test] @@ -507,7 +487,7 @@ class FitModel(object): Returns: The loss value. """ - l_train = loss(x, self.X_train, self.Y_train) + l_train = loss(x, X_train, Y_train) return l_train grad_loss = grad(loss_train) @@ -534,10 +514,6 @@ class FitModel(object): Returns: Dictionary with all relevant variables. """ return dict( - X_train=self.X_train, - X_test=self.X_test, - Y_train=self.Y_train, - Y_test=self.Y_test, A_inf=self.A_inf, b_inf=self.b_inf, u_inf=self.u_inf, @@ -554,10 +530,6 @@ class FitModel(object): in_dict: The input dictionary with relevant variables. """ - self.X_train = in_dict["X_train"] - self.X_test = in_dict["X_test"] - self.Y_train = in_dict["Y_train"] - self.Y_test = in_dict["Y_test"] self.A_inf = in_dict["A_inf"] self.b_inf = in_dict["b_inf"] self.u_inf = in_dict["u_inf"] @@ -587,9 +559,9 @@ class FitModel(object): result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"]) #self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space - #self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) + #self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) #self.result["res_unc"] = np.fabs(self.result["res_unc"]) - #self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) + #self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] ) #self.result["res_eps"] = np.fabs(self.result["res_eps"]) #self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"]) #self.result["res_unc_specpca"] = np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0)) diff --git a/pes_to_spec/test/offline_analysis.py b/pes_to_spec/test/offline_analysis.py index 9ce3272..2a165ce 100755 --- a/pes_to_spec/test/offline_analysis.py +++ b/pes_to_spec/test/offline_analysis.py @@ -17,6 +17,9 @@ from matplotlib.gridspec import GridSpec from typing import Optional +from time import time_ns +import pandas as pd + def plot_pes(filename: str, pes_raw_int: np.ndarray): """ Plot low-resolution spectrum. @@ -109,6 +112,9 @@ def main(): #retvol_raw = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.value"].select_trains(by_id[tids]).ndarray() #retvol_raw_timestamp = run["SA3_XTD10_PES/MDL/DAQ_MPOD", "u212.timestamp"].select_trains(by_id[tids]).ndarray() + t = list() + t_names = list() + # these have been manually selected: #useful_channels = ["channel_1_D", # "channel_2_B", @@ -116,34 +122,45 @@ def main(): # "channel_3_B", # "channel_4_C", # "channel_4_D"] - model = Model(channels=channels, - n_pca_lr=600, - n_pca_hr=40, - high_res_sigma=0.2, - tof_start=None, - delta_tof=300, - validation_size=0.05) + model = Model() train_idx = np.isin(tids, train_tids) model.debug_peak_finding(pes_raw, "test_peak_finding.png") print("Fitting") + start = time_ns() model.fit({k: v[train_idx, :] for k, v in pes_raw.items()}, spec_raw_int[train_idx, :], spec_raw_pe[train_idx, :]) + t += [time_ns() - start] + t_names += ["Fit"] spec_smooth = model.preprocess_high_res(spec_raw_int, spec_raw_pe) print("Saving the model") + start = time_ns() model.save("model.h5") + t += [time_ns() - start] + t_names += ["Save"] print("Loading the model") + start = time_ns() model = Model() model.load("model.h5") + t += [time_ns() - start] + t_names += ["Load"] # test print("Predict") + start = time_ns() spec_pred = model.predict(pes_raw) + t += [time_ns() - start] + t_names += ["Predict"] + + print("Time taken in ms") + df_time = pd.DataFrame(data=dict(time=t, name=t_names)) + df_time.time *= 1e-6 + print(df_time) print("Plotting") # plot -- GitLab