Skip to content
Snippets Groups Projects
Commit 8eb2d8be authored by Danilo Ferreira de Lima's avatar Danilo Ferreira de Lima
Browse files

Save properties.

parent 4ca967d5
No related branches found
No related tags found
No related merge requests found
...@@ -9,7 +9,7 @@ from scipy.optimize import fmin_l_bfgs_b ...@@ -9,7 +9,7 @@ from scipy.optimize import fmin_l_bfgs_b
from sklearn.decomposition import PCA, IncrementalPCA from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import logging from time import time_ns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -84,6 +84,18 @@ class Model(object): ...@@ -84,6 +84,18 @@ class Model(object):
# smoothing of the SPEC data in eV # smoothing of the SPEC data in eV
self.high_res_sigma = high_res_sigma self.high_res_sigma = high_res_sigma
def parameters(self) -> Dict[str, Any]:
    """
    Collect the model settings into a plain dictionary.

    Returns: a new dictionary whose keys are the setting names and whose
             values are read from the attributes of the same name.
    """
    setting_names = (
        "channels",
        "n_pca_lr",
        "n_pca_hr",
        "high_res_sigma",
        "tof_start",
        "delta_tof",
        "validation_size",
    )
    # each key maps to the attribute carrying the identical name
    return {name: getattr(self, name) for name in setting_names}
def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray: def preprocess_low_res(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
""" """
Get a dictionary with the channel names for the input low resolution data and output Get a dictionary with the channel names for the input low resolution data and output
...@@ -181,34 +193,23 @@ class Model(object): ...@@ -181,34 +193,23 @@ class Model(object):
self.high_res_photon_energy = high_res_photon_energy self.high_res_photon_energy = high_res_photon_energy
print("Find peaks.")
# if the prompt peak has not been given, guess it # if the prompt peak has not been given, guess it
if self.tof_start is None: if self.tof_start is None:
self.tof_start = self.estimate_prompt_peak(low_res_data) self.tof_start = self.estimate_prompt_peak(low_res_data)
print("Prompt at", self.tof_start)
print("Pre-processing low")
low_res = self.preprocess_low_res(low_res_data) low_res = self.preprocess_low_res(low_res_data)
print("Pre-processing high")
high_res = self.preprocess_high_res(high_res_data, high_res_photon_energy) high_res = self.preprocess_high_res(high_res_data, high_res_photon_energy)
# fit PCA # fit PCA
print("PCA low", low_res.shape)
low_pca = self.lr_pca.fit_transform(low_res) low_pca = self.lr_pca.fit_transform(low_res)
print("PCA high")
high_pca = self.hr_pca.fit_transform(high_res) high_pca = self.hr_pca.fit_transform(high_res)
print("Split")
# split in train and test for PCA uncertainty evaluation # split in train and test for PCA uncertainty evaluation
low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42) low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42)
# fit the linear model # fit the linear model
print("Fit")
self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test) self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test)
print("PCA unc")
high_pca_rec = self.hr_pca.inverse_transform(high_pca) high_pca_rec = self.hr_pca.inverse_transform(high_pca)
self.high_pca_unc = np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True)) self.high_pca_unc = np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True))
print("Done")
return high_res return high_res
def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray: def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
...@@ -228,8 +229,13 @@ class Model(object): ...@@ -228,8 +229,13 @@ class Model(object):
high_pca = self.fit_model.predict(low_pca) high_pca = self.fit_model.predict(low_pca)
high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"]) high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"])
n_high_res_features = high_res_predicted.shape[1] n_high_res_features = high_res_predicted.shape[1]
high_res_unc = self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"]) - high_res_predicted high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"])
result = np.stack((high_res_predicted, high_res_unc, np.broadcast_to(self.high_pca_unc, (n_trains, n_high_res_features))), axis=2) - high_res_predicted)
result = np.stack((high_res_predicted,
high_res_unc,
np.broadcast_to(self.high_pca_unc,
(n_trains, n_high_res_features))),
axis=2)
return result return result
def save(self, filename: str, lr_pca_filename: str, hr_pca_filename: str): def save(self, filename: str, lr_pca_filename: str, hr_pca_filename: str):
...@@ -243,6 +249,7 @@ class Model(object): ...@@ -243,6 +249,7 @@ class Model(object):
""" """
with h5py.File(filename, 'w') as hf: with h5py.File(filename, 'w') as hf:
d = self.fit_model.as_dict() d = self.fit_model.as_dict()
d.update(self.parameters())
for key, value in d.items(): for key, value in d.items():
if isinstance(value, int): if isinstance(value, int):
hf.attrs[key] = value hf.attrs[key] = value
...@@ -266,6 +273,8 @@ class Model(object): ...@@ -266,6 +273,8 @@ class Model(object):
d = {k: hf[k][()] for k in hf.keys()} d = {k: hf[k][()] for k in hf.keys()}
d.update({k: hf.attrs[k] for k in hf.attrs}) d.update({k: hf.attrs[k] for k in hf.attrs})
self.fit_model.from_dict(d) self.fit_model.from_dict(d)
for key in self.parameters().keys():
setattr(self, key, d[key])
self.lr_pca = joblib.load(lr_pca_filename) self.lr_pca = joblib.load(lr_pca_filename)
self.hr_pca = joblib.load(hr_pca_filename) self.hr_pca = joblib.load(hr_pca_filename)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment