Skip to content
Snippets Groups Projects
Commit 08453c3c authored by Danilo Ferreira de Lima's avatar Danilo Ferreira de Lima
Browse files

Clean up.

parent e92c84ca
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.model_selection import train_test_split
from sklearn.base import TransformerMixin, BaseEstimator
from itertools import product
from time import time_ns
import matplotlib.pyplot as plt
......@@ -142,7 +143,7 @@ class Model(TransformerMixin, BaseEstimator):
self.hr_pca = PCA(n_pca_hr, whiten=True)
# PCA unc. in high resolution
self.high_pca_unc: Optional[np.ndarray] = None
self.high_pca_unc: np.ndarray = np.zeros((1, 0), dtype=float)
# fit model
self.fit_model = FitModel()
......@@ -296,16 +297,22 @@ class Model(TransformerMixin, BaseEstimator):
low_pca = self.lr_pca.fit_transform(low_res)
high_pca = self.hr_pca.fit_transform(high_res)
# split in train and test for PCA uncertainty evaluation
low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42)
(low_pca_train, low_pca_test,
high_pca_train, high_pca_test) = train_test_split(low_pca, high_pca,
test_size=self.validation_size,
random_state=42)
# fit the linear model
self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test)
self.fit_model.fit(low_pca_train,
high_pca_train,
low_pca_test,
high_pca_test)
high_pca_rec = self.hr_pca.inverse_transform(high_pca)
self.high_pca_unc = np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True))
return high_res
def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
def predict(self, low_res_data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
"""
Predict a high-resolution spectrum from a low resolution given one.
The output includes the uncertainty in its second and third entries of the first dimension.
......@@ -313,25 +320,22 @@ class Model(TransformerMixin, BaseEstimator):
Args:
low_res_data: Low resolution data as in the fit step with shape (train_id, channel, ToF channel).
Returns: High resolution data with shape (train_id, ToF channel, 3).
The component 0 of the last dimension is the predicted spectrum.
Components 1 and 2 correspond to two sources of uncertainty.
Returns: High resolution data with shape (train_id, energy channel) in a dictionary containing
the expected prediction in key "expected", the stat. uncertainty in key "unc" and
a (1, energy channel) array for the PCA syst. uncertainty in key "pca".
"""
low_res = self.preprocess_low_res(low_res_data)
low_pca = self.lr_pca.transform(low_res)
n_trains = low_res.shape[0]
# Get high res.
high_pca = self.fit_model.predict(low_pca)
high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"])
n_high_res_features = high_res_predicted.shape[1]
high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"])
- high_res_predicted)
result = np.stack((high_res_predicted,
high_res_unc,
np.broadcast_to(self.high_pca_unc,
(n_trains, n_high_res_features))),
axis=2)
return result
n_trains = low_pca.shape[0]
pca_y = np.concatenate((high_pca["Y"], high_pca["Y"] + high_pca["Y_eps"]), axis=0)
high_res_predicted = self.hr_pca.inverse_transform(pca_y)
expected = high_res_predicted[:n_trains, :]
unc = high_res_predicted[n_trains:, :] - expected
return dict(expected=expected,
unc=unc,
pca=self.high_pca_unc)
def save(self, filename: str):
"""
......@@ -557,14 +561,5 @@ class FitModel(object):
result["Y_unc"] = self.u_inf[0,:]
# input-dependent uncertainty
result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"])
#self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space
#self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
#self.result["res_unc"] = np.fabs(self.result["res_unc"])
#self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
#self.result["res_eps"] = np.fabs(self.result["res_eps"])
#self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"])
#self.result["res_unc_specpca"] = np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0))
#self.result["res_unc_total"] = np.sqrt(self.result["res_eps"]**2 + self.result["res_unc_specpca"]**2)
return result
......@@ -15,7 +15,7 @@ matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from typing import Optional
from typing import Dict, Optional
from time import time_ns
import pandas as pd
......@@ -40,13 +40,13 @@ def plot_pes(filename: str, pes_raw_int: np.ndarray):
fig.savefig(filename)
plt.close(fig)
def plot_result(filename: str, spec_pred: np.ndarray, spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None):
def plot_result(filename: str, spec_pred: Dict[str, np.ndarray], spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None):
"""
Plot result with uncertainty band.
Args:
filename: Output file name.
spec_pred: Predicted result with uncertainty bands in a shape of (3, features).
spec_pred: Predicted result with uncertainty bands in a dictionary.
spec_smooth: Smoothened expected result with shape (features,).
spec_raw_pe: x axis with the photon energy in eV.
spec_raw_int: Original true expected result with shape (features,).
......@@ -55,15 +55,16 @@ def plot_result(filename: str, spec_pred: np.ndarray, spec_smooth: np.ndarray, s
fig = plt.figure(figsize=(16, 8))
gs = GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
eps = np.mean(spec_pred[:, 1])
unc_stat = np.mean(spec_pred["unc"])
unc_pca = np.mean(spec_pred["pca"])
ax.plot(spec_raw_pe, spec_smooth, c='b', lw=3, label="High-resolution measurement (smoothened)")
ax.plot(spec_raw_pe, spec_pred[:, 0], c='r', lw=3, label="High-resolution prediction")
ax.fill_between(spec_raw_pe, spec_pred[:, 0] - spec_pred[:, 1], spec_pred[:, 0] + spec_pred[:, 1], facecolor='red', alpha=0.6, label="68% unc. (stat.)")
ax.fill_between(spec_raw_pe, spec_pred[:, 0] - spec_pred[:, 2], spec_pred[:, 0] + spec_pred[:, 2], facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)")
ax.plot(spec_raw_pe, spec_pred["expected"], c='r', lw=3, label="High-resolution prediction")
ax.fill_between(spec_raw_pe, spec_pred["expected"] - spec_pred["unc"], spec_pred["expected"] + spec_pred["unc"], facecolor='red', alpha=0.6, label="68% unc. (stat.)")
ax.fill_between(spec_raw_pe, spec_pred["expected"] - spec_pred["pca"], spec_pred["expected"] + spec_pred["pca"], facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)")
if spec_raw_int is not None:
ax.plot(spec_raw_pe, spec_raw_int, c='b', lw=1, ls='--', label="High-resolution measurement")
ax.legend()
ax.set(title=f"avg(unc) = {eps}",
ax.set(title=f"avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}",
xlabel="Photon energy [eV]",
ylabel="Intensity")
fig.savefig(filename)
......@@ -167,7 +168,9 @@ def main():
for tid in test_tids:
idx = np.where(tid==tids)[0][0]
plot_result(f"test_{tid}.png",
spec_pred[idx, :, :],
{k: item[idx, ...] if k != "pca"
else item[0, ...]
for k, item in spec_pred.items()},
spec_smooth[idx, :],
spec_raw_pe[idx, :],
spec_raw_int[idx, :])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment