Clean up.

08453c3c · Danilo Ferreira de Lima · e92c84ca · 08453c3c · 08453c3c
Commit 08453c3c authored 2 years ago by Danilo Ferreira de Lima
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -9,6 +9,7 @@ from sklearn.decomposition import PCA, IncrementalPCA
 from sklearn.model_selection import train_test_split
 from sklearn.base import TransformerMixin, BaseEstimator
 from itertools import product
+from time import time_ns

 import matplotlib.pyplot as plt

@@ -142,7 +143,7 @@ class Model(TransformerMixin, BaseEstimator):
        self.hr_pca = PCA(n_pca_hr, whiten=True)

        # PCA unc. in high resolution
-        self.high_pca_unc: Optional[np.ndarray] = None
+        self.high_pca_unc: np.ndarray = np.zeros((1, 0), dtype=float)

        # fit model
        self.fit_model = FitModel()
@@ -296,16 +297,22 @@ class Model(TransformerMixin, BaseEstimator):
        low_pca = self.lr_pca.fit_transform(low_res)
        high_pca = self.hr_pca.fit_transform(high_res)
        # split in train and test for PCA uncertainty evaluation
-        low_pca_train, low_pca_test, high_pca_train, high_pca_test = train_test_split(low_pca, high_pca, test_size=self.validation_size, random_state=42)
+        (low_pca_train, low_pca_test,
+         high_pca_train, high_pca_test) = train_test_split(low_pca, high_pca,
+                                                           test_size=self.validation_size,
+                                                           random_state=42)
        # fit the linear model
-        self.fit_model.fit(low_pca_train, high_pca_train, low_pca_test, high_pca_test)
+        self.fit_model.fit(low_pca_train,
+                           high_pca_train,
+                           low_pca_test,
+                           high_pca_test)

        high_pca_rec = self.hr_pca.inverse_transform(high_pca)
        self.high_pca_unc =  np.sqrt(np.mean((high_res - high_pca_rec)**2, axis=0, keepdims=True))

        return high_res

-    def predict(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
+    def predict(self, low_res_data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        """
        Predict a high-resolution spectrum from a low resolution given one.
        The output includes the uncertainty in its second and third entries of the first dimension.
@@ -313,25 +320,22 @@ class Model(TransformerMixin, BaseEstimator):
        Args:
          low_res_data: Low resolution data as in the fit step with shape (train_id, channel, ToF channel).

-        Returns: High resolution data with shape (train_id, ToF channel, 3).
-                 The component 0 of the last dimension is the predicted spectrum.
-                 Components 1 and 2 correspond to two sources of uncertainty.
+        Returns: High resolution data with shape (train_id, energy channel) in a dictionary containing
+                 the expected prediction in key "expected", the stat. uncertainty in key "unc" and
+                 a (1, energy channel) array for the PCA syst. uncertainty in key "pca".
        """
        low_res = self.preprocess_low_res(low_res_data)
        low_pca = self.lr_pca.transform(low_res)
-        n_trains = low_res.shape[0]
        # Get high res.
        high_pca = self.fit_model.predict(low_pca)
-        high_res_predicted = self.hr_pca.inverse_transform(high_pca["Y"])
-        n_high_res_features = high_res_predicted.shape[1]
-        high_res_unc = (self.hr_pca.inverse_transform(high_pca["Y"] + high_pca["Y_eps"])
-                         - high_res_predicted)
-        result = np.stack((high_res_predicted,
-                           high_res_unc,
-                           np.broadcast_to(self.high_pca_unc,
-                                           (n_trains, n_high_res_features))),
-                           axis=2)
-        return result
+        n_trains = low_pca.shape[0]
+        pca_y = np.concatenate((high_pca["Y"], high_pca["Y"] + high_pca["Y_eps"]), axis=0)
+        high_res_predicted = self.hr_pca.inverse_transform(pca_y)
+        expected = high_res_predicted[:n_trains, :]
+        unc = high_res_predicted[n_trains:, :] - expected
+        return dict(expected=expected,
+                    unc=unc,
+                    pca=self.high_pca_unc)

    def save(self, filename: str):
        """
@@ -557,14 +561,5 @@ class FitModel(object):
        result["Y_unc"] = self.u_inf[0,:]
        # input-dependent uncertainty
        result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"])
-
-        #self.result["res"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"]) # transform PCA space to real space
-        #self.result["res_unc"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.model["u_inf"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
-        #self.result["res_unc"] = np.fabs(self.result["res_unc"])
-        #self.result["res_eps"] = self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] + self.result["res_pca_eps"]) - self.model["spec_pca_model"].inverse_transform(self.result["res_pca"] )
-        #self.result["res_eps"] = np.fabs(self.result["res_eps"])
-        #self.Yhat_pca = self.model["spec_pca_model"].inverse_transform(self.model["Y_test"])
-        #self.result["res_unc_specpca"] =  np.sqrt(((self.Yhat_pca - self.model["spec_target"])**2).mean(axis=0))
-        #self.result["res_unc_total"] =  np.sqrt(self.result["res_eps"]**2 + self.result["res_unc_specpca"]**2)
        return result

--- a/pes_to_spec/test/offline_analysis.py
+++ b/pes_to_spec/test/offline_analysis.py
@@ -15,7 +15,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 from matplotlib.gridspec import GridSpec

-from typing import Optional
+from typing import Dict, Optional

 from time import time_ns
 import pandas as pd
@@ -40,13 +40,13 @@ def plot_pes(filename: str, pes_raw_int: np.ndarray):
    fig.savefig(filename)
    plt.close(fig)

-def plot_result(filename: str, spec_pred: np.ndarray, spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None):
+def plot_result(filename: str, spec_pred: Dict[str, np.ndarray], spec_smooth: np.ndarray, spec_raw_pe: np.ndarray, spec_raw_int: Optional[np.ndarray]=None):
    """
    Plot result with uncertainty band.

    Args:
      filename: Output file name.
-      spec_pred: Predicted result with uncertainty bands in a shape of (3, features).
+      spec_pred: Predicted result with uncertainty bands in a dictionary.
      spec_smooth: Smoothened expected result with shape (features,).
      spec_raw_pe: x axis with the photon energy in eV.
      spec_raw_int: Original true expected result with shape (features,).
@@ -55,15 +55,16 @@ def plot_result(filename: str, spec_pred: np.ndarray, spec_smooth: np.ndarray, s
    fig = plt.figure(figsize=(16, 8))
    gs = GridSpec(1, 1)
    ax = fig.add_subplot(gs[0, 0])
-    eps = np.mean(spec_pred[:, 1])
+    unc_stat = np.mean(spec_pred["unc"])
+    unc_pca = np.mean(spec_pred["pca"])
    ax.plot(spec_raw_pe, spec_smooth, c='b', lw=3, label="High-resolution measurement (smoothened)")
-    ax.plot(spec_raw_pe, spec_pred[:, 0], c='r', lw=3, label="High-resolution prediction")
-    ax.fill_between(spec_raw_pe, spec_pred[:, 0] - spec_pred[:, 1], spec_pred[:, 0] + spec_pred[:, 1], facecolor='red', alpha=0.6, label="68% unc. (stat.)")
-    ax.fill_between(spec_raw_pe, spec_pred[:, 0] - spec_pred[:, 2], spec_pred[:, 0] + spec_pred[:, 2], facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)")
+    ax.plot(spec_raw_pe, spec_pred["expected"], c='r', lw=3, label="High-resolution prediction")
+    ax.fill_between(spec_raw_pe, spec_pred["expected"] - spec_pred["unc"], spec_pred["expected"] + spec_pred["unc"], facecolor='red', alpha=0.6, label="68% unc. (stat.)")
+    ax.fill_between(spec_raw_pe, spec_pred["expected"] - spec_pred["pca"], spec_pred["expected"] + spec_pred["pca"], facecolor='magenta', alpha=0.6, label="68% unc. (syst., PCA)")
    if spec_raw_int is not None:
        ax.plot(spec_raw_pe, spec_raw_int, c='b', lw=1, ls='--', label="High-resolution measurement")
    ax.legend()
-    ax.set(title=f"avg(unc) = {eps}",
+    ax.set(title=f"avg(stat unc) = {unc_stat}, avg(pca unc) = {unc_pca}",
           xlabel="Photon energy [eV]",
           ylabel="Intensity")
    fig.savefig(filename)
@@ -167,7 +168,9 @@ def main():
    for tid in test_tids:
        idx = np.where(tid==tids)[0][0]
        plot_result(f"test_{tid}.png",
-                    spec_pred[idx, :, :],
+                   {k: item[idx, ...] if k != "pca"
+                       else item[0, ...]
+                       for k, item in spec_pred.items()},
                    spec_smooth[idx, :],
                    spec_raw_pe[idx, :],
                    spec_raw_int[idx, :])