Storing KDE of the XGM and intensity profiles.

13e51a7d · Danilo Ferreira de Lima · f83e3c90 · 13e51a7d
Commit 13e51a7d authored 2 years ago by Danilo Ferreira de Lima
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -52,11 +52,11 @@ class FitModel(RegressorMixin, BaseEstimator):
        # fit result
        self.pars: Optional[Dict[str, np.ndarray]] = None
        self.structure: Optional[Dict[str, Tuple[int, int]]] = None
        # fit monitoring
        self.loss_train: List[float] = list()
        self.loss_test: List[float] = list()
        self.nll_train: Optional[np.ndarray] = None
        self.nll_train_expected: Optional[np.ndarray] = None
@@ -163,7 +163,7 @@ class FitModel(RegressorMixin, BaseEstimator):
                              iprint=0)
        # Inference
-        self.pars = FitModel.get_pars(sc_op[0], self.structure)        
+        self.pars = FitModel.get_pars(sc_op[0], self.structure)
        self.nll_train = sc_op[1]
        self.nll_train_expected = np.mean(self.nll(X, pars=self.pars), axis=0, keepdims=True)
@@ -219,22 +219,22 @@ class FitModel(RegressorMixin, BaseEstimator):
        -log p = log(2) + log(b)
        We return -log [p(M|X,Y=mu(X))/p(M|X_train,Y_train)] = -log p(M|X,Y=mu(X)) + log p(M|X_train, Y_traun)
        Negative log likelihood L allows one
        Args:
          X: The input data.
          Y: The true result. If None, set it to the expectation.
        Returns: negative log likelihood.
        """
        if pars is None:
            pars = self.pars
        A = pars["A_inf"]
        b = pars["b_inf"]
        X2 = anp.square(X)
        Ap = pars["Ap_inf"]
        Y_pred = anp.matmul(X2, Ap) + anp.matmul(X, A) + b
        log_inv_alpha = anp.matmul(X, pars["log_inv_alpha_prime"]) + pars["log_inv_alpha"]
        sqrt_alpha = anp.exp(-0.5*log_inv_alpha)
        #log_inv_tau1 = pars["log_inv_tau1"]
@@ -399,7 +399,7 @@ class UncertaintyHolder(TransformerMixin, BaseEstimator):
    def uncertainty(self):
        """The uncertainty recorded."""
        return self.unc
 class DataHolder(TransformerMixin, BaseEstimator):
    """
    Keep track of relevance dictionaries.
@@ -416,7 +416,7 @@ class DataHolder(TransformerMixin, BaseEstimator):
          data: Data.
        """
        self.data = deepcopy(data)
    def get_data(self) -> Dict[str, Any]:
        """
        Get.
@@ -647,7 +647,7 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
            return np.asarray(y).T, np.asarray(unc).T
        return np.asarray(y).T
 class Model(TransformerMixin, BaseEstimator):
    """
    Object representing a previous fit of the model to be used to predict high-resolution
@@ -707,6 +707,8 @@ class Model(TransformerMixin, BaseEstimator):
        self.fit_model = MultiOutputWithStd(BayesianRidge(n_iter=300, tol=1e-8, verbose=True), n_jobs=8)
        #self.fit_model = FitModel()
+        self.kde_xgm = None
+        self.kde_intensity = None
        self.mu_intensity = np.nan
        self.std_intensity = np.nan
@@ -754,22 +756,22 @@ class Model(TransformerMixin, BaseEstimator):
        Returns: Smoothened spectrum.
        """
        return self.y_model['smoothen'].transform(high_res_data)
    def uniformize(self, intensity: np.ndarray) -> np.ndarray:
        """
        Calculate weights to uniformize data in variable intensity.
        Args:
          intensity: The variable to uniformize the weights on.
        Return: weights.
        """
-        kde = KernelDensity()
+        self.kde_xgm = KernelDensity()
-        kde.fit(intensity)
+        self.kde_xgm.fit(intensity)
        q = np.quantile(intensity, [0.10, 0.90])
        l, h = q[0], q[1]
        x = intensity*((intensity > l) & (intensity < h)) + l*(intensity <= l) + h*(intensity >= h)
-        log_prob = kde.score_samples(x)
+        log_prob = self.kde_xgm.score_samples(x)
        w = np.exp(-log_prob)
        w = w/np.median(w)
        return w
@@ -793,7 +795,7 @@ class Model(TransformerMixin, BaseEstimator):
        Returns: Smoothened high resolution spectrum.
        """
        if weights is None:
-            weights = np.ones(high_Res_data.shape[0])
+            weights = np.ones(high_res_data.shape[0])
        print("Fitting PCA on low-resolution data.")
        low_res_select = self.x_select.fit_transform(low_res_data)
        n_components = min(self.x_model["pca"].n_components, low_res_select.shape[0])
@@ -802,6 +804,8 @@ class Model(TransformerMixin, BaseEstimator):
        print("Fitting PCA on high-resolution data.")
        y_t = self.y_model.fit_transform(high_res_data, smoothen__energy=high_res_photon_energy)
        intensity = np.sum(y_t, axis=1)
+        self.kde_intensity = KernelDensity()
+        self.kde_intensity.fit(intensity)
        self.mu_intensity = np.mean(intensity, axis=0)
        self.sigma_intensity = np.mean(intensity, axis=0)
        #self.fit_model.set_params(fex__gamma=1.0/float(x_t.shape[0]))
@@ -826,9 +830,7 @@ class Model(TransformerMixin, BaseEstimator):
            print(f"Calculate PCA on {channel}")
            low_pca = self.channel_pca[channel].fit_transform(low_res_selected[channel])
            self.ood[channel].fit(low_pca)
-            #low_pca_rec = self.channel_pca[channel].inverse_transform(low_pca)
-            #self.channel_fit_model[channel].fit(low_pca, y_t)
        print("End of fit.")
        return high_res
@@ -844,27 +846,12 @@ class Model(TransformerMixin, BaseEstimator):
        Returns: Outlier score. If it is bigger than 0, this is an outlier.
        """
        selection_model = self.x_select
-        quality = {channel: 0.0 for channel in low_res_data.keys()}
        channels = list(low_res_data.keys())
        # check if each channel is close to the mean
        low_res_selected = selection_model.transform(low_res_data, keep_dictionary_structure=True)
        low_pca = {ch: self.channel_pca[ch].transform(low_res_selected[ch])
                   for ch in channels}
        return {ch: self.ood[ch].predict(low_pca[ch]) for ch in channels}
-        ## for chi2
-        #deviation = {ch: low_pca[ch]
-        #             for ch in channels}
-        #ndof = {ch: float(deviation[ch].shape[1] - 1)
-        #        for ch in channels}
-        #chi2 = {ch: np.sum(deviation[ch]**2, axis=1, keepdims=True)
-        #        for ch in channels}
-        #chi2_mu = {ch: scipy.stats.chi2.mean(ndof[ch])
-        #           for ch in channels}
-        #chi2_sigma = {ch: scipy.stats.chi2.std(ndof[ch])
-        #              for ch in channels}
-        #return {ch: (chi2[ch] - chi2_mu[ch])/chi2_sigma[ch]
-        #        for ch in channels}
    def check_compatibility(self, low_res_data: Dict[str, np.ndarray]) -> np.ndarray:
        """
@@ -878,17 +865,16 @@ class Model(TransformerMixin, BaseEstimator):
        """
        low_res = self.x_select.transform(low_res_data)
        pca_model = self.x_model
-        #pca_model = self.x_model['pca']
-        #if 'fex' in self.x_model.named_steps:
-        #    pca_model = self.x_model['fex'].named_steps['prepca']
        low_pca = pca_model.transform(low_res)
        return self.ood['full'].predict(low_pca)
-        #deviation = low_pca
-        #ndof = float(deviation.shape[1] - 1)
+    def xgm_profile(self) -> KernelDensity:
-        #chi2 = np.sum(deviation**2, axis=1, keepdims=True)
+        """Get KDE for the XGM intensity."""
-        #chi2_mu = scipy.stats.chi2.mean(ndof)
+        return self.kde_xgm
-        #chi2_sigma = scipy.stats.chi2.std(ndof)
-        #return (chi2 - chi2_mu)/chi2_sigma
+    def intensity_profile(self) -> KernelDensity:
+        """Get KDE for the predicted intensity."""
+        return self.kde_intensity
    def predict(self, low_res_data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        """
@@ -945,7 +931,9 @@ class Model(TransformerMixin, BaseEstimator):
                     #self.channel_fit_model
                     DataHolder(self.mu_intensity),
                     DataHolder(self.sigma_intensity),
-                     self.ood
+                     self.ood,
+                     self.kde_xgm,
+                     self.kde_intensity,
                     ], filename, compress='zlib')
    @staticmethod
@@ -964,7 +952,9 @@ class Model(TransformerMixin, BaseEstimator):
         #channel_fit_model
         mu_intensity,
         sigma_intensity,
-         ood
+         ood,
+         kde_xgm,
+         kde_intensity,
        ) = joblib.load(filename)
        obj = Model()
        obj.x_select = x_select
@@ -976,5 +966,7 @@ class Model(TransformerMixin, BaseEstimator):
        obj.ood = ood
        obj.mu_intensity = mu_intensity.get_data()
        obj.sigma_intensity = sigma_intensity.get_data()
+        obj.kde_xgm = kde_xgm
+        obj.kde_intensity = kde_intensity
        return obj