From 44353618804cd63f5fde7bb91ce3cf1bf8e47c93 Mon Sep 17 00:00:00 2001
From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de>
Date: Thu, 12 Jan 2023 15:33:34 +0100
Subject: [PATCH] Use ARDRegression instead of BFGS to avoid maintaining custom
 fitting code.
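
Replace the custom autograd/BFGS fit with per-output ARDRegression models
wrapped in a new MultiOutputWithStd meta-estimator, which forwards
return_std so predictive uncertainties stay available. FitModel itself is
kept, but now exposes a scikit-learn-style predict(X, return_std=...) and
reinstates the input-dependent uncertainty term (A_eps); its as_dict and
from_dict helpers are removed. The Nystroem feature map shrinks from 10000
to 1000 components (gamma left at its default) and n_pca_hr drops from 100
to 30.

Intended usage after this change (a minimal sketch; the data shapes, the
n_iter value and the n_jobs setting below are illustrative only):

    import numpy as np
    from sklearn.linear_model import ARDRegression
    from pes_to_spec.model import MultiOutputWithStd

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 20))  # input features
    Y = rng.normal(size=(100, 3))   # multi-output targets

    # one ARD regressor is fit per output column, in parallel
    model = MultiOutputWithStd(ARDRegression(n_iter=10), n_jobs=-1)
    model.fit(X, Y)
    mean, std = model.predict(X, return_std=True)  # each of shape (100, 3)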

---
 pes_to_spec/model.py | 155 ++++++++++++++++++++++++++++------------------
 1 file changed, 95 insertions(+), 60 deletions(-)

diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py
index ee97b7e..a45f760 100644
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -13,13 +13,18 @@ from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import TransformerMixin, BaseEstimator
 from sklearn.base import RegressorMixin
 from sklearn.kernel_approximation import Nystroem
-from sklearn.linear_model import Ridge
+from sklearn.linear_model import ARDRegression
+#from sklearn.gaussian_process import GaussianProcessRegressor
+#from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
 from itertools import product
 from sklearn.model_selection import train_test_split
 
+from sklearn.base import clone, MetaEstimatorMixin
+from joblib import Parallel, delayed
+
 import matplotlib.pyplot as plt
 
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union, Tuple
 
 def matching_ids(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.ndarray:
     """Returns list of train IDs common to sets a, b and c."""
@@ -311,11 +316,10 @@ class FitModel(RegressorMixin, BaseEstimator):
 
         # initial parameter values
         A0: np.ndarray = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny)
-        #Aeps: np.ndarray = np.zeros(self.Nx)
         b0: np.ndarray = np.zeros(self.Ny)
-        u0: np.ndarray = -2*np.ones(self.Ny)
-        #x0: np.ndarray = np.concatenate((A0, b0, u0, Aeps), axis=0)
-        x0: np.ndarray = np.concatenate((A0, b0, u0), axis=0)
+        Aeps: np.ndarray = np.zeros(self.Nx)
+        u0: np.ndarray = np.zeros(self.Ny)
+        x0: np.ndarray = np.concatenate((A0, b0, u0, Aeps), axis=0)
 
         # reset loss monitoring
         self.loss_train: List[float] = list()
@@ -337,9 +341,9 @@
             b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny))
 
             b_eps = x[(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape((1, self.Ny))
-            #A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1))
-            #log_unc = anp.matmul(X, A_eps) + b_eps
-            log_unc = b_eps
+            A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1))
+            log_unc = anp.matmul(X, A_eps) + b_eps
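+            # log of the predictive uncertainty: input-dependent (heteroscedastic) term plus offset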
 
             #log_unc = anp.log(anp.exp(log_unc) + anp.exp(log_eps))
             iunc2 = anp.exp(-2*log_unc)
@@ -393,63 +397,93 @@
         self.A_inf = sc_op[0][:self.Nx*self.Ny].reshape(self.Nx, self.Ny)
         self.b_inf = sc_op[0][self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape(1, self.Ny)
         self.u_inf = sc_op[0][(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape(1, self.Ny) # removed np.exp
-        #self.A_eps = sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1)
+        self.A_eps = sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1)
 
-    def as_dict(self) -> Dict[str, Any]:
+    def predict(self, X: np.ndarray, return_std: bool=False) -> Union[Tuple[np.ndarray, np.ndarray], np.ndarray]:
         """
-        Export relevant variables to rebuild this object from a simple dictionary.
+        Predict y from X.
 
         Args:
+          X: Input dataset.
+          return_std: Whether to also return the predictive uncertainty.
 
-        Returns: Dictionary with all relevant variables.
+        Returns: Predicted Y and, if return_std is True, also its uncertainty.
         """
-        return dict(
-                    A_inf=self.A_inf,
-                    b_inf=self.b_inf,
-                    u_inf=self.u_inf,
-                    #A_eps=self.A_eps,
-                    loss_train=self.loss_train,
-                    loss_test=self.loss_test,
-                    Nx=self.Nx,
-                    Ny=self.Ny)
-    def from_dict(self, in_dict: Dict[str, Any]):
-        """
-        Rebuild this object from a dictionary.
 
-        Args:
-          in_dict: The input dictionary with relevant variables.
+        # mean prediction of the linear model
+        y = (X @ self.A_inf + self.b_inf)
+        if not return_std:
+            return y
+        # flat uncertainty
+        y_unc = self.u_inf[0,:]
+        # input-dependent uncertainty
+        y_eps = np.exp(X @ self.A_eps + y_unc)
+        return y, y_eps
 
-        """
-        self.A_inf = in_dict["A_inf"]
-        self.b_inf = in_dict["b_inf"]
-        self.u_inf = in_dict["u_inf"]
-        #self.A_eps = in_dict["A_eps"]
-        self.loss_train = in_dict["loss_train"]
-        self.loss_test = in_dict["loss_test"]
-        self.Nx = in_dict["Nx"]
-        self.Ny = in_dict["Ny"]
 
-    def predict(self, X: np.ndarray) -> Dict[str, np.ndarray]:
-        """
-        Predict y from X.
+def _fit_estimator(estimator, X: np.ndarray, y: np.ndarray):
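+    """Fit a fresh clone of ``estimator`` on a single target column."""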
+    estimator = clone(estimator)
+    estimator.fit(X, y)
+    return estimator
+
+class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
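+    """Meta-estimator fitting one clone of ``estimator`` per target column.
+    Unlike sklearn's MultiOutputRegressor, ``predict`` forwards ``return_std``
+    so per-output predictive standard deviations are also available."""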
+
+    def __init__(self, estimator, *, n_jobs=None):
+        self.estimator = estimator
+        self.n_jobs = n_jobs
+
+    def fit(self, X: np.ndarray, y: np.ndarray):
+        """Fit the model to data, separately for each output variable.
 
         Args:
-          X: Input dataset.
+          X: {array-like, sparse matrix} of shape (n_samples, n_features)
+            The input data.
+          y: {array-like, sparse matrix} of shape (n_samples, n_outputs)
+            Multi-output targets. An indicator matrix turns on multilabel
+            estimation.
+
+        Returns: self.
+        """
+        if y.ndim == 1:
+            raise ValueError(
+                "y must have at least two dimensions for "
+                "multi-output regression but has only one."
+            )
+
+        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
+            delayed(_fit_estimator)(
+                self.estimator, X, y[:, i]
+            )
+            for i in range(y.shape[1])
+        )
 
-        Returns: Predicted Y.
-        """
+        return self
 
-        result = dict()
+    def predict(self, X: np.ndarray, return_std: bool=False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+        """Predict multi-output variable using model for each target variable.
 
-        # result
-        result["Y"] = (X @ self.A_inf + self.b_inf)
-        # flat uncertainty
-        result["Y_unc"] = self.u_inf[0,:]
-        # input-dependent uncertainty
-        #result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"])
-        result["Y_eps"] = np.exp(result["Y_unc"])
-        return result
+        Args:
+          X: {array-like, sparse matrix} of shape (n_samples, n_features)
+            The input data.
+          return_std: If True, also return the predictive standard deviation.
+
+        Returns: {array-like, sparse matrix} of shape (n_samples, n_outputs)
+            Multi-output targets predicted by the separate per-output
+            estimators. If return_std is True, an array of the same shape
+            with the predictive standard deviations is returned as well.
+        """
+        y = Parallel(n_jobs=self.n_jobs)(
+            delayed(e.predict)(X, return_std) for e in self.estimators_
+        )
+        if return_std:
+            y, unc = zip(*y)
+            return np.asarray(y).T, np.asarray(unc).T
 
+        return np.asarray(y).T
 
 class Model(TransformerMixin, BaseEstimator):
     """
@@ -475,12 +509,13 @@
                  channels:List[str]=[f"channel_{j}_{k}"
                                      for j, k in product(range(1, 5), ["A", "B", "C", "D"])],
                  n_pca_lr: int=600,
-                 n_pca_hr: int=100,
+                 n_pca_hr: int=30,
                  high_res_sigma: float=0.2,
                  tof_start: Optional[int]=None,
                  delta_tof: Optional[int]=300,
                  validation_size: float=0.05,
-                 n_nonlinear_kernel: int=10000):
+                 n_nonlinear_kernel: int=1000):
         # models
         self.x_model = Pipeline([
                                 ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)),
@@ -494,9 +520,9 @@ class Model(TransformerMixin, BaseEstimator):
                                 ])
         fit_steps = list()
         if n_nonlinear_kernel > 0:
-            fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=0.5, n_jobs=-1))]
-            #fit_steps += [('regression', (Ridge(alpha=0.1)))]
-        fit_steps += [('regression', FitModel())]
+            fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1))]
+        #fit_steps += [('regression', FitModel())]
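+        # one Bayesian ARD regressor per output: yields a predictive std without custom fit code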
+        fit_steps += [('regression', MultiOutputWithStd(ARDRegression(n_iter=10)))]
         self.fit_model = Pipeline(fit_steps)
 
         # size of the test subset
@@ -613,10 +648,10 @@
                  a (1, energy channel) array for the PCA syst. uncertainty in key "pca".
         """
         low_pca = self.x_model.transform(low_res_data)
-        high_pca = self.fit_model.predict(low_pca)
-        n_trains = high_pca["Y"].shape[0]
-        pca_y = np.concatenate((high_pca["Y"],
-                                high_pca["Y"] + high_pca["Y_eps"]),
+        high_pca, high_pca_unc = self.fit_model.predict(low_pca, return_std=True)
+        n_trains = high_pca.shape[0]
+        pca_y = np.concatenate((high_pca,
+                                high_pca + high_pca_unc),
                                axis=0)
         high_res_predicted = self.y_model["pca"].inverse_transform(pca_y)
         expected = high_res_predicted[:n_trains, :]
-- 
GitLab