Using ARDRegressor instead of BFGS to avoid using special code.

44353618 · Danilo Ferreira de Lima · b695793c · 44353618
Commit 44353618 authored 2 years ago by Danilo Ferreira de Lima
--- a/pes_to_spec/model.py
+++ b/pes_to_spec/model.py
@@ -13,13 +13,18 @@ from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import TransformerMixin, BaseEstimator
 from sklearn.base import RegressorMixin
 from sklearn.kernel_approximation import Nystroem
-from sklearn.linear_model import Ridge
+from sklearn.linear_model import ARDRegression
+#from sklearn.gaussian_process import GaussianProcessRegressor
+#from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
 from itertools import product
 from sklearn.model_selection import train_test_split
+from sklearn.base import clone, MetaEstimatorMixin
+from joblib import Parallel, delayed
 import matplotlib.pyplot as plt
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union, Tuple
 def matching_ids(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.ndarray:
    """Returns list of train IDs common to sets a, b and c."""
@@ -311,11 +316,10 @@ class FitModel(RegressorMixin, BaseEstimator):
        # initial parameter values
        A0: np.ndarray = np.eye(self.Nx, self.Ny).reshape(self.Nx*self.Ny)
-        #Aeps: np.ndarray = np.zeros(self.Nx)
        b0: np.ndarray = np.zeros(self.Ny)
-        u0: np.ndarray = -2*np.ones(self.Ny)
+        Aeps: np.ndarray = np.zeros(self.Nx)
-        #x0: np.ndarray = np.concatenate((A0, b0, u0, Aeps), axis=0)
+        u0: np.ndarray = np.zeros(self.Ny)
-        x0: np.ndarray = np.concatenate((A0, b0, u0), axis=0)
+        x0: np.ndarray = np.concatenate((A0, b0, u0, Aeps), axis=0)
        # reset loss monitoring
        self.loss_train: List[float] = list()
@@ -337,9 +341,8 @@ class FitModel(RegressorMixin, BaseEstimator):
            b = x[self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape((1, self.Ny))
            b_eps = x[(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape((1, self.Ny))
-            #A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1))
+            A_eps = x[(self.Nx*self.Ny+self.Ny+self.Ny):].reshape((self.Nx, 1))
-            #log_unc = anp.matmul(X, A_eps) + b_eps
+            log_unc = anp.matmul(X, A_eps) + b_eps
-            log_unc = b_eps
            #log_unc = anp.log(anp.exp(log_unc) + anp.exp(log_eps))
            iunc2 = anp.exp(-2*log_unc)
@@ -393,63 +396,86 @@ class FitModel(RegressorMixin, BaseEstimator):
        self.A_inf = sc_op[0][:self.Nx*self.Ny].reshape(self.Nx, self.Ny)
        self.b_inf = sc_op[0][self.Nx*self.Ny:(self.Nx*self.Ny+self.Ny)].reshape(1, self.Ny)
        self.u_inf = sc_op[0][(self.Nx*self.Ny+self.Ny):(self.Nx*self.Ny+self.Ny+self.Ny)].reshape(1, self.Ny) # removed np.exp
-        #self.A_eps = sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1)
+        self.A_eps = sc_op[0][(self.Nx*self.Ny+self.Ny+self.Ny):].reshape(self.Nx, 1)
-    def as_dict(self) -> Dict[str, Any]:
+    def predict(self, X: np.ndarray, return_std: bool=False) -> Union[Tuple[np.ndarray, np.ndarray], np.ndarray]:
        """
-        Export relevant variables to rebuild this object from a simple dictionary.
+        Predict y from X.
        Args:
+          X: Input dataset.
-        Returns: Dictionary with all relevant variables.
+        Returns: Predicted Y and, if return_std is True, also its uncertainty.
        """
-        return dict(
-                    A_inf=self.A_inf,
-                    b_inf=self.b_inf,
-                    u_inf=self.u_inf,
-                    #A_eps=self.A_eps,
-                    loss_train=self.loss_train,
-                    loss_test=self.loss_test,
-                    Nx=self.Nx,
-                    Ny=self.Ny)
-    def from_dict(self, in_dict: Dict[str, Any]):
-        """
-        Rebuild this object from a dictionary.
-        Args:
+        # result
-          in_dict: The input dictionary with relevant variables.
+        y = (X @ self.A_inf + self.b_inf)
+        if not return_std:
+            return y
+        # flat uncertainty
+        y_unc = self.u_inf[0,:]
+        # input-dependent uncertainty
+        y_eps = np.exp(X @ self.A_eps + y_unc)
+        return y, y_eps
-        """
-        self.A_inf = in_dict["A_inf"]
-        self.b_inf = in_dict["b_inf"]
-        self.u_inf = in_dict["u_inf"]
-        #self.A_eps = in_dict["A_eps"]
-        self.loss_train = in_dict["loss_train"]
-        self.loss_test = in_dict["loss_test"]
-        self.Nx = in_dict["Nx"]
-        self.Ny = in_dict["Ny"]
-    def predict(self, X: np.ndarray) -> Dict[str, np.ndarray]:
+def _fit_estimator(estimator, X: np.ndarray, y: np.ndarray):
-        """
+    estimator = clone(estimator)
-        Predict y from X.
+    estimator.fit(X, y)
+    return estimator
+class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
+    def __init__(self, estimator, *, n_jobs=None):
+        self.estimator = estimator
+        self.n_jobs = n_jobs
+    def fit(self, X: np.ndarray, y: np.ndarray):
+        """Fit the model to data, separately for each output variable.
        Args:
-          X: Input dataset.
+          X: {array-like, sparse matrix} of shape (n_samples, n_features)
+            The input data.
+          y: {array-like, sparse matrix} of shape (n_samples, n_outputs)
+            Multi-output targets. An indicator matrix turns on multilabel
+            estimation.
+        Returns: self.
+        """
+        if y.ndim == 1:
+            raise ValueError(
+                "y must have at least two dimensions for "
+                "multi-output regression but has only one."
+            )
+        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
+            delayed(_fit_estimator)(
+                self.estimator, X, y[:, i]
+            )
+            for i in range(y.shape[1])
+        )
-        Returns: Predicted Y.
+        return self
-        """
-        result = dict()
+    def predict(self, X: np.ndarray, return_std: bool=False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+        """Predict multi-output variable using model for each target variable.
-        # result
+        Args:
-        result["Y"] = (X @ self.A_inf + self.b_inf)
+          X: {array-like, sparse matrix} of shape (n_samples, n_features)
-        # flat uncertainty
+            The input data.
-        result["Y_unc"] = self.u_inf[0,:]
-        # input-dependent uncertainty
+        Returns: {array-like, sparse matrix} of shape (n_samples, n_outputs)
-        #result["Y_eps"] = np.exp(X @ self.A_eps + result["Y_unc"])
+            Multi-output targets predicted across multiple predictors.
-        result["Y_eps"] = np.exp(result["Y_unc"])
+            Note: Separate models are generated for each predictor.
-        return result
+        """
+        y = Parallel(n_jobs=self.n_jobs)(
+            delayed(e.predict)(X, return_std) for e in self.estimators_
+        )
+        if return_std:
+            y, unc = zip(*y)
+            return np.asarray(y).T, np.asarray(unc).T
+        return np.asarray(y).T
 class Model(TransformerMixin, BaseEstimator):
    """
@@ -475,12 +501,12 @@ class Model(TransformerMixin, BaseEstimator):
                 channels:List[str]=[f"channel_{j}_{k}"
                                     for j, k in product(range(1, 5), ["A", "B", "C", "D"])],
                 n_pca_lr: int=600,
-                 n_pca_hr: int=100,
+                 n_pca_hr: int=30,
                 high_res_sigma: float=0.2,
                 tof_start: Optional[int]=None,
                 delta_tof: Optional[int]=300,
                 validation_size: float=0.05,
-                 n_nonlinear_kernel: int=10000):
+                 n_nonlinear_kernel: int=1000):
        # models
        self.x_model = Pipeline([
                                ('select', SelectRelevantLowResolution(channels, tof_start, delta_tof)),
@@ -494,9 +520,9 @@ class Model(TransformerMixin, BaseEstimator):
                                ])
        fit_steps = list()
        if n_nonlinear_kernel > 0:
-            fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=0.5, n_jobs=-1))]
+            fit_steps += [('fex', Nystroem(n_components=n_nonlinear_kernel, kernel='rbf', gamma=None, n_jobs=-1))]
-            #fit_steps += [('regression', (Ridge(alpha=0.1)))]
+        #fit_steps += [('regression', FitModel())]
-        fit_steps += [('regression', FitModel())]
+        fit_steps += [('regression', MultiOutputWithStd(ARDRegression(n_iter=10)))]
        self.fit_model = Pipeline(fit_steps)
        # size of the test subset
@@ -613,10 +639,10 @@ class Model(TransformerMixin, BaseEstimator):
                 a (1, energy channel) array for the PCA syst. uncertainty in key "pca".
        """
        low_pca = self.x_model.transform(low_res_data)
-        high_pca = self.fit_model.predict(low_pca)
+        high_pca, high_pca_unc = self.fit_model.predict(low_pca, return_std=True)
-        n_trains = high_pca["Y"].shape[0]
+        n_trains = high_pca.shape[0]
-        pca_y = np.concatenate((high_pca["Y"],
+        pca_y = np.concatenate((high_pca,
-                                high_pca["Y"] + high_pca["Y_eps"]),
+                                high_pca + high_pca_unc),
                               axis=0)
        high_res_predicted = self.y_model["pca"].inverse_transform(pca_y)
        expected = high_res_predicted[:n_trains, :]