From 19e1cebcc6e385288c5104cd303a8a8225d24a47 Mon Sep 17 00:00:00 2001 From: Danilo Ferreira de Lima <danilo.enoque.ferreira.de.lima@xfel.de> Date: Sat, 6 May 2023 12:33:19 +0200 Subject: [PATCH] Removed multi output optimization for ARD, since it does not work in this case. Use generic parallelization. --- pes_to_spec/model.py | 59 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/pes_to_spec/model.py b/pes_to_spec/model.py index 10df31f..2ff7ab4 100644 --- a/pes_to_spec/model.py +++ b/pes_to_spec/model.py @@ -362,7 +362,7 @@ def _fit_estimator(estimator, X: np.ndarray, y: np.ndarray, w: Optional[np.ndarr estimator.fit(X, y, w) return estimator -class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator): +class MultiOutputRidgeWithStd(MetaEstimatorMixin, BaseEstimator): def __init__(self, estimator, *, n_jobs=-1): self.estimator = estimator @@ -416,6 +416,59 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator): y_std = np.sqrt(sigmas_squared_data + self.fast_inv_alpha) return y, y_std +class MultiOutputGenericWithStd(MetaEstimatorMixin, BaseEstimator): + + def __init__(self, estimator, *, n_jobs=-1): + self.estimator = estimator + self.n_jobs = n_jobs + + def fit(self, X: np.ndarray, y: np.ndarray, weights: Optional[np.ndarray]=None): + """Fit the model to data, separately for each output variable. + + Args: + X: {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + y: {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. An indicator matrix turns on multilabel + estimation. + + Returns: self. + """ + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." + ) + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_fit_estimator)( + self.estimator, X, y[:, i], weights + ) + for i in range(y.shape[1]) + ) + + return self + + def predict(self, X: np.ndarray, return_std: bool=False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Predict multi-output variable using model for each target variable. + + Args: + X: {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Returns: {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets predicted across multiple predictors. + Note: Separate models are generated for each predictor. + """ + y = Parallel(n_jobs=self.n_jobs)( + delayed(e.predict)(X, return_std) for e in self.estimators_ + ) + if return_std: + y, unc = zip(*y) + return np.asarray(y).T, np.asarray(unc).T + + return np.asarray(y).T + + class UncorrelatedDeviation(OutlierMixin, BaseEstimator): """ Detect outliers from uncorrelated inputs. @@ -550,9 +603,9 @@ class Model(TransformerMixin, BaseEstimator): if model_type == "bnn": self.fit_model = BNNModel() elif model_type == "ridge": - self.fit_model = MultiOutputWithStd(BayesianRidge(n_iter=300, verbose=True), n_jobs=8) + self.fit_model = MultiOutputRidgeWithStd(BayesianRidge(n_iter=300, verbose=True), n_jobs=8) elif model_type == "ard": - self.fit_model = MultiOutputWithStd(ARDRegression(n_iter=300, verbose=True), n_jobs=8) + self.fit_model = MultiOutputGenericWithStd(ARDRegression(n_iter=300, verbose=True), n_jobs=8) self.model_type = model_type self.kde_xgm = None -- GitLab