Skip to content
Snippets Groups Projects
Commit 3db94efa authored by Danilo Ferreira de Lima
Browse files

Corrected chi^2 definition.

parent 649fc407
No related branches found
No related tags found
1 merge request: !9 Speed up prediction and outlier detection.
......@@ -2,4 +2,4 @@
Estimate high-resolution photon spectrometer data from low-resolution non-invasive measurements.
"""
VERSION = "0.2.0"
VERSION = "0.2.1"
......@@ -651,11 +651,17 @@ class MultiOutputWithStd(MetaEstimatorMixin, BaseEstimator):
class UncorrelatedDeviation(OutlierMixin, BaseEstimator):
"""
Detect outliers from uncorrelated whitened mean-centered inputs.
Detect outliers from uncorrelated inputs.
It uses a chi^2 sum over the features to flatten the features.
The standard deviation is estimated using quantiles.
Args:
sigma: Number of standard deviations of the chi^2 distribution.
"""
def __init__(self, contamination: float=0.003):
def __init__(self, sigma: float=5.0):
super().__init__()
self.contamination = contamination
self.sigma = sigma
def fit(self, X, y=None) -> OutlierMixin:
"""
......@@ -667,14 +673,18 @@ class UncorrelatedDeviation(OutlierMixin, BaseEstimator):
Returns: Itself.
"""
self.bounds_ = np.quantile(X, (self.contamination/2.0, 0.5, 1.0 - self.contamination/2.0), axis=0)
bounds_ = np.quantile(X, (0.003/2.0, 0.5, 1.0 - 0.003/2.0), axis=0)
self.ndof_ = float(X.shape[1] - 1.0)
self.med_ = bounds_[1, np.newaxis, ...]
self.sigma_ = (bounds_[2, np.newaxis, ...] - bounds_[0, np.newaxis, ...])/3.0
return self
def decision_function(self, X: np.ndarray) -> np.ndarray:
"""
Return the decision function.
This is chi^2/ndof - 1 - sigma*sqrt(var_chi2)
"""
return self.score_samples(X) - 1.0
return (self.score_samples(X) - 1.0 - self.sigma*np.sqrt(2.0/self.ndof_))
def score_samples(self, X: np.ndarray) -> np.ndarray:
"""
......@@ -683,11 +693,9 @@ class UncorrelatedDeviation(OutlierMixin, BaseEstimator):
Args:
X: The new input data.
Returns: The Mahalanobis distance.
Returns: The chi^2 test statistic.
"""
med = self.bounds_[1, np.newaxis, ...]
sigma = (self.bounds_[2, np.newaxis, ...] - self.bounds_[0, np.newaxis, ...])/0.5
return np.fabs((X - med)/sigma)
return np.sum(((X - self.med_)/self.sigma_)**2, axis=1)/float(self.ndof_)
def predict(self, X):
"""
......@@ -702,10 +710,8 @@ class UncorrelatedDeviation(OutlierMixin, BaseEstimator):
Returns -1 for anomalies/outliers and +1 for inliers.
"""
values = self.decision_function(X)
is_lower = np.any(X < self.bounds_[0, np.newaxis, ...], axis=1)
is_upper = np.any(X > self.bounds_[2, np.newaxis, ...], axis=1)
is_inlier = np.full(values.shape[0], -1, dtype=int)
is_inlier[is_lower | is_upper] = 1
is_inlier = np.full(values.shape[0], 1, dtype=int)
is_inlier[values > 0] = -1
return is_inlier
def score(self, X, y, sample_weight=None):
......@@ -784,7 +790,7 @@ class Model(TransformerMixin, BaseEstimator):
])
#self.ood = {ch: IsolationForest(n_jobs=-1)
# for ch in channels+['full']}
self.ood = {ch: UncorrelatedDeviation(contamination=0.003)
self.ood = {ch: UncorrelatedDeviation(sigma=5)
for ch in channels+['full']}
#self.ood = {ch: EllipticEnvelope(contamination=0.003)
# for ch in channels+['full']}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment