import numpy as np
import scipy.stats as stats
from .base import BaseThresholder
from .thresh_utility import check_scores, cut, normalize
[docs]
class MAD(BaseThresholder):
    r"""Median Absolute Deviation (MAD) thresholder.

    A non-parametric way of thresholding the decision scores of an
    outlier detector: the cut-off is placed at the mean of the scores
    plus their median absolute deviation divided by their standard
    deviation, and any score beyond that cut-off is flagged as an
    outlier.

    See :cite:`archana2015mad` for details.

    Parameters
    ----------
    random_state : int, optional (default=1234)
        Random seed for the random number generators of the
        thresholders. Can also be set to None.

    Attributes
    ----------
    thresh_ : threshold value that separates inliers from outliers

    dscores_ : 1D array of decomposed decision scores

    Notes
    -----
    The median absolute deviation is defined as:

    .. math::

        MAD = med\lvert x - med(x)\rvert \mathrm{.}

    The threshold is then chosen as:

    .. math::

        \mathrm{lim} = \bar{x} + \frac{MAD}{\sigma} \mathrm{,}

    where :math:`\bar{x}` is the mean and :math:`\sigma` the standard
    deviation of the scores.
    """

    def __init__(self, random_state=1234):
        # Only stored here; it is forwarded to ``check_scores`` in ``eval``.
        self.random_state = random_state

    def eval(self, decision):
        """Label decision scores as inliers or outliers.

        Parameters
        ----------
        decision : np.array or list of shape (n_samples)
                   or np.array of shape (n_samples, n_detectors)
                   which are the decision scores from a
                   outlier detection.

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, tells whether or not
            it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.
        """
        # Validate the raw scores, then normalize them; every statistic
        # below is computed on this processed array.
        scores = normalize(
            check_scores(decision, random_state=self.random_state)
        )
        self.dscores_ = scores

        centre = np.mean(scores)
        sigma = np.std(scores)

        # Passing a numeric ``scale`` makes SciPy divide the raw MAD by
        # it, so this term is MAD / sigma.
        scaled_mad = stats.median_abs_deviation(scores, scale=sigma)

        threshold = centre + scaled_mad
        self.thresh_ = threshold

        return cut(scores, threshold)