Source code for pythresh.thresholds.zscore

import numpy as np
import scipy.stats as stats

from .base import BaseThresholder


class ZSCORE(BaseThresholder):
    r"""ZSCORE class for ZSCORE thresholder.

    Use the z-score of the decision scores to separate inliers from
    outliers: every score whose z-score is greater than or equal to
    ``factor`` is labelled an outlier.
    See :cite:`bagdonavicius2020zscore` for details.

    Parameters
    ----------
    factor : int, optional (default=1)
        The z-score cutoff. Scores with a z-score at or above this value
        are labelled as outliers.

    random_state : int, optional (default=1234)
        Random seed for the random number generators of the thresholders.
        Can also be set to None.

    Attributes
    ----------
    thresh_ : threshold value that separates inliers from outliers

    dscores_ : 1D array of decomposed decision scores

    Notes
    -----
    The z-score can be calculated as follows:

    .. math::

        Z = \frac{x-\bar{x}}{\sigma} \mathrm{,}

    where :math:`\bar{x}` and :math:`\sigma` are the mean and the standard
    deviation of the decision scores respectively. Any score whose z-score
    is greater than or equal to ``factor`` is considered an outlier. The
    comparison uses the signed z-score, so only scores above the mean can
    be flagged when ``factor`` is positive.
    """

    def __init__(self, factor=1, random_state=1234):
        super().__init__()
        self.factor = factor
        self.random_state = random_state
        # NOTE: this seeds NumPy's *global* random state, not a private
        # generator, so constructing a ZSCORE affects other np.random users.
        np.random.seed(random_state)

    def eval(self, decision):
        """Outlier/inlier evaluation process for decision scores.

        Parameters
        ----------
        decision : np.array or list of shape (n_samples)
                   or np.array of shape (n_samples, n_detectors)
                   which are the decision scores from an
                   outlier detection.

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, tells whether or not
            it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.

        Notes
        -----
        ``thresh_`` is set to the smallest score that was labelled an
        outlier. If no score is labelled an outlier, ``thresh_`` is set to
        the analytic cutoff ``mean + factor * std`` of the decision scores
        instead of raising on an empty selection.
        """
        decision = self._data_setup(decision)

        # Get the zscore of the decision scores
        zscore = stats.zscore(decision)

        # Flag every score whose z-score reaches the factor. NaN z-scores
        # (e.g. from constant scores) compare False and stay inliers.
        is_outlier = zscore >= self.factor
        labels = np.zeros(len(decision), dtype=int)
        labels[is_outlier] = 1

        flagged_scores = decision[is_outlier]
        if flagged_scores.size:
            self.thresh_ = np.min(flagged_scores)
        else:
            # np.min raises ValueError on an empty array, so fall back to
            # the score value that corresponds to a z-score of `factor`.
            # With a non-zero standard deviation this lies above every
            # score, which matches the all-inlier labelling.
            self.thresh_ = np.mean(decision) + self.factor * np.std(decision)

        return labels