Source code for hyperts.framework.stats.ocsvm

# -*- coding:utf-8 -*-
"""

"""
import numpy as np
from sklearn.svm import OneClassSVM
from hyperts.framework.wrappers import BaseAnomalyDetectorWrapper


class TSOneClassSVM(BaseAnomalyDetectorWrapper):
    """One-Class Support Vector Machine for anomaly detection.

    Thin wrapper around :class:`sklearn.svm.OneClassSVM`. The sign of the
    underlying decision function is flipped so that larger scores mean
    "more anomalous".

    Parameters
    ----------
    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
        Specifies the kernel type to be used in the algorithm.
        If none is given, 'rbf' will be used. If a callable is given it is
        used to precompute the kernel matrix.

    degree : int, default=2
        Degree of the polynomial kernel function ('poly').
        Ignored by all other kernels.

    gamma : {'scale', 'auto'} or float, default='auto'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

        - if ``gamma='scale'`` is passed then it uses
          1 / (n_features * X.var()) as value of gamma,
        - if 'auto' (the default of this wrapper), uses 1 / n_features.

        Note that scikit-learn itself changed its own default from 'auto'
        to 'scale' in version 0.22; this wrapper keeps 'auto'.

    coef0 : float, default=0.0
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    tol : float, default=1e-3
        Tolerance for stopping criterion.

    nu : float, default=0.5
        An upper bound on the fraction of training errors and a lower
        bound of the fraction of support vectors. Should be in the
        interval (0, 1]. By default 0.5 will be taken.

    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
        See the :ref:`User Guide <shrinking_svm>`.

    cache_size : float, default=200
        Specify the size of the kernel cache (in MB).

    max_iter : int, default=-1
        Hard limit on iterations within solver, or -1 for no limit.

    contamination : 'auto' or float, default=0.05
        The amount of contamination of the data set, i.e. the proportion
        of outliers in the data set. Used when fitting to define the
        threshold on the scores of the samples.

        - If 'auto', the threshold is determined as in the original paper.
        - If float, the contamination should be in the range (0, 0.5].

    verbose : bool, default=False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not
        work properly in a multithreaded context.

    name : str, default='one class svm'
        Name forwarded to the base wrapper's constructor.
    """

    def __init__(self,
                 kernel="rbf",
                 degree=2,
                 gamma="auto",
                 coef0=0.0,
                 tol=1e-3,
                 nu=0.5,
                 shrinking=True,
                 cache_size=200,
                 max_iter=-1,
                 contamination=0.05,
                 verbose=False,
                 name='one class svm'):
        super(TSOneClassSVM, self).__init__(name=name, contamination=contamination)

        self.model = OneClassSVM(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            nu=nu,
            shrinking=shrinking,
            cache_size=cache_size,
            max_iter=max_iter,
            verbose=verbose)

    def _fit(self, X, y=None, **kwargs):
        """Fit the underlying OneClassSVM and store the training scores.

        ``y`` is accepted for interface compatibility but is not used; the
        estimator is always fitted with ``y=None``. An optional
        ``sample_weight`` keyword is forwarded to the estimator.
        """
        self.model.fit(X=X, y=None, sample_weight=kwargs.get('sample_weight', None))
        # Negate so that higher scores correspond to more abnormal samples,
        # matching ``decision_function`` below.
        self.decision_scores_ = self.model.decision_function(X) * -1
        # Base-class hook; presumably derives ``threshold_`` from
        # ``decision_scores_`` and ``contamination`` -- confirm in the base.
        self._get_decision_attributes()

    def _predict(self, X, **kwargs):
        """Return 1 for samples scoring above ``self.threshold_``, else 0."""
        decision_func = self.decision_function(X)

        is_outlier = np.zeros_like(decision_func, dtype=int)
        is_outlier[decision_func > self.threshold_] = 1

        return is_outlier

    def decision_function(self, X):
        """Predict anomaly scores for sequences in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,).
            One-dimensional input is treated as a single feature column.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples (negated OneClassSVM
            decision function, so larger means more anomalous).
        """
        self._check_is_fitted()

        # Plain sequences (list/tuple) carry no ``shape``; convert them so the
        # dimensionality check below works. Objects that already expose a
        # shape (ndarray, DataFrame) are passed through untouched.
        if not hasattr(X, 'shape'):
            X = np.asarray(X)
        if len(X.shape) == 1:
            # Go through ``np.asarray`` because some 1-D containers
            # (e.g. pandas Series) do not provide ``reshape``.
            X = np.asarray(X).reshape(-1, 1)

        decision_func = self.model.decision_function(X)

        return decision_func * -1