Source code for autoflow.pipeline.components.preprocessing.abnormal.isolation_forest

from functools import partial
from typing import Dict

from sklearn.ensemble import IsolationForest as SklearnIsolationForest

from autoflow.pipeline.components.data_process_base import AutoFlowDataProcessAlgorithm

__all__ = ["IsolationForest"]


def outlier_rejection(X, y, hyperparams):
    model = SklearnIsolationForest(
        max_samples=hyperparams.get("max_samples", 100),
        contamination=hyperparams.get("contamination", 0.4),
        random_state=hyperparams.get("random_state", 42)
    )
    model.fit(X)
    y_pred = model.predict(X)
    return X[y_pred == 1], y[y_pred == 1]


[docs]class IsolationForest(AutoFlowDataProcessAlgorithm): class__ = "FunctionSampler" module__ = "imblearn"
[docs] def after_process_hyperparams(self, hyperparams: dict) -> Dict: hyperparams = super(IsolationForest, self).after_process_hyperparams(hyperparams) hyperparams["func"] = partial(outlier_rejection, hyperparams=hyperparams) return hyperparams