Source code for autoflow.feature_engineer.compress.variance

from time import time
from typing import Union

import numpy as np
import pandas as pd

from autoflow.feature_engineer.compress.similarity_base import SimilarityBase


class Variance(SimilarityBase):
    """Flag columns whose variance is at or below ``self.threshold``.

    ``fit`` reads ``self.threshold`` and ``self.logger`` and appends to
    ``self.to_delete``; none of these are created in this class, so they are
    presumably set up by ``SimilarityBase`` (not visible here -- confirm).
    """

    def fit(self, X: Union[pd.DataFrame, np.ndarray], y=None):
        """Record every column of ``X`` with variance <= ``self.threshold``.

        Parameters
        ----------
        X : pandas.DataFrame or numpy.ndarray
            Input features. An ndarray is wrapped in a DataFrame, so its
            columns are identified by their integer position.
        y : optional
            Unused by this method.

        Returns
        -------
        self
            The fitted instance; matching column labels have been appended
            to ``self.to_delete``.
        """
        self.name = f"variance<={self.threshold}"
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)
            # NOTE(review): assumed to belong to the ndarray branch; the
            # original layout was lost -- confirm against upstream source.
            self._type = "ndarray"
        start = time()
        for col in X.columns.values:
            # np.var defaults to ddof=0, i.e. the population variance.
            var = np.var(X[col].values)
            if var <= self.threshold:
                self.to_delete.append(col)
        end = time()
        # Lazy %-style argument: the original message had no placeholder, so
        # logging could not format the record and the timing was never shown.
        self.logger.debug("use time: %s", end - start)
        return self