Source code for autoflow.pipeline.components.data_process_base
import pandas as pd
from autoflow.pipeline.components.base import AutoFlowComponent
from autoflow.pipeline.dataframe import GenericDataFrame
[docs]class AutoFlowDataProcessAlgorithm(AutoFlowComponent):
need_y = True
[docs] def fit(self, X_train, y_train=None,
X_valid=None, y_valid=None,
X_test=None, y_test=None):
self.build_proxy_estimator()
return self
[docs] def fit_transform(self, X_train=None, y_train=None, X_valid=None, y_valid=None, X_test=None, y_test=None):
return self.fit(X_train, y_train, X_valid, y_valid, X_test, y_test).transform(X_train, X_valid, X_test, y_train)
[docs] def transform(self, X_train: GenericDataFrame = None, X_valid=None, X_test=None, y_train=None):
sample_X_test = self.hyperparams.get("sample_X_test", False)
if y_train is not None:
X_train, y_train = self._transform(X_train, y_train)
if (not self.need_y) and sample_X_test:
X_valid, _ = self._transform(X_valid, None)
X_test, _ = self._transform(X_test, None)
return {
"X_train": X_train,
"X_valid": X_valid,
"X_test": X_test,
"y_train": y_train
}
def _transform(self, X: GenericDataFrame, y):
columns = X.columns
feature_groups = X.feature_groups
columns_metadata = X.columns_metadata
X_, y_ = self._transform_proc(X, y)
X = GenericDataFrame(pd.DataFrame(X_, columns=columns), feature_groups=feature_groups, columns_metadata=columns_metadata)
return X, y_
def _transform_proc(self, X_train, y_train):
return self.estimator.fit_sample(X_train, y_train)