Source code for autoflow.feature_engineer.encode.label_encode

import numpy as np
import pandas as pd
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import LabelEncoder as SklearnLabelEncoder

from autoflow.utils.data import to_array

__all__ = ["LabelEncoder"]


class LabelEncoder(TransformerMixin, BaseEstimator):
    """Column-wise label encoder for 2-D tabular input.

    Fits one scikit-learn ``LabelEncoder`` per column and, at transform
    time, applies each fitted encoder to the column at the same position.

    Attributes
    ----------
    encoders : list
        Fitted scikit-learn ``LabelEncoder`` objects, one per column, in
        column order. Set by :meth:`fit`.
    """

    def fit(self, X, y=None):
        """Fit one label encoder on every column of ``X``.

        Parameters
        ----------
        X : array-like
            Input data. It is converted with ``to_array`` and then indexed
            as a 2-D array (``X[:, i]`` is column ``i``).
        y : object, optional
            Unused; accepted for scikit-learn API compatibility.

        Returns
        -------
        LabelEncoder
            ``self``, with ``self.encoders`` populated.
        """
        X = to_array(X)
        # NOTE(review): commented-out code in an earlier revision hinted at
        # treating -999 as a missing-value sentinel; it is currently encoded
        # like any other value.
        self.encoders = [
            SklearnLabelEncoder().fit(X[:, i]) for i in range(X.shape[1])
        ]
        return self

    def transform(self, X, y=None):
        """Encode every column of ``X`` with its fitted encoder.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Data with the same number of columns that :meth:`fit` saw.
        y : object, optional
            Unused; accepted for scikit-learn API compatibility.

        Returns
        -------
        pandas.DataFrame
            Encoded values. When ``X`` is a DataFrame its column labels and
            index are reused; otherwise columns are labelled ``"0"``,
            ``"1"``, ... and the index is ``range(n_rows)``.

        Raises
        ------
        AssertionError
            If the number of columns differs from the number of fitted
            encoders.
        ValueError
            Propagated from scikit-learn's ``LabelEncoder.transform`` when a
            column contains a label that was not seen during :meth:`fit`.
        """
        # Capture DataFrame labels before conversion discards them.
        is_frame = isinstance(X, pd.DataFrame)
        if is_frame:
            columns = X.columns
            index = X.index

        X = to_array(X)

        if not is_frame:
            # Derive default labels from the converted array so that inputs
            # without a ``.shape`` attribute (e.g. nested lists) also work.
            columns = [str(i) for i in range(X.shape[1])]
            index = range(X.shape[0])

        if X.shape[1] != len(self.encoders):
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is unchanged for callers.
            raise AssertionError(
                f"X has {X.shape[1]} columns, but LabelEncoder was fitted "
                f"on {len(self.encoders)} columns"
            )

        encoded = [
            encoder.transform(X[:, i])
            for i, encoder in enumerate(self.encoders)
        ]
        if encoded:
            # Each encoded column is a 1-D row of the stack; transpose back
            # to (n_rows, n_columns).
            data = np.vstack(encoded).T
        else:
            # ``np.vstack`` rejects an empty list; build the empty result
            # directly for zero-column input.
            data = np.empty((X.shape[0], 0), dtype=np.int64)
        return pd.DataFrame(data, columns=columns, index=index)