Source code for autoflow.feature_engineer.transform.guassion_tranform
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.base import TransformerMixin, BaseEstimator
class GuassionTransformer(TransformerMixin, BaseEstimator):
    """Reduce the skewness of numeric columns with a log10 or sqrt transform.

    For every column, the population standard deviation, the minimum and the
    skewness are computed. A column whose skewness exceeds twice its standard
    deviation is transformed (``log10`` above three standard deviations,
    ``sqrt`` otherwise), and the transformed column is kept only when it is
    less skewed than the original.

    Attributes set by :meth:`fit`:

    * ``transformer`` -- dict mapping each column label to the tuple
      ``(std, minimum, skew)`` measured on the input data.
    * ``_type`` -- ``"ndarray"`` once an ndarray has been fitted, otherwise
      the initial value ``"DataFrame"``.
    """

    def __init__(self):
        # Container type of the data seen by ``fit``; updated there.
        self._type = "DataFrame"

    def fit(self, X, y=None):
        """Measure per-column statistics and return the de-skewed data.

        Parameters
        ----------
        X : numpy.ndarray or pandas.DataFrame
            Two-dimensional numeric data, one feature per column. The input
            is never modified; all work happens on a copy.
        y : ignored
            Accepted only for API compatibility.

        Returns
        -------
        numpy.ndarray
            The values of ``X`` with the selected columns transformed.

        Notes
        -----
        NOTE(review): this method returns the transformed array rather than
        ``self``. The return value is kept for backward compatibility, but
        the class defines no ``transform`` method, so
        ``TransformerMixin.fit_transform`` presumably cannot be used with it
        -- confirm against callers before changing.
        """
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)
            self._type = "ndarray"
        # Work on a private copy so the caller's data is never overwritten.
        X = X.copy()

        # Column-wise statistics with an explicit axis. ``np.std(DataFrame)``
        # forwards ``axis=None`` to pandas, whose meaning is changing across
        # pandas versions; reducing the raw values along axis 0 is stable.
        values = X.to_numpy(dtype=float)
        std_array = values.std(axis=0)  # population std (ddof=0)
        minimum_array = values.min(axis=0)
        skew_array = stats.skew(values, axis=0)
        self.transformer = {
            column: (std, minimum, skew)
            for column, std, minimum, skew in zip(
                X.columns.values, std_array, minimum_array, skew_array
            )
        }

        for column in X.columns.values:
            # Unpack in the same order the tuple was stored above.
            std, minimum, skew = self.transformer[column]
            # Written as ``not >`` so a NaN skew (e.g. constant column) is
            # skipped exactly like a small skew.
            if not skew > 2 * std:
                continue

            one = X[column]
            if minimum < 0:
                # Shift the column before taking log/sqrt.
                # NOTE(review): this offset does not guarantee positive
                # values when ``minimum`` is strongly negative; such columns
                # are rejected by the finiteness check below. Confirm whether
                # ``- minimum`` was intended.
                one = one + (2 * std + minimum + 1)

            # log10 of zero / negatives yields -inf / NaN; silence the numpy
            # warnings because those candidates are rejected explicitly.
            with np.errstate(divide="ignore", invalid="ignore"):
                if skew > 3 * std:
                    candidate = np.log10(one)
                else:
                    candidate = np.sqrt(one)

            if not np.isfinite(candidate).all():
                continue
            # Keep the transform only if it actually reduces the skewness.
            if abs(stats.skew(candidate)) < abs(skew):
                X[column] = candidate
        return X.values
if __name__ == '__main__':
    # Smoke run: fit the transformer on the iris feature matrix.
    from sklearn.datasets import load_iris

    dataset = load_iris()
    GuassionTransformer().fit(dataset.data, dataset.target)