Source code for autoflow.ensemble.base

from typing import List

import numpy as np
from sklearn.base import BaseEstimator

from autoflow.data_container.base import get_container_data
from autoflow.utils import typing_


class EnsembleEstimator(BaseEstimator):
    """Base class for ensembles assembled from already-trained estimators.

    ``fit_trained_data`` stores the per-estimator, per-fold artefacts on the
    instance and then calls ``build_prediction_list`` to flatten them into
    ``prediction_list`` (one array per estimator) and ``stacked_y_true``
    (the targets reordered to line up with those predictions).
    """

    # Not assigned anywhere in this class.
    # NOTE(review): presumably overridden by subclasses -- confirm.
    mainTask = None

    def build_prediction_list(self):
        """Flatten the stored per-fold predictions and align the targets.

        Reads ``self.y_true_indexes_list``, ``self.y_preds_list`` and
        ``self.y_true``; writes:

        * ``self.prediction_list`` -- for every entry of ``y_preds_list``,
          its arrays concatenated along axis 0.
        * ``self.stacked_y_true`` -- ``self.y_true`` indexed by the
          horizontally stacked indexes of the first estimator.

        Raises:
            AssertionError: if there are fewer than two entries in
                ``y_true_indexes_list``, if the stacked indexes differ
                between estimators, or if the first prediction array and
                ``stacked_y_true`` disagree in their first dimension.
        """
        assert len(self.y_true_indexes_list) > 1, \
            "an ensemble needs the indexes of at least two estimators"
        # Every splitter shares the same random_state, so each entry of
        # y_true_indexes_list is expected to contain identical indexes.
        stacked_indexes = [
            np.hstack(y_true_indexes)
            for y_true_indexes in self.y_true_indexes_list
        ]
        reference_indexes = stacked_indexes[0]
        # Compare each estimator against the first one explicitly; unlike
        # stacking everything into one 2-D array, this also copes with
        # estimators whose index arrays have different lengths.
        assert all(
            np.array_equal(reference_indexes, other_indexes)
            for other_indexes in stacked_indexes[1:]
        ), "y_true indexes differ between estimators"
        # Concatenate each estimator's per-fold predictions along axis 0.
        self.prediction_list = [
            np.concatenate(y_preds) for y_preds in self.y_preds_list
        ]
        self.stacked_y_true = self.y_true[reference_indexes]
        assert self.prediction_list[0].shape[0] == self.stacked_y_true.shape[0], \
            "predictions and stacked targets have different lengths"

    def fit_trained_data(
            self,
            estimators_list: List[List[typing_.GenericEstimator]],
            y_true_indexes_list: List[List[np.ndarray]],
            y_preds_list: List[List[np.ndarray]],
            y_true: np.ndarray
    ):
        """Store already-computed training artefacts and build the prediction list.

        Args:
            estimators_list: nested list of estimators; stored unchanged as
                ``self.estimators_list``.
            y_true_indexes_list: for each estimator, a list of index arrays
                into ``y_true``; stored as ``self.y_true_indexes_list``.
            y_preds_list: for each estimator, a list of prediction arrays;
                stored as ``self.y_preds_list``.
            y_true: the targets; passed through ``get_container_data`` before
                being stored as ``self.y_true``.
                NOTE(review): presumably this unwraps a data container into
                a plain array -- confirm against ``get_container_data``.
        """
        self.y_preds_list = y_preds_list
        self.y_true_indexes_list = y_true_indexes_list
        self.estimators_list = estimators_list
        self.y_true = get_container_data(y_true)
        self.build_prediction_list()