# Source code for ultraopt.learning.tpe

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : qichun tang
# @Contact    : qichun.tang@bupt.edu.cn
from copy import deepcopy
from typing import List, Optional

import numpy as np
import pandas as pd
from ConfigSpace import Configuration
from sklearn.base import BaseEstimator
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.neighbors import KernelDensity
from sklearn.utils import check_random_state

from ultraopt.utils.config_space import add_configs_origin, sample_configurations
from ultraopt.utils.config_transformer import ConfigTransformer
from ultraopt.utils.hash import get_hash_of_array
from ultraopt.utils.logging_ import get_logger


def estimate_bw(data, bw_method="scott", cv_times=100):
    """Estimate a single scalar kernel bandwidth for ``data``.

    Parameters
    ----------
    data : np.ndarray
        Sample array; ``data.shape[0]`` is used as the number of samples.
        The standard deviation is taken over all entries at once, so one
        bandwidth is returned regardless of the number of columns.
    bw_method : {"scott", "silverman", "cv"} or numeric scalar
        * ``"scott"``: ``n ** (-1/5) * std``, floored at 0.01.
        * ``"silverman"``: ``(3n/4) ** (-1/5) * std``, floored at 0.01.
        * ``"cv"``: 3-fold grid search over ``std ** linspace(-1, 1, cv_times)``
          (each candidate floored at 0.01). With 3 or fewer samples this
          falls back to the ``"scott"`` rule.
        * a numeric scalar is returned unchanged.
    cv_times : int
        Number of candidate bandwidths in the ``"cv"`` grid.

    Returns
    -------
    The selected bandwidth.

    Raises
    ------
    ValueError
        If ``bw_method`` is neither a known method name nor a numeric scalar.
    """
    # https://scikit-learn.org/stable/modules/generated/sklearn.cluster.estimate_bandwidth.html
    ndata = data.shape[0]

    if isinstance(bw_method, str):
        if bw_method not in ("scott", "silverman", "cv"):
            # np.isscalar() is True for strings, so an unknown name must be
            # rejected here or it would be handed back as the "bandwidth".
            raise ValueError("Unrecognized input for bw_method.")

        if bw_method == "cv" and ndata <= 3:
            # Too few samples for 3-fold cross validation.
            return estimate_bw(data)

        # The sample standard deviation (ddof=1) is undefined for a single
        # value and would yield NaN; treat it as zero spread so the 0.01
        # floor below applies.
        spread = np.std(data, ddof=1) if data.size > 1 else 0.0

        if bw_method == "scott":
            return np.clip(ndata ** (-1 / 5) * spread, 0.01, None)
        if bw_method == "silverman":
            return np.clip((ndata * 3 / 4) ** (-1 / 5) * spread, 0.01, None)

        # bw_method == "cv"
        bandwidths = np.clip(spread ** np.linspace(-1, 1, cv_times), 0.01, None)
        grid = GridSearchCV(KernelDensity(), {'bandwidth': bandwidths},
                            cv=KFold(n_splits=3, shuffle=True, random_state=0))
        grid.fit(data)
        return grid.best_params_['bandwidth']

    if np.isscalar(bw_method) and not isinstance(bw_method, bytes):
        # User supplied the bandwidth directly.
        return bw_method

    raise ValueError("Unrecognized input for bw_method.")


class TreeParzenEstimator(BaseEstimator):
    """Tree-structured Parzen estimator built from per-group kernel densities.

    Columns of the training matrix that are active (non-NaN) on exactly the
    same rows are put in one group. For every group, one ``KernelDensity`` is
    fitted on the best observations ("good") and one on the rest ("bad").
    ``predict`` scores points by the difference of the two log densities and
    ``sample`` draws candidates from the "good" densities.

    Parameters
    ----------
    top_n_percent : int
        Percentage of the active observations (lowest ``y`` first) assigned
        to the "good" density; at least 2 observations are always used.
    min_points_in_kde : int
        Minimum number of observations required on each side of the split;
        groups that cannot satisfy it get no KDE.
    bw_method, cv_times
        Forwarded to ``estimate_bw`` for both densities.
    kde_sample_weight_scaler : {None, "normalize", "std-exp"}
        How to turn the targets of the "good" observations into sample
        weights for the "good" KDE; ``None`` means unweighted.
    """

    def __init__(
            self,
            top_n_percent=15, min_points_in_kde=2,
            bw_method="scott", cv_times=100, kde_sample_weight_scaler=None,
    ):
        self.min_points_in_kde = min_points_in_kde
        self.top_n_percent = top_n_percent
        self.config_transformer: Optional[ConfigTransformer] = None
        self.logger = get_logger(self)
        self.kde_sample_weight_scaler = kde_sample_weight_scaler
        self.cv_times = cv_times
        self.bw_method = bw_method
        # Populated by fit(); None means "not fitted yet".
        self.good_kdes = None
        self.bad_kdes = None

    def set_config_transformer(self, config_transformer):
        """Attach the transformer used to map matrices back to configurations."""
        self.config_transformer = config_transformer

    def calc_groups(self, X):
        """Group columns of ``X`` that are non-NaN on the same set of rows.

        Returns
        -------
        (groups, n_groups)
            ``groups`` is an integer array with one group id per column,
            numbered in order of first appearance; ``n_groups`` is the number
            of distinct ids.
        """
        N, M = X.shape
        row_ids = np.arange(N)
        groups = []
        signature2group = {}
        for i in range(M):
            active_rows = row_ids[~np.isnan(X[:, i])]
            signature = get_hash_of_array(active_rows)
            if signature not in signature2group:
                signature2group[signature] = len(signature2group)
            groups.append(signature2group[signature])
        return np.array(groups), len(signature2group)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit the "good" and "bad" densities for every column group.

        Parameters
        ----------
        X : array of shape (n_observations, n_columns)
            Observations; NaN marks an inactive column for that row.
        y : array of shape (n_observations,)
            Targets; smaller values are treated as better.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``kde_sample_weight_scaler`` is set to an unknown value (only
            checked when the "good" targets are not all equal).
        """
        # Accept list-like input; boolean-mask indexing below needs arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        groups, n_groups = self.calc_groups(X)
        self.groups = groups
        self.n_groups = n_groups
        # Entries stay 0 (falsy) for groups that do not get a KDE.
        good_kdes = np.zeros([n_groups], dtype=object)
        bad_kdes = deepcopy(good_kdes)
        for group in range(n_groups):
            grouped_X = X[:, groups == group]
            # All columns of a group share the same active rows, so the first
            # column is enough to find them.
            active_mask = ~np.isnan(grouped_X[:, 0])
            active_X = grouped_X[active_mask, :]
            active_y = y[active_mask]
            N = active_X.shape[0]
            if N < 4:  # at least have 4 samples
                continue
            # Each KDE contains at least 2 samples
            n_good = max(2, (self.top_n_percent * N) // 100)
            if n_good < self.min_points_in_kde or \
                    N - n_good < self.min_points_in_kde:
                # Too few observation samples
                continue
            order = np.argsort(active_y)
            X_good = active_X[order[:n_good]]
            X_bad = active_X[order[n_good:]]
            # Negated so that a better (smaller) target gives a larger weight.
            y_good = -active_y[order[:n_good]]
            sample_weight = None
            if self.kde_sample_weight_scaler is not None and y_good.std() != 0:
                if self.kde_sample_weight_scaler == "normalize":
                    # Min-max scale into [0, 1], then shift to [0.5, 1.5].
                    scaled_y = (y_good - y_good.mean()) / y_good.std()
                    scaled_y -= np.min(scaled_y)
                    scaled_y /= np.max(scaled_y)
                    scaled_y += 0.5
                    sample_weight = scaled_y
                elif self.kde_sample_weight_scaler == "std-exp":
                    scaled_y = (y_good - y_good.mean()) / y_good.std()
                    sample_weight = np.exp(scaled_y)
                else:
                    raise ValueError(f"Invalid kde_sample_weight_scaler '{self.kde_sample_weight_scaler}'")
            bw_good = estimate_bw(X_good, self.bw_method, self.cv_times)
            bw_bad = estimate_bw(X_bad, self.bw_method, self.cv_times)
            good_kdes[group] = KernelDensity(bandwidth=bw_good).fit(X_good, sample_weight=sample_weight)
            bad_kdes[group] = KernelDensity(bandwidth=bw_bad).fit(X_bad)
        self.good_kdes = good_kdes
        self.bad_kdes = bad_kdes
        return self

    def predict(self, X: np.ndarray):
        """Score rows of ``X`` by good-minus-bad log density.

        For each group that has both KDEs, the log densities of the active
        rows are evaluated; groups without KDEs and inactive rows contribute
        zero. The per-group values are summed per row.

        Returns
        -------
        np.ndarray of shape (n_rows,)
            ``sum(good log pdf) - sum(bad log pdf)``; larger is better.
        """
        n_groups = self.n_groups
        good_log_pdf = np.zeros([X.shape[0], n_groups], dtype="float64")
        bad_log_pdf = deepcopy(good_log_pdf)
        groups = self.groups
        for group, (good_kde, bad_kde) in enumerate(zip(self.good_kdes, self.bad_kdes)):
            if (not good_kde) or (not bad_kde):
                continue
            grouped_X = X[:, groups == group]
            active_mask = ~np.isnan(grouped_X[:, 0])
            active_X = grouped_X[active_mask, :]
            if active_X.shape[0] == 0:
                continue
            if np.any(pd.isna(active_X)):
                # Rows active in the first column may still miss other columns.
                self.logger.warning("ETPE contains nan, mean impute.")
                active_X = SimpleImputer(strategy="mean").fit_transform(active_X)
            good_log_pdf[active_mask, group] = good_kde.score_samples(active_X)
            bad_log_pdf[active_mask, group] = bad_kde.score_samples(active_X)
        if not np.all(np.isfinite(good_log_pdf)):
            self.logger.warning("good_log_pdf contains NaN or inf")
        if not np.all(np.isfinite(bad_log_pdf)):
            self.logger.warning("bad_log_pdf contains NaN or inf")
        # Replace non-finite values with fixed finite stand-ins. For the bad
        # density -inf becomes -10 first; any remaining NaN/+inf becomes 10.
        good_log_pdf[~np.isfinite(good_log_pdf)] = -10
        bad_log_pdf[bad_log_pdf == -np.inf] = -10
        bad_log_pdf[~np.isfinite(bad_log_pdf)] = 10
        return good_log_pdf.sum(axis=1) - bad_log_pdf.sum(axis=1)

    @staticmethod
    def _sample_widened(kde, n_samples, bandwidth_factor, rng):
        """Draw from ``kde`` with its bandwidth temporarily scaled by ``bandwidth_factor``."""
        original_bw = kde.bandwidth
        # NOTE(review): newer scikit-learn releases appear to sample from the
        # fitted ``bandwidth_`` attribute rather than the ``bandwidth``
        # parameter - confirm. Both are scaled so the factor takes effect
        # either way; only one of them is read, so it is never applied twice.
        fitted_bw = getattr(kde, "bandwidth_", None)
        kde.set_params(bandwidth=original_bw * bandwidth_factor)
        if fitted_bw is not None:
            kde.bandwidth_ = fitted_bw * bandwidth_factor
        try:
            return kde.sample(n_samples, random_state=rng)
        finally:
            # Always restore, so a failed draw cannot leave the model widened.
            kde.set_params(bandwidth=original_bw)
            if fitted_bw is not None:
                kde.bandwidth_ = fitted_bw

    def sample(self, n_candidates=20, sort_by_EI=False, random_state=None, bandwidth_factor=3) -> List[Configuration]:
        """Draw candidate configurations from the "good" densities.

        Parameters
        ----------
        n_candidates : int
            Number of configurations to return.
        sort_by_EI : bool
            If true, order the candidates by descending ``predict`` score.
            A failure while sorting is logged and the unsorted list returned.
        random_state : None, int or RandomState
            Seed or generator; one generator is shared by all groups.
        bandwidth_factor : float
            Multiplier applied to each KDE bandwidth while sampling.

        Returns
        -------
        list of Configuration
            Falls back to purely random configurations when the estimator has
            not been fitted; candidates lost in ``inverse_transform`` are
            replaced with random ones.
        """
        # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html#sklearn.neighbors.KernelDensity
        # Must come before touching self.groups, which only exists after fit().
        if self.good_kdes is None:
            self.logger.warning("good_kdes is None, random sampling.")
            return sample_configurations(self.config_transformer.config_space, n_candidates)
        groups = np.array(self.groups)
        rng = check_random_state(random_state)
        sampled_matrix = np.zeros([n_candidates, len(groups)])
        for group, good_kde in enumerate(self.good_kdes):
            group_mask = groups == group
            if good_kde:
                # KDE sampling. The shared generator is passed on so that an
                # integer seed is not reused identically for every group.
                drawn = self._sample_widened(good_kde, n_candidates, bandwidth_factor, rng)
            else:
                # Uniform random sampling in [0, 1) for groups without a KDE.
                drawn = rng.rand(n_candidates, group_mask.sum())
            sampled_matrix[:, group_mask] = drawn
        candidates = self.config_transformer.inverse_transform(sampled_matrix)
        n_fails = n_candidates - len(candidates)
        add_configs_origin(candidates, "ETPE sampling")
        if n_fails:
            # Top up with random configurations for rows that were dropped.
            random_candidates = sample_configurations(self.config_transformer.config_space, n_fails)
            add_configs_origin(random_candidates, "Random Search")
            candidates.extend(random_candidates)
        if sort_by_EI:
            # Deliberately best-effort: sorting problems must not lose candidates.
            try:
                X = [candidate.get_array() for candidate in candidates]
                X_trans = self.config_transformer.transform(X)

                EI = self.predict(X_trans)
                indexes = np.argsort(-EI)
                candidates = [candidates[ix] for ix in indexes]
            except Exception as e:
                self.logger.error(f"sort_by_EI failed: {e}")
        return candidates