ultraopt.utils.config_transformer 源代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : qichun tang
# @Date    : 2020-12-14
# @Contact    : qichun.tang@bupt.edu.cn
from copy import copy
from typing import Optional, Union

import numpy as np
import pandas as pd
from ConfigSpace import ConfigurationSpace, Constant, CategoricalHyperparameter, Configuration, OrdinalHyperparameter
from ConfigSpace.util import deactivate_inactive_hyperparameters
from sklearn.preprocessing import LabelEncoder
from tabular_nn.base_tnn import get_embed_dims

from ultraopt.utils.config_space import deactivate


[文档]class ConfigTransformer(): def __init__(self, impute: Optional[float] = -1, encoder=None): self.impute = impute self.encoder = encoder
[文档] def fit(self, config_space: ConfigurationSpace): mask = [] n_choices_list = [] is_ordinal_list = [] sequence_mapper = {} n_constants = 0 n_variables = 0 n_variables_embedded = 0 n_top_levels = 0 parents = [] parent_values = [] is_child = [] # todo: 划分parents与groups for hp in config_space.get_hyperparameters(): if isinstance(hp, Constant) or \ (isinstance(hp, CategoricalHyperparameter) and len(hp.choices) == 1) or \ (isinstance(hp, OrdinalHyperparameter) and len(hp.sequence) == 1): # ignore mask.append(False) n_constants += 1 else: mask.append(True) n_variables += 1 if isinstance(hp, CategoricalHyperparameter): n_choices = len(hp.choices) n_choices_list.append(n_choices) n_variables_embedded += int(get_embed_dims(n_choices)) # avoid bug else: n_choices_list.append(0) n_variables_embedded += 1 if isinstance(hp, OrdinalHyperparameter): is_ordinal_list.append(True) sequence_mapper[len(is_ordinal_list) - 1] = hp.sequence else: is_ordinal_list.append(False) cur_parents = config_space.get_parents_of(hp.name) if len(cur_parents) == 0: n_top_levels += 1 parents.append(None) parent_values.append(None) is_child.append(False) else: is_child.append(True) parents.append(cur_parents[0]) parent_conditions = config_space.get_parent_conditions_of(hp.name) parent_condition = parent_conditions[0] parent_values.append(parent_condition.value) groups_str = [f"{parent}-{parent_value}" for parent, parent_value in zip(parents, parent_values)] group_encoder = LabelEncoder() groups = group_encoder.fit_transform(groups_str) self.is_child = is_child self.sequence_mapper = sequence_mapper self.is_ordinal_list = is_ordinal_list self.config_space = config_space self.groups_str = groups_str self.group_encoder = group_encoder self.groups = groups self.n_groups = np.max(groups) + 1 self.mask = np.array(mask, dtype="bool") self.n_choices_list = n_choices_list self.n_constants = n_constants self.n_variables = n_variables self.n_variables_embedded = n_variables_embedded self.n_top_levels = n_top_levels self.hp_names = pd.Series([hp.name for hp in config_space.get_hyperparameters()])[self.mask] high_r_mask = np.array(self.n_choices_list) > 2 self.high_r_cols = self.hp_names[high_r_mask].to_list() self.high_r_cats = [] for ix in np.arange(n_variables)[high_r_mask]: n_choices = n_choices_list[ix] cat = list(range(n_choices)) if is_child[ix]: cat.insert(0, -1) self.high_r_cats.append(cat) if self.encoder is not None: self.encoder.cols = copy(self.high_r_cols) self.encoder.categories = copy(self.high_r_cats) return self
[文档] def fit_encoder(self, vectors, losses=None): vectors = vectors[:, self.mask] df = pd.DataFrame(vectors, columns=self.hp_names) if self.encoder is not None: self.encoder.fit(df, losses)
[文档] def transform(self, vectors: np.ndarray) -> np.ndarray: vectors = np.array(vectors) vectors = vectors[:, self.mask] if self.encoder is not None: df = pd.DataFrame(vectors, columns=self.hp_names) vectors = self.encoder.transform(df) if not isinstance(vectors, np.ndarray): vectors = np.array(vectors) if self.impute is not None: if self.impute == "random_choice": vectors = self.impute_conditional_data(vectors) else: # is numeric vectors[np.isnan(vectors)] = float(self.impute) return vectors
[文档] def inverse_transform(self, array: np.ndarray, return_vector=False) -> Union[np.ndarray, None, Configuration]: if self.encoder is not None: array = self.encoder.inverse_transform(array) array = np.array(array) for i, n_choices in enumerate(self.n_choices_list): if n_choices == 2: array[:, i] = (array[:, i] > 0.5).astype("float64") is_ordinal = self.is_ordinal_list[i] if is_ordinal: sequence = self.sequence_mapper[i] array[:, i] = np.clip(np.round(array[:, i]), 0, len(sequence) - 1) N, M = array.shape result = np.zeros([N, len(self.mask)]) result[:, self.mask] = array if return_vector: return result configs = [] for i in range(N): try: config = deactivate(self.config_space, result[i, :]) config = deactivate_inactive_hyperparameters( configuration_space=self.config_space, configuration=config ) configs.append(config) except Exception as e: pass return configs
[文档] def impute_conditional_data(self, array): # copy from HpBandSter return_array = np.empty_like(array) for i in range(array.shape[0]): datum = np.copy(array[i]) nan_indices = np.argwhere(np.isnan(datum)).flatten() while (np.any(nan_indices)): nan_idx = nan_indices[0] valid_indices = np.argwhere(np.isfinite(array[:, nan_idx])).flatten() if len(valid_indices) > 0: # pick one of them at random and overwrite all NaN values row_idx = np.random.choice(valid_indices) datum[nan_indices] = array[row_idx, nan_indices] else: # no good point in the data has this value activated, so fill it with a valid but random value t = self.n_choices_list[nan_idx] if t == 0: datum[nan_idx] = np.random.rand() else: datum[nan_idx] = np.random.randint(t) nan_indices = np.argwhere(np.isnan(datum)).flatten() return_array[i, :] = datum return (return_array)