diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py
index 911c6243154..d501424cb13 100644
--- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py
+++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -76,7 +76,7 @@ def __init__(self,
         # if True (default), use the already initialized values for the first iteration.
         self._start_with_defaults: bool = bool(
             strtobool(str(self._config.pop('start_with_defaults', True))))
-        self._max_iter = int(self._config.pop('max_suggestions', 100))
+        self._max_iter = int(self._config.pop('max_suggestions', 5000))
 
         opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'})
         self._opt_targets: Dict[str, Literal[1, -1]] = {}
@@ -242,16 +242,24 @@ def bulk_register(self,
         is_not_empty : bool
             True if there is data to register, false otherwise.
         """
-        _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values",
-                  len(configs or []), len(scores or []), len(status or []))
+        _LOG.info(
+            "Update the optimizer with: %d configs, %d scores, %d status values",
+            len(configs or []), len(scores or []), len(status or [])
+        )
+
         if len(configs or []) != len(scores or []):
             raise ValueError("Numbers of configs and scores do not match.")
         if status is not None and len(configs or []) != len(status or []):
             raise ValueError("Numbers of configs and status values do not match.")
+
         has_data = bool(configs and scores)
-        if has_data and self._start_with_defaults:
+        if len(configs or []) == 1:
+            _LOG.info("Only one configuration provided, using defaults.")
+            self._start_with_defaults = True
+        elif has_data and self._start_with_defaults:
             _LOG.info("Prior data exists - do *NOT* use the default initialization.")
             self._start_with_defaults = False
+
         return has_data
 
     def suggest(self) -> TunableGroups:
diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
index e0235f76b9d..fb625285cec 100644
--- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
+++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -92,9 +92,9 @@ def name(self) -> str:
         return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"
 
     def bulk_register(self,
-                      configs: Sequence[dict],
-                      scores: Sequence[Optional[Dict[str, TunableValue]]],
-                      status: Optional[Sequence[Status]] = None) -> bool:
+                       configs: Sequence[dict],
+                       scores: Sequence[Optional[Dict[str, TunableValue]]],
+                       status: Optional[Sequence[Status]] = None) -> bool:
 
         if not super().bulk_register(configs, scores, status):
             return False
@@ -104,6 +104,9 @@ def bulk_register(self,
         df_scores = self._adjust_signs_df(
             pd.DataFrame([{} if score is None else score for score in scores]))
 
+        # Convert all score columns to numeric, coercing errors to NaN
+        df_scores = df_scores.apply(pd.to_numeric, errors='coerce')
+
         opt_targets = list(self._opt_targets)
         if status is not None:
             # Select only the completed trials, set scores for failed trials to +inf.
@@ -125,6 +128,7 @@ def bulk_register(self,
 
         return True
 
+
     def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
         """
         In-place adjust the signs of the scores for MINIMIZATION problem.
@@ -202,7 +206,18 @@ def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups],
         (df_config, df_score, _df_context) = self._opt.get_best_observations()
         if len(df_config) == 0:
             return (None, None)
+
         params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
         scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
+
+        # Check for NaN values in all optimization targets and replace with 0
+        for target in self._opt_targets:
+            if target in scores:
+                if pd.isna(scores[target]):
+                    _LOG.warning(f"'{target}' is NaN in the best observation. Setting it to 0.")
+                    scores[target] = 0
+            else:
+                _LOG.warning(f"'{target}' not found in the scores.")
+
         _LOG.debug("Best observation: %s score: %s", params, scores)
         return (scores, self._tunables.copy().assign(params))
diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py
index c59adc1c672..9fc5554a26e 100644
--- a/mlos_bench/mlos_bench/storage/sql/schema.py
+++ b/mlos_bench/mlos_bench/storage/sql/schema.py
@@ -47,7 +47,7 @@ class DbSchema:
     # pylint: disable=too-many-instance-attributes
 
     # Common string column sizes.
-    _ID_LEN = 512
+    _ID_LEN = 256
     _PARAM_VALUE_LEN = 1024
     _METRIC_VALUE_LEN = 255
     _STATUS_LEN = 16
diff --git a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc
index c72e8f4d156..fbee48b70e6 100644
--- a/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc
+++ b/mlos_bench/mlos_bench/tests/config/schemas/schedulers/test-cases/good/full/sync_sched-full.jsonc
@@ -7,6 +7,6 @@
         "experiment_id": "MyExperimentName",
         "config_id": 1,
         "trial_id": 1,
-        "max_trials": 100
+        "max_trials": 200
     }
 }
diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
index aa948b8125a..79bd63d1137 100644
--- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
+++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -35,7 +35,7 @@ def __init__(self, *,   # pylint: disable=too-many-locals,too-many-arguments
                  seed: Optional[int] = 0,
                  run_name: Optional[str] = None,
                  output_directory: Optional[str] = None,
-                 max_trials: int = 100,
+                 max_trials: int = 5000,
                  n_random_init: Optional[int] = None,
                  max_ratio: Optional[float] = None,
                  use_default_config: bool = False,
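Reviewer note (not part of the patch): a minimal sketch of the pandas behavior the mlos_core_optimizer.py changes rely on, using made-up score data. It assumes only public pandas APIs (pd.to_numeric with errors='coerce', pd.isna); the column names and values below are hypothetical, not taken from mlos_bench.

    import pandas as pd

    # Hypothetical raw scores as they might arrive in bulk_register():
    # one trial reports a numeric string, one a non-numeric value, one nothing.
    df_scores = pd.DataFrame([
        {"score": "12.5"},   # numeric string -> parsed to 12.5
        {"score": "error"},  # non-numeric -> coerced to NaN
        {},                  # missing score -> NaN
    ])

    # Same coercion step the patch adds before registering with the optimizer.
    df_scores = df_scores.apply(pd.to_numeric, errors="coerce")
    print(df_scores["score"].tolist())  # [12.5, nan, nan]

    # Same NaN guard the patch adds in get_best_observation(): replace NaN with 0.
    best = df_scores.iloc[0].to_dict()
    for target in ("score",):  # stand-in for self._opt_targets
        if target in best and pd.isna(best[target]):
            best[target] = 0

The coercion keeps non-numeric or missing scores from raising when the scores frame is cast to float for registration, and the guard keeps get_best_observation() from returning NaN to callers; whether 0 is the right fallback for a minimization target is a design choice worth confirming with the PR author.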