My fixes #815

Closed · wants to merge 3 commits
16 changes: 12 additions & 4 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -76,7 +76,7 @@ def __init__(self,
         # if True (default), use the already initialized values for the first iteration.
         self._start_with_defaults: bool = bool(
             strtobool(str(self._config.pop('start_with_defaults', True))))
-        self._max_iter = int(self._config.pop('max_suggestions', 100))
+        self._max_iter = int(self._config.pop('max_suggestions', 5000))
Contributor: Think we should drop this. It can be changed in configs.


opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'})
self._opt_targets: Dict[str, Literal[1, -1]] = {}
@@ -242,16 +242,24 @@ def bulk_register(self,
is_not_empty : bool
True if there is data to register, false otherwise.
"""
-        _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values",
-                  len(configs or []), len(scores or []), len(status or []))
+        _LOG.info(
+            "Update the optimizer with: %d configs, %d scores, %d status values",
+            len(configs or []), len(scores or []), len(status or [])
+        )
Contributor: What's the change here?

Author (@yshady, Jul 24, 2024): None at all :) just formatting, really.

Member: @yshady the general rule for submitting PRs (open-source or not) is to separate the functionality changes from cosmetic fixes and make sure each PR is as small as possible (but not smaller).

Author: Yeah, sorry, this is kind of a first for me :) like a kid learning to walk for the first time, haha.

Author: Let me try to clean this up tomorrow after my demo and explain some of this. I also still need to figure out how to use GitHub correctly.


         if len(configs or []) != len(scores or []):
             raise ValueError("Numbers of configs and scores do not match.")
         if status is not None and len(configs or []) != len(status or []):
             raise ValueError("Numbers of configs and status values do not match.")

         has_data = bool(configs and scores)
-        if has_data and self._start_with_defaults:
+        if len(configs or []) == 1:
+            _LOG.info("Only one configuration provided, using defaults.")
+            self._start_with_defaults = True
+        elif has_data and self._start_with_defaults:
Contributor: This seems like the meat of the change. Can you please update the PR description to explain this?

Author: If there was no config, it would freak out and say: hey, you have no trials in the DB, I cannot suggest a config based on nothing.

_LOG.info("Prior data exists - do *NOT* use the default initialization.")
self._start_with_defaults = False

return has_data

def suggest(self) -> TunableGroups:
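The effect of the new guard on the defaults flag can be sketched in isolation. This is a hypothetical standalone helper with simplified names, not the actual mlos_bench API; it only mirrors the branch logic shown in the diff above:

```python
# Hypothetical standalone version of the start-with-defaults logic in
# bulk_register (names simplified; not the actual mlos_bench API).
def next_start_with_defaults(n_configs: int, n_scores: int,
                             start_with_defaults: bool) -> bool:
    has_data = bool(n_configs and n_scores)
    if n_configs == 1:
        # Only one configuration registered: keep suggesting the defaults.
        return True
    if has_data and start_with_defaults:
        # Prior data exists: do not re-run the default initialization.
        return False
    return start_with_defaults

print(next_start_with_defaults(1, 1, False))   # True
print(next_start_with_defaults(10, 10, True))  # False
```

With an empty registration (no configs, no scores) the flag is left unchanged, which matches the diff's behavior of only flipping the flag when data is present.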
21 changes: 18 additions & 3 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -92,9 +92,9 @@ def name(self) -> str:
return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

     def bulk_register(self,
-                      configs: Sequence[dict],
-                      scores: Sequence[Optional[Dict[str, TunableValue]]],
-                      status: Optional[Sequence[Status]] = None) -> bool:
+                     configs: Sequence[dict],
+                     scores: Sequence[Optional[Dict[str, TunableValue]]],
+                     status: Optional[Sequence[Status]] = None) -> bool:
Contributor: Please undo whitespace-only changes. They make it harder to understand what's relevant. Thanks!

Author: Yeah sorry, I only use GitHub for my personal code and never work in teams with it, so this is a first-time experience, really. My school has decided that math is more important than GitHub experience.


if not super().bulk_register(configs, scores, status):
return False
@@ -104,6 +104,9 @@ def bulk_register(self,
df_scores = self._adjust_signs_df(
pd.DataFrame([{} if score is None else score for score in scores]))

+        # Convert all score columns to numeric, coercing errors to NaN
+        df_scores = df_scores.apply(pd.to_numeric, errors='coerce')
Contributor: Was this fixed already in #789?

Author: It should be, but I haven't tested.

Member: @bpkroth yes, this is a more heavy-handed version of what we already have in master.
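The coercion added above can be illustrated standalone. The frame below is hypothetical data, not from the PR; it just shows what `pd.to_numeric` with `errors='coerce'` does to mixed-type score columns:

```python
import pandas as pd

# Hypothetical scores frame: one column arrived as strings, one value is unparseable.
df_scores = pd.DataFrame({"score": ["1.5", "2.0", "oops"], "latency": [10, 20, 30]})

# Coerce every column to numeric; anything unparseable becomes NaN.
df_scores = df_scores.apply(pd.to_numeric, errors="coerce")
print(df_scores["score"].tolist())  # [1.5, 2.0, nan]
```

This is why the reviewers call it heavy-handed: any stray non-numeric entry silently becomes NaN instead of raising an error.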


opt_targets = list(self._opt_targets)
if status is not None:
# Select only the completed trials, set scores for failed trials to +inf.
@@ -125,6 +128,7 @@

return True


def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
"""
In-place adjust the signs of the scores for MINIMIZATION problem.
@@ -202,7 +206,18 @@ def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups],
         (df_config, df_score, _df_context) = self._opt.get_best_observations()
         if len(df_config) == 0:
             return (None, None)
+
         params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
         scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
+
+        # Check for NaN values in all optimization targets and replace with 0
+        for target in self._opt_targets:
+            if target in scores:
+                if pd.isna(scores[target]):
+                    _LOG.warning(f"'{target}' is NaN in the best observation. Setting it to 0.")
+                    scores[target] = 0
Contributor: I think this should be optional behavior. It might be better to mark the Trial as FAILED instead and ask the user to check their scripts.

Contributor: See also #523

Author: I agree setting NaN to 0 is not ideal, but I desperately wanted long-running experiments.

Member: I think it is totally fine to report NaNs here, say, for scores that were not the primary optimization target or were absent from a particular trial altogether (e.g., because of an experiments merge). Not to mention that 0 may not be the right value to impute, depending on your optimization direction. I am afraid that @yshady is bending MLOS to make the UI work smoothly, and I think it should be the other way around :)

Author: @motus no, the UI is separate from MLOS experiments. All it does is run the commands. Yes, these fixes were just to make MLOS run for long periods of time; they are shortcuts and not ideal. @eujing can confirm as well: I am bending MLOS to just work :) not just for the UI, but for the sake of collecting data.

Author: @motus also, the optimizer does not like NaN values, in my experience.

Author: I will try to impute the correct value tomorrow.

+            else:
+                _LOG.warning(f"'{target}' not found in the scores.")

         _LOG.debug("Best observation: %s score: %s", params, scores)
         return (scores, self._tunables.copy().assign(params))
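As an alternative to the hard-coded 0, the direction-aware imputation the reviewers hint at could look roughly like this. This is a sketch with simplified names, not the project's API; it only assumes the +1 (minimize) / -1 (maximize) sign convention that `_opt_targets` uses in base_optimizer.py:

```python
import math
import pandas as pd

def impute_worst(scores: dict, opt_targets: dict) -> dict:
    """Replace NaN scores with the worst value for each optimization direction.

    opt_targets maps metric name -> +1 (minimize) or -1 (maximize).
    """
    out = dict(scores)
    for target, direction in opt_targets.items():
        if target in out and pd.isna(out[target]):
            # Worst case: +inf when minimizing, -inf when maximizing.
            out[target] = math.inf * direction
    return out

print(impute_worst({"score": float("nan")}, {"score": 1}))   # {'score': inf}
print(impute_worst({"score": float("nan")}, {"score": -1}))  # {'score': -inf}
```

Imputing the worst value (rather than 0) keeps a failed or missing score from ever looking like the best observation, regardless of direction.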
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/storage/sql/schema.py
@@ -47,7 +47,7 @@ class DbSchema:
# pylint: disable=too-many-instance-attributes

# Common string column sizes.
-    _ID_LEN = 512
+    _ID_LEN = 256
Contributor: What's this change about?

Author: Some SQL stuff; Eu Jing and I both have this, and I believe Kelly does too.

Member: I don't think we need that. I believe this is from the times @eujing was trying to work around my bug in MLOS when I was saving callables in the DB instead of actual values :)

Contributor (@eujing, Jul 25, 2024): Yes, @DelphianCalamity, @yshady, and I have been needing this change whenever we use a fresh database and SQLAlchemy is trying to create tables for the first time. @motus I think you did partially fix the original issue we had, but this column size is still too large for MySQL backends.

Summarizing a thread we have in Teams with Kelly: the current offending table is trial_param.
The DDL is roughly:

CREATE TABLE trial_param (
  exp_id VARCHAR(512) NOT NULL,
  trial_id INTEGER NOT NULL,
  param_id VARCHAR(512) NOT NULL,
  param_value VARCHAR(1024),
  PRIMARY KEY (exp_id, trial_id, param_id),
  FOREIGN KEY (exp_id, trial_id) REFERENCES trial (exp_id, trial_id)
)

The primary key becomes an issue when using standard encodings like utf8mb3 (1-3 bytes per char) or utf8mb4 (up to 4 bytes per char).
The upper bound for (VARCHAR(512) + INTEGER + VARCHAR(512)) in these encodings exceeds 3072 bytes, the limit for MySQL index keys.

This results in the MySQL error "Specified key was too long; max key length is 3072 bytes" when executing DDL on a new database.

But I agree, these changes should be split up into individual PRs. I think Yaseen just wanted to store all his changes in a branch, just in case.
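The byte math behind the change can be checked with a quick back-of-the-envelope calculation. This ignores MySQL's per-column length-prefix overhead, so the totals are approximate, but the comparison against the 3072-byte limit holds:

```python
BYTES_PER_CHAR = 4    # utf8mb4 worst case
INT_BYTES = 4         # MySQL INTEGER
MYSQL_KEY_LIMIT = 3072

def key_bytes(id_len: int) -> int:
    # Primary key of trial_param: exp_id VARCHAR + trial_id INTEGER + param_id VARCHAR.
    return id_len * BYTES_PER_CHAR + INT_BYTES + id_len * BYTES_PER_CHAR

print(key_bytes(512))  # 4100 -> exceeds the 3072-byte limit, DDL fails
print(key_bytes(256))  # 2052 -> fits
```

So dropping _ID_LEN from 512 to 256 brings the worst-case utf8mb4 key size back under the InnoDB limit.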

Author: Thanks for explaining, @eujing.

_PARAM_VALUE_LEN = 1024
_METRIC_VALUE_LEN = 255
_STATUS_LEN = 16
@@ -7,6 +7,6 @@
"experiment_id": "MyExperimentName",
"config_id": 1,
"trial_id": 1,
-  "max_trials": 100
+  "max_trials": 200
Contributor: Revert

}
}
@@ -35,7 +35,7 @@ def __init__(self, *,  # pylint: disable=too-many-locals,too-many-arguments
seed: Optional[int] = 0,
run_name: Optional[str] = None,
output_directory: Optional[str] = None,
-                 max_trials: int = 100,
+                 max_trials: int = 5000,
Contributor: Revert

n_random_init: Optional[int] = None,
max_ratio: Optional[float] = None,
use_default_config: bool = False,