-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
My fixes #815
My fixes #815
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,7 +76,7 @@ def __init__(self, | |
# if True (default), use the already initialized values for the first iteration. | ||
self._start_with_defaults: bool = bool( | ||
strtobool(str(self._config.pop('start_with_defaults', True)))) | ||
self._max_iter = int(self._config.pop('max_suggestions', 100)) | ||
self._max_iter = int(self._config.pop('max_suggestions', 5000)) | ||
|
||
opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'}) | ||
self._opt_targets: Dict[str, Literal[1, -1]] = {} | ||
|
@@ -242,16 +242,24 @@ def bulk_register(self, | |
is_not_empty : bool | ||
True if there is data to register, false otherwise. | ||
""" | ||
_LOG.info("Update the optimizer with: %d configs, %d scores, %d status values", | ||
len(configs or []), len(scores or []), len(status or [])) | ||
_LOG.info( | ||
"Update the optimizer with: %d configs, %d scores, %d status values", | ||
len(configs or []), len(scores or []), len(status or []) | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the change here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. none at all :) just format really There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yshady the general rule for submitting PRs (open-source or not) is to separate the functionality changes from cosmetic fixes and make sure each PR is as small as possible (but not smaller) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah sorry this is kinda a first for me :) like a kid learning to walk for first time haha There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let me try to clean this up tomorrow after my demo and explain some of the stuff. I also still need to figure out how to use GitHub correctly |
||
|
||
if len(configs or []) != len(scores or []): | ||
raise ValueError("Numbers of configs and scores do not match.") | ||
if status is not None and len(configs or []) != len(status or []): | ||
raise ValueError("Numbers of configs and status values do not match.") | ||
|
||
has_data = bool(configs and scores) | ||
if has_data and self._start_with_defaults: | ||
if len(configs or []) == 1: | ||
_LOG.info("Only one configuration provided, using defaults.") | ||
self._start_with_defaults = True | ||
elif has_data and self._start_with_defaults: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems like the meat of the change. Can you please update the PR description to explain this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If there was no config, it would freak out and say: "hey, you have no trials in the DB — I cannot suggest a config based on nothing." |
||
_LOG.info("Prior data exists - do *NOT* use the default initialization.") | ||
self._start_with_defaults = False | ||
|
||
return has_data | ||
|
||
def suggest(self) -> TunableGroups: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,9 +92,9 @@ def name(self) -> str: | |
return f"{self.__class__.__name__}:{self._opt.__class__.__name__}" | ||
|
||
def bulk_register(self, | ||
configs: Sequence[dict], | ||
scores: Sequence[Optional[Dict[str, TunableValue]]], | ||
status: Optional[Sequence[Status]] = None) -> bool: | ||
configs: Sequence[dict], | ||
scores: Sequence[Optional[Dict[str, TunableValue]]], | ||
status: Optional[Sequence[Status]] = None) -> bool: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please undo whitespace-only changes. They make it harder to understand what's relevant. Thanks! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, sorry — I only use GitHub for my personal code and never work in teams using it, so this is really a first-time experience. My school has decided that math is more important than GitHub experience |
||
|
||
if not super().bulk_register(configs, scores, status): | ||
return False | ||
|
@@ -104,6 +104,9 @@ def bulk_register(self, | |
df_scores = self._adjust_signs_df( | ||
pd.DataFrame([{} if score is None else score for score in scores])) | ||
|
||
# Convert all score columns to numeric, coercing errors to NaN | ||
df_scores = df_scores.apply(pd.to_numeric, errors='coerce') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this fixed already in #789? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be, but I haven't tested. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @bpkroth yes, this is a more heavy-handed version of what we already have in master |
||
|
||
opt_targets = list(self._opt_targets) | ||
if status is not None: | ||
# Select only the completed trials, set scores for failed trials to +inf. | ||
|
@@ -125,6 +128,7 @@ def bulk_register(self, | |
|
||
return True | ||
|
||
|
||
def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame: | ||
""" | ||
In-place adjust the signs of the scores for MINIMIZATION problem. | ||
|
@@ -202,7 +206,18 @@ def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], | |
(df_config, df_score, _df_context) = self._opt.get_best_observations() | ||
if len(df_config) == 0: | ||
return (None, None) | ||
|
||
params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict()) | ||
scores = self._adjust_signs_df(df_score).iloc[0].to_dict() | ||
|
||
# Check for NaN values in all optimization targets and replace with 0 | ||
for target in self._opt_targets: | ||
if target in scores: | ||
if pd.isna(scores[target]): | ||
_LOG.warning(f"'{target}' is NaN in the best observation. Setting it to 0.") | ||
scores[target] = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be optional behavior. It might be better to mark the Trial as FAILED instead and ask the user to check their scripts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See also #523 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree setting nan to 0 is not ideal but I desperately wanted long running experiments There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is totally fine to report NaNs here - say, for scores that were not the primary optimization target or at all absent in a particular trial (e.g., because of the experiments' merge). Not to mention that 0 may not be the right value to impute, depending on your optimization direction. I am afraid that @yshady is bending MLOS to make the UI work smoothly and I think it should be the other way around :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @motus no the UI is separate from MLOS experiments. All it does is run the commands. Yes these fixes were to just make MLOS run for long periods of time and they are shortcuts and not ideal. @eujing can confirm as well, I am bending MLOS to just work :) not just for the UI but for the sake of collecting data There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
@motus also optimizer does not like nan values from my experience There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will try to impute the correct value tomorrow |
||
else: | ||
_LOG.warning(f"'{target}' not found in the scores.") | ||
|
||
_LOG.debug("Best observation: %s score: %s", params, scores) | ||
return (scores, self._tunables.copy().assign(params)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,7 @@ class DbSchema: | |
# pylint: disable=too-many-instance-attributes | ||
|
||
# Common string column sizes. | ||
_ID_LEN = 512 | ||
_ID_LEN = 256 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's this change about? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. some sql stuff me and Eu Jing both have this, I believe kelly too There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need that. I believe this is from the times @eujing was trying to work around my bug in MLOS when I was saving callables in the DB instead of actual values :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, @DelphianCalamity, @yshady and I have been needing this change whenever we use a fresh database and sqlalchemy is trying to create tables for the first time. @motus I think you did partially fix the original issue we had but, this column size is still too large for MySQL-backends. Summarizing a thread we have in teams with Kelly, the current offending table is CREATE TABLE trial_param (
exp_id VARCHAR(512) NOT NULL,
trial_id INTEGER NOT NULL,
param_id VARCHAR(512) NOT NULL,
param_value VARCHAR(1024),
PRIMARY KEY (exp_id, trial_id, param_id),
FOREIGN KEY (exp_id, trial_id) REFERENCES trial (exp_id, trial_id)
) The primary key becomes an issue when using standard encodings like utf8mb3 (1-3 bytes per char) or utf8mb4 (more bytes per char). This results in a MySQL error (presumably the "Specified key was too long" index-key-length error — the exact message was lost here). But I agree, these changes should be split up into individual PRs. I think Yaseen just wanted to store all his changes in a branch in case. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for explaining @eujing |
||
_PARAM_VALUE_LEN = 1024 | ||
_METRIC_VALUE_LEN = 255 | ||
_STATUS_LEN = 16 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,6 @@ | |
"experiment_id": "MyExperimentName", | ||
"config_id": 1, | ||
"trial_id": 1, | ||
"max_trials": 100 | ||
"max_trials": 200 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Revert |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,7 @@ def __init__(self, *, # pylint: disable=too-many-locals,too-many-arguments | |
seed: Optional[int] = 0, | ||
run_name: Optional[str] = None, | ||
output_directory: Optional[str] = None, | ||
max_trials: int = 100, | ||
max_trials: int = 5000, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Revert |
||
n_random_init: Optional[int] = None, | ||
max_ratio: Optional[float] = None, | ||
use_default_config: bool = False, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think we should drop this. It can be changed in configs.