From 7dce3d10faace6bec601c7d34d7b5e24d161aa76 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 23 Jul 2024 16:58:19 -0500 Subject: [PATCH] Fixups and testing for cli config file parsing (#722) Further fixups to #717 Some parameters were not being respected from `--config test-cli-config.jsonc` files. Split out from #720 --- .../config/schedulers/sync_scheduler.jsonc | 2 +- mlos_bench/mlos_bench/launcher.py | 114 ++++++++++----- .../mlos_bench/optimizers/base_optimizer.py | 11 +- .../mlos_bench/schedulers/base_scheduler.py | 32 +++++ .../tests/config/cli/test-cli-config.jsonc | 2 +- .../tests/launcher_parse_args_test.py | 134 +++++++++++++----- 6 files changed, 215 insertions(+), 80 deletions(-) diff --git a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc index daf95d56fae..3dc8ff167ea 100644 --- a/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc +++ b/mlos_bench/mlos_bench/config/schedulers/sync_scheduler.jsonc @@ -6,7 +6,7 @@ "config": { "trial_config_repeat_count": 3, - "max_trials": -1, // Limited only in hte Optimizer logic/config. + "max_trials": -1, // Limited only in the Optimizer logic/config. "teardown": false } } diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 23421f195b2..9024b15adfd 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -44,6 +44,7 @@ class Launcher: def __init__(self, description: str, long_text: str = "", argv: Optional[List[str]] = None): # pylint: disable=too-many-statements + # pylint: disable=too-many-locals _LOG.info("Launch: %s", description) epilog = """ Additional --key=value pairs can be specified to augment or override @@ -56,7 +57,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st """ parser = argparse.ArgumentParser(description=f"{description} : {long_text}", epilog=epilog) - (args, args_rest) = self._parse_args(parser, argv) + (args, path_args, args_rest) = self._parse_args(parser, argv) # Bootstrap config loader: command line takes priority. config_path = args.config_path or [] @@ -87,11 +88,25 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self._parent_service: Service = LocalExecService(parent=self._config_loader) + # Prepare global_config from a combination of global config files, cli + # configs, and cli args. + args_dict = vars(args) + # teardown (bool) conflicts with Environment configs that use it for shell + # commands (list), so we exclude it from copying over + excluded_cli_args = path_args + ["teardown"] + # Include (almost) any item from the cli config file that either isn't in + # the cli args at all or whose cli arg is missing. + cli_config_args = { + key: val + for (key, val) in config.items() + if (args_dict.get(key) is None) and key not in excluded_cli_args + } + self.global_config = self._load_config( - config.get("globals", []) + (args.globals or []), - (args.config_path or []) + config.get("config_path", []), - args_rest, - {key: val for (key, val) in config.items() if key not in vars(args)}, + args_globals=config.get("globals", []) + (args.globals or []), + config_path=(args.config_path or []) + config.get("config_path", []), + args_rest=args_rest, + global_config=cli_config_args, ) # experiment_id is generally taken from --globals files, but we also allow # overriding it on the CLI. @@ -168,19 +183,35 @@ def service(self) -> Service: def _parse_args( parser: argparse.ArgumentParser, argv: Optional[List[str]], - ) -> Tuple[argparse.Namespace, List[str]]: + ) -> Tuple[argparse.Namespace, List[str], List[str]]: """Parse the command line arguments.""" - parser.add_argument( + + class PathArgsTracker: + """Simple class to help track which arguments are paths.""" + + def __init__(self, parser: argparse.ArgumentParser): + self._parser = parser + self.path_args: List[str] = [] + + def add_argument(self, *args: Any, **kwargs: Any) -> None: + """Add an argument to the parser and track its destination.""" + self.path_args.append(self._parser.add_argument(*args, **kwargs).dest) + + path_args_tracker = PathArgsTracker(parser) + + path_args_tracker.add_argument( "--config", required=False, - help="Main JSON5 configuration file. Its keys are the same as the" - + " command line options and can be overridden by the latter.\n" - + "\n" - + " See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ " - + " for additional config examples for this and other arguments.", + help=( + "Main JSON5 configuration file. Its keys are the same as the " + "command line options and can be overridden by the latter.\n" + "\n" + "See the `mlos_bench/config/` tree at https://github.com/microsoft/MLOS/ " + "for additional config examples for this and other arguments." + ), ) - parser.add_argument( + path_args_tracker.add_argument( "--log_file", "--log-file", required=False, @@ -192,11 +223,13 @@ def _parse_args( "--log-level", required=False, type=str, - help=f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}." - + " Set to DEBUG for debug, WARNING for warnings only.", + help=( + f"Logging level. Default is {logging.getLevelName(_LOG_LEVEL)}. " + "Set to DEBUG for debug, WARNING for warnings only." + ), ) - parser.add_argument( + path_args_tracker.add_argument( "--config_path", "--config-path", "--config-paths", @@ -207,7 +240,7 @@ def _parse_args( help="One or more locations of JSON config files.", ) - parser.add_argument( + path_args_tracker.add_argument( "--service", "--services", nargs="+", @@ -219,17 +252,19 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--environment", required=False, help="Path to JSON file with the configuration of the benchmarking environment(s).", ) - parser.add_argument( + path_args_tracker.add_argument( "--optimizer", required=False, - help="Path to the optimizer configuration file. If omitted, run" - + " a single trial with default (or specified in --tunable_values).", + help=( + "Path to the optimizer configuration file. If omitted, run " + "a single trial with default (or specified in --tunable_values)." + ), ) parser.add_argument( @@ -243,18 +278,22 @@ def _parse_args( ), ) - parser.add_argument( + path_args_tracker.add_argument( "--scheduler", required=False, - help="Path to the scheduler configuration file. By default, use" - + " a single worker synchronous scheduler.", + help=( + "Path to the scheduler configuration file. By default, use " + "a single worker synchronous scheduler." + ), ) - parser.add_argument( + path_args_tracker.add_argument( "--storage", required=False, - help="Path to the storage configuration file." - + " If omitted, use the ephemeral in-memory SQL storage.", + help=( + "Path to the storage configuration file. " + "If omitted, use the ephemeral in-memory SQL storage." + ), ) parser.add_argument( @@ -275,24 +314,28 @@ def _parse_args( help="Seed to use with --random_init", ) - parser.add_argument( + path_args_tracker.add_argument( "--tunable_values", "--tunable-values", nargs="+", action="extend", required=False, - help="Path to one or more JSON files that contain values of the tunable" - + " parameters. This can be used for a single trial (when no --optimizer" - + " is specified) or as default values for the first run in optimization.", + help=( + "Path to one or more JSON files that contain values of the tunable " + "parameters. This can be used for a single trial (when no --optimizer " + "is specified) or as default values for the first run in optimization." + ), ) - parser.add_argument( + path_args_tracker.add_argument( "--globals", nargs="+", action="extend", required=False, - help="Path to one or more JSON files that contain additional" - + " [private] parameters of the benchmarking environment.", + help=( + "Path to one or more JSON files that contain additional " + "[private] parameters of the benchmarking environment." + ), ) parser.add_argument( @@ -328,7 +371,7 @@ def _parse_args( argv = sys.argv[1:].copy() (args, args_rest) = parser.parse_known_args(argv) - return (args, args_rest) + return (args, path_args_tracker.path_args, args_rest) @staticmethod def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: @@ -361,6 +404,7 @@ def _try_parse_extra_args(cmdline: Iterable[str]) -> Dict[str, TunableValue]: def _load_config( self, + *, args_globals: Iterable[str], config_path: Iterable[str], args_rest: Iterable[str], diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index 6fa7ad87f47..42eeb8e3020 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -135,20 +135,23 @@ def __exit__( @property def current_iteration(self) -> int: """ - The current number of iterations (trials) registered. + The current number of iterations (suggestions) registered. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. """ return self._iter + # TODO: finish renaming iterations to suggestions. + # See Also: https://github.com/microsoft/MLOS/pull/713 + @property def max_iterations(self) -> int: """ - The maximum number of iterations (trials) to run. + The maximum number of iterations (suggestions) to run. Note: this may or may not be the same as the number of configurations. - See Also: Launcher.trial_config_repeat_count. + See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials. """ return self._max_iter diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index 30b016be74b..30805c189e0 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -14,6 +14,7 @@ from pytz import UTC from typing_extensions import Literal +from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.storage.base_storage import Storage @@ -64,6 +65,7 @@ def __init__( # pylint: disable=too-many-arguments source=global_config, required_keys=["experiment_id", "trial_id"], ) + self._validate_json_config(config) self._experiment_id = config["experiment_id"].strip() self._trial_id = int(config["trial_id"]) @@ -88,6 +90,36 @@ def __init__( # pylint: disable=too-many-arguments _LOG.debug("Scheduler instantiated: %s :: %s", self, config) + def _validate_json_config(self, config: dict) -> None: + """Reconstructs a basic json config that this class might have been instantiated + from in order to validate configs provided outside the file loading + mechanism. + """ + json_config: dict = { + "class": self.__class__.__module__ + "." + self.__class__.__name__, + } + if config: + json_config["config"] = config.copy() + # The json schema does not allow for -1 as a valid value for config_id. + # As it is just a default placeholder value, and not required, we can + # remove it from the config copy prior to validation safely. + config_id = json_config["config"].get("config_id") + if config_id is not None and isinstance(config_id, int) and config_id < 0: + json_config["config"].pop("config_id") + ConfigSchema.SCHEDULER.validate(json_config) + + @property + def trial_config_repeat_count(self) -> int: + """Gets the number of trials to run for a given config.""" + return self._trial_config_repeat_count + + @property + def max_trials(self) -> int: + """Gets the maximum number of trials to run for a given experiment, or -1 for no + limit. + """ + return self._max_trials + def __repr__(self) -> str: """ Produce a human-readable version of the Scheduler (mostly for logging). diff --git a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc index 9ffaa51180e..436507ce840 100644 --- a/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc +++ b/mlos_bench/mlos_bench/tests/config/cli/test-cli-config.jsonc @@ -17,7 +17,7 @@ "services/remote/mock/mock_fileshare_service.jsonc" ], - "trial_config_repeat_count": 1, + "trial_config_repeat_count": 2, "random_seed": 42, "random_init": true diff --git a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py index f577f215262..2b9c31c014b 100644 --- a/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py +++ b/mlos_bench/mlos_bench/tests/launcher_parse_args_test.py @@ -53,13 +53,11 @@ def config_paths() -> List[str]: ] -def test_launcher_args_parse_1(config_paths: List[str]) -> None: - """ - Test that using multiple --globals arguments works and that multiple space separated - options to --config-paths works. +# This is part of the minimal required args by the Launcher. +ENV_CONF_PATH = "environments/mock/mock_env.jsonc" - Check $var expansion and Environment loading. - """ + +def _get_launcher(desc: str, cli_args: str) -> Launcher: # The VSCode pytest wrapper actually starts in a different directory before # changing into the code directory, but doesn't update the PWD environment # variable so we use a separate variable. @@ -68,26 +66,73 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: if sys.platform == "win32": # Some env tweaks for platform compatibility. environ["USER"] = environ["USERNAME"] + launcher = Launcher(description=desc, argv=cli_args.split()) + # Check the basic parent service + assert isinstance(launcher.service, SupportsConfigLoading) # built-in + assert isinstance(launcher.service, SupportsLocalExec) # built-in + return launcher - # This is part of the minimal required args by the Launcher. - env_conf_path = "environments/mock/mock_env.jsonc" + +def test_launcher_args_parse_defaults(config_paths: List[str]) -> None: + """Test that we get the defaults we expect when using minimal config arg + examples. + """ + cli_args = ( + "--config-paths " + + " ".join(config_paths) + + f" --environment {ENV_CONF_PATH}" + + " --globals globals/global_test_config.jsonc" + ) + launcher = _get_launcher(__name__, cli_args) + # Check that the first --globals file is loaded and $var expansion is handled. + assert launcher.global_config["experiment_id"] == "MockExperiment" + assert launcher.global_config["testVmName"] == "MockExperiment-vm" + # Check that secondary expansion also works. + assert launcher.global_config["testVnetName"] == "MockExperiment-vm-vnet" + # Check that we can expand a $var in a config file that references an environment variable. + assert path_join(launcher.global_config["pathVarWithEnvVarRef"], abs_path=True) == path_join( + os.getcwd(), "foo", abs_path=True + ) + assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" + assert launcher.teardown # defaults + # Check that the environment that got loaded looks to be of the right type. + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" + assert check_class_name(launcher.environment, env_config["class"]) + # Check that the optimizer looks right. + assert isinstance(launcher.optimizer, OneShotOptimizer) + # Check that the optimizer got initialized with defaults. + assert launcher.optimizer.tunable_params.is_defaults() + assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer + # Check that we pick up the right scheduler config: + assert isinstance(launcher.scheduler, SyncScheduler) + assert launcher.scheduler.trial_config_repeat_count == 1 # default + assert launcher.scheduler.max_trials == -1 # default + + +def test_launcher_args_parse_1(config_paths: List[str]) -> None: + """ + Test that using multiple --globals arguments works and that multiple space separated + options to --config-paths works. + + Check $var expansion and Environment loading. + """ + # Here we have multiple paths following --config-paths and --service. cli_args = ( "--config-paths " + " ".join(config_paths) + " --service services/remote/mock/mock_auth_service.jsonc" - + " --service services/remote/mock/mock_remote_exec_service.jsonc" + + " services/remote/mock/mock_remote_exec_service.jsonc" + " --scheduler schedulers/sync_scheduler.jsonc" - + f" --environment {env_conf_path}" + + f" --environment {ENV_CONF_PATH}" + " --globals globals/global_test_config.jsonc" + " --globals globals/global_test_extra_config.jsonc" " --test_global_value_2 from-args" ) - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the first --globals file is loaded and $var expansion is handled. assert launcher.global_config["experiment_id"] == "MockExperiment" assert launcher.global_config["testVmName"] == "MockExperiment-vm" @@ -104,8 +149,8 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.global_config["varWithEnvVarRef"] == f"user:{getuser()}" assert launcher.teardown # Check that the environment that got loaded looks to be of the right type. - env_config = launcher.config_loader.load_config(env_conf_path, ConfigSchema.ENVIRONMENT) - assert check_class_name(launcher.environment, env_config["class"]) + env_config = launcher.config_loader.load_config(ENV_CONF_PATH, ConfigSchema.ENVIRONMENT) + assert env_config["class"] == "mlos_bench.environments.mock_env.MockEnv" # Check that the optimizer looks right. assert isinstance(launcher.optimizer, OneShotOptimizer) # Check that the optimizer got initialized with defaults. @@ -113,25 +158,19 @@ def test_launcher_args_parse_1(config_paths: List[str]) -> None: assert launcher.optimizer.max_iterations == 1 # value for OneShotOptimizer # Check that we pick up the right scheduler config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 3 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == -1 # pylint: disable=protected-access + assert ( + launcher.scheduler.trial_config_repeat_count == 3 + ) # from the custom sync_scheduler.jsonc config + assert launcher.scheduler.max_trials == -1 def test_launcher_args_parse_2(config_paths: List[str]) -> None: """Test multiple --config-path instances, --config file vs --arg, --var=val overrides, $var templates, option args, --random-init, etc. """ - # The VSCode pytest wrapper actually starts in a different directory before - # changing into the code directory, but doesn't update the PWD environment - # variable so we use a separate variable. - # See global_test_config.jsonc for more details. - environ["CUSTOM_PATH_FROM_ENV"] = os.getcwd() - if sys.platform == "win32": - # Some env tweaks for platform compatibility. - environ["USER"] = environ["USERNAME"] - config_file = "cli/test-cli-config.jsonc" globals_file = "globals/global_test_config.jsonc" + # Here we have multiple --config-path and --service args, each with their own path. cli_args = ( " ".join([f"--config-path {config_path}" for config_path in config_paths]) + f" --config {config_file}" @@ -145,13 +184,11 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: + " --trial-config-repeat-count 5" + " --max_trials 200" ) - launcher = Launcher(description=__name__, argv=cli_args.split()) - # Check that the parent service - assert isinstance(launcher.service, SupportsAuth) - assert isinstance(launcher.service, SupportsConfigLoading) - assert isinstance(launcher.service, SupportsFileShareOps) - assert isinstance(launcher.service, SupportsLocalExec) - assert isinstance(launcher.service, SupportsRemoteExec) + launcher = _get_launcher(__name__, cli_args) + # Check some additional features of the the parent service + assert isinstance(launcher.service, SupportsAuth) # from --service + assert isinstance(launcher.service, SupportsFileShareOps) # from --config + assert isinstance(launcher.service, SupportsRemoteExec) # from --service # Check that the --globals file is loaded and $var expansion is handled # using the value provided on the CLI. assert launcher.global_config["experiment_id"] == "MockeryExperiment" @@ -202,8 +239,8 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # Check that CLI parameter overrides JSON config: assert isinstance(launcher.scheduler, SyncScheduler) - assert launcher.scheduler._trial_config_repeat_count == 5 # pylint: disable=protected-access - assert launcher.scheduler._max_trials == 200 # pylint: disable=protected-access + assert launcher.scheduler.trial_config_repeat_count == 5 # from cli args + assert launcher.scheduler.max_trials == 200 # Check that the value from the file is overridden by the CLI arg. assert config["random_seed"] == 42 @@ -213,5 +250,24 @@ def test_launcher_args_parse_2(config_paths: List[str]) -> None: # assert launcher.optimizer.seed == 1234 +def test_launcher_args_parse_3(config_paths: List[str]) -> None: + """Check that cli file values take precedence over other values.""" + config_file = "cli/test-cli-config.jsonc" + globals_file = "globals/global_test_config.jsonc" + # Here we don't override values in test-cli-config with cli args but ensure that + # those take precedence over other config files. + cli_args = ( + " ".join([f"--config-path {config_path}" for config_path in config_paths]) + + f" --config {config_file}" + + f" --globals {globals_file}" + ) + launcher = _get_launcher(__name__, cli_args) + + # Check that CLI file parameter overrides JSON config: + assert isinstance(launcher.scheduler, SyncScheduler) + # from test-cli-config.jsonc (should override scheduler config file) + assert launcher.scheduler.trial_config_repeat_count == 2 + + if __name__ == "__main__": - pytest.main([__file__, "-n1"]) + pytest.main([__file__, "-n0"])