From 082d84143694890000add1a348374c1e70679cdf Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:45:34 +0000 Subject: [PATCH 01/43] add initial alembic configs --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 118 ++++++++++++++++++ .../mlos_bench/storage/sql/alembic/README.md | 43 +++++++ .../mlos_bench/storage/sql/alembic/env.py | 78 ++++++++++++ .../storage/sql/alembic/script.py.mako | 26 ++++ .../versions/d2a708351ba8_add_alembic.py | 30 +++++ ...f83fb8ae7fc4_add_trial_runner_id_column.py | 30 +++++ mlos_bench/pyproject.toml | 4 + mlos_bench/setup.py | 8 +- 8 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic.ini create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/README.md create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/env.py create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini new file mode 100644 index 00000000000..1b6d92435c4 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -0,0 +1,118 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +timezone = UTC + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# See README.md for details. +sqlalchemy.url = sqlite:///mlos_bench.sqlite + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md new file mode 100644 index 00000000000..ac20a0a45d7 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -0,0 +1,43 @@ +# Schema Evolution with Alembic + +This document contains some notes on how to use [`alembic`](https://alembic.sqlalchemy.org/en/latest/) for schema evolution in the `mlos_bench` project. + +## Overview + +1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: + + ```sh + cd mlos_bench/storage/sql + rm mlos_bench.sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` + + > This allows `alembic` to automatically generate a migration script from the current schema. + +2. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + + > Keep each change small and atomic. + > For example, if you want to add a new column, do that in one change. + > If you want to rename a column, do that in another change. + +3. Generate a new migration script with the following command: + + ```sh + alembic revision --autogenerate -m "Descriptive text about the change." + ``` + +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. + +5. Verify that the migration script works by running the following command: + + ```sh + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` + + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + +7. Merge that to the `main` branch. + +8. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py new file mode 100644 index 00000000000..a6d86043fe0 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -0,0 +1,78 @@ +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +from mlos_bench.storage.sql.schema import DbSchema +target_metadata = DbSchema(engine=None).meta +#target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako new file mode 100644 index 00000000000..fbc4b07dcef --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py new file mode 100644 index 00000000000..52b157f484a --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -0,0 +1,30 @@ +"""Add alembic + +Revision ID: d2a708351ba8 +Revises: +Create Date: 2025-01-03 21:21:13.978672+00:00 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'd2a708351ba8' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py new file mode 100644 index 00000000000..62a5629dd45 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -0,0 +1,30 @@ +"""Add trial_runner_id column + +Revision ID: f83fb8ae7fc4 +Revises: d2a708351ba8 +Create Date: 2025-01-03 21:25:48.848196+00:00 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'f83fb8ae7fc4' +down_revision: Union[str, None] = 'd2a708351ba8' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('trial', sa.Column('trial_runner_id', sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('trial', 'trial_runner_id') + # ### end Alembic commands ### diff --git a/mlos_bench/pyproject.toml b/mlos_bench/pyproject.toml index 72bc0263077..bce7f79bbbb 100644 --- a/mlos_bench/pyproject.toml +++ b/mlos_bench/pyproject.toml @@ -61,6 +61,10 @@ exclude = ["*.tests", "*.tests.*"] [tool.setuptools.package-data] mlos_bench = [ "py.typed", "**/*.pyi", + "storage/sql/alembic.ini", + "storage/sql/alembic/env.py", + "storage/sql/alembic/script.py.mako", + "storage/sql/alembic/revisions/*.py", "config/**/*.md", "config/**/*.jsonc", "config/**/*.json", diff --git a/mlos_bench/setup.py b/mlos_bench/setup.py index cb3d975d92d..6fe8b7c8b6a 100644 --- a/mlos_bench/setup.py +++ b/mlos_bench/setup.py @@ -69,11 +69,11 @@ def _get_long_desc_from_readme(base_url: str) -> dict: # Additional tools for extra functionality. "azure": ["azure-storage-file-share", "azure-identity", "azure-keyvault"], "ssh": ["asyncssh<2.15.0"], # FIXME: asyncssh 2.15.0 has a bug that breaks the tests - "storage-sql-duckdb": ["sqlalchemy", "duckdb_engine"], - "storage-sql-mysql": ["sqlalchemy", "mysql-connector-python"], - "storage-sql-postgres": ["sqlalchemy", "psycopg2"], + "storage-sql-duckdb": ["sqlalchemy", "alembic", "duckdb_engine"], + "storage-sql-mysql": ["sqlalchemy", "alembic", "mysql-connector-python"], + "storage-sql-postgres": ["sqlalchemy", , "alembic", "psycopg2"], # sqlite3 comes with python, so we don't need to install it. - "storage-sql-sqlite": ["sqlalchemy"], + "storage-sql-sqlite": ["sqlalchemy", "alembic"], # Transitive extra_requires from mlos-core. "flaml": ["flaml[blendsearch]"], "smac": ["smac"], From 7cb1fa3404b773345c05be62c7c15426db642c16 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:46:51 +0000 Subject: [PATCH 02/43] tweaks necessary for alembic --- mlos_bench/mlos_bench/storage/sql/schema.py | 31 +++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index e6e36ea2d14..8a45963f9a5 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -15,8 +15,10 @@ """ import logging -from typing import Any, List +from typing import Any, List, Optional +import alembic +import alembic.command from sqlalchemy import ( Column, DateTime, @@ -69,11 +71,19 @@ class DbSchema: _METRIC_VALUE_LEN = 255 _STATUS_LEN = 16 - def __init__(self, engine: Engine): - """Declare the SQLAlchemy schema for the database.""" + def __init__(self, engine: Optional[Engine]): + """ + Declare the SQLAlchemy schema for the database. + + Parameters + ---------- + engine : sqlalchemy.engine.Engine + The SQLAlchemy engine to use for the DB schema. + Listed as optional for :external:mod:`alembic` schema migration + purposes, but won't be functional without one. + """ _LOG.info("Create the DB schema for: %s", engine) self._engine = engine - # TODO: bind for automatic schema updates? (#649) self._meta = MetaData() self.experiment = Table( @@ -103,7 +113,7 @@ def __init__(self, engine: Engine): ) # A workaround for SQLAlchemy issue with autoincrement in DuckDB: - if engine.dialect.name == "duckdb": + if engine and engine.dialect.name == "duckdb": seq_config_id = Sequence("seq_config_id") col_config_id = Column( "config_id", @@ -217,12 +227,22 @@ def __init__(self, engine: Engine): _LOG.debug("Schema: %s", self._meta) + @property + def meta(self) -> MetaData: + """Return the SQLAlchemy MetaData object.""" + return self._meta + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") + assert self._engine self._meta.create_all(self._engine) return self + def update(self) -> "DbSchema": + """Updates the DB schema to the latest version.""" + raise NotImplementedError("TODO: Schema updates are not supported yet.") + def __repr__(self) -> str: """ Produce a string with all SQL statements required to create the schema from @@ -237,6 +257,7 @@ def __repr__(self) -> str: sql : str A multi-line string with SQL statements to create the DB schema from scratch. """ + assert self._engine ddl = _DDL(self._engine.dialect) mock_engine = create_mock_engine(self._engine.url, executor=ddl) self._meta.create_all(mock_engine, checkfirst=False) From a40cfd1e1cf8c43c6711e3fca04ff1e5318d8363 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:47:47 +0000 Subject: [PATCH 03/43] wip for allowing a create/update schema on demand --- mlos_bench/mlos_bench/launcher.py | 31 ++++++++++++++++--- mlos_bench/mlos_bench/storage/base_storage.py | 4 +++ mlos_bench/mlos_bench/storage/sql/schema.py | 2 -- mlos_bench/mlos_bench/storage/sql/storage.py | 11 +++++-- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index b970b98456e..0eeee95de56 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -124,6 +124,17 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) + self.storage = self._load_storage( + args.storage or config.get("storage"), + lazy_schema_create=args.create_update_storage_schema_only or None, + ) + _LOG.info("Init storage: %s", self.storage) + + if args.create_update_storage_schema_only: + _LOG.info("Create/update storage schema only.") + self.storage.update_schema() + sys.exit(0) + # --service cli args should override the config file values. service_files: List[str] = config.get("services", []) + (args.service or []) assert isinstance(self._parent_service, SupportsConfigLoading) @@ -158,9 +169,6 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.optimizer = self._load_optimizer(args.optimizer or config.get("optimizer")) _LOG.info("Init optimizer: %s", self.optimizer) - self.storage = self._load_storage(args.storage or config.get("storage")) - _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = ( bool(args.teardown) if args.teardown is not None @@ -365,6 +373,15 @@ def add_argument(self, *args: Any, **kwargs: Any) -> None: """, ) + parser.add_argument( + "--create-update-storage-schema-only", + required=False, + default=False, + dest="create_update_storage_schema_only", + action="store_true", + help="Makes sure that the storage schema is update to date for the current version of mlos_bench.", + ) + # By default we use the command line arguments, but allow the caller to # provide some explicitly for testing purposes. if argv is None: @@ -482,7 +499,11 @@ def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: ) return optimizer - def _load_storage(self, args_storage: Optional[str]) -> Storage: + def _load_storage( + self, + args_storage: Optional[str], + lazy_schema_create: Optional[bool] = None, + ) -> Storage: """ Instantiate the Storage object from JSON file provided in the --storage command line parameter. @@ -503,6 +524,8 @@ def _load_storage(self, args_storage: Optional[str]) -> Storage: ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, Dict) + if lazy_schema_create: + class_config["lazy_schema_create"] = lazy_schema_create storage = self._config_loader.build_storage( service=self._parent_service, config=class_config, diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index a6b3a1aa90a..ded785c7ab8 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -64,6 +64,10 @@ def __init__( self._config = config.copy() self._global_config = global_config or {} + @abstractmethod + def update_schema(self) -> None: + """Update the schema of the storage backend if needed.""" + def _validate_json_config(self, config: dict) -> None: """Reconstructs a basic json config that this class might have been instantiated from in order to validate configs provided outside the file loading diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 8a45963f9a5..dbd17f72b8b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -17,8 +17,6 @@ import logging from typing import Any, List, Optional -import alembic -import alembic.command from sqlalchemy import ( Column, DateTime, diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 495b38e6546..401e8e22a50 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -38,7 +38,8 @@ def __init__( self._repr = f"{self._url.get_backend_name()}:{self._url.database}" _LOG.info("Connect to the database: %s", self) self._engine = create_engine(self._url, echo=self._log_sql) - self._db_schema: DbSchema + self._db_schema = DbSchema(self._engine) + self._schema_created = False if not lazy_schema_create: assert self._schema else: @@ -47,12 +48,16 @@ def __init__( @property def _schema(self) -> DbSchema: """Lazily create schema upon first access.""" - if not hasattr(self, "_db_schema"): - self._db_schema = DbSchema(self._engine).create() + if not self._schema_created: + self._db_schema.create() if _LOG.isEnabledFor(logging.DEBUG): _LOG.debug("DDL statements:\n%s", self._schema) return self._db_schema + def update_schema(self) -> None: + """Update the database schema.""" + self._db_schema.update() + def __repr__(self) -> str: return self._repr From 94d7cbefed2681fd8c3c26cd96756880df91afc2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:47:58 +0000 Subject: [PATCH 04/43] formatting --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 15 ++++++++------- .../alembic/versions/d2a708351ba8_add_alembic.py | 9 ++++++--- .../f83fb8ae7fc4_add_trial_runner_id_column.py | 15 +++++++++------ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index a6d86043fe0..976a72b72c8 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -1,9 +1,11 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# from logging.config import fileConfig -from sqlalchemy import engine_from_config -from sqlalchemy import pool - from alembic import context +from sqlalchemy import engine_from_config, pool # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -17,8 +19,9 @@ # add your model's MetaData object here # for 'autogenerate' support from mlos_bench.storage.sql.schema import DbSchema + target_metadata = DbSchema(engine=None).meta -#target_metadata = None +# target_metadata = None # other values from the config, defined by the needs of env.py, # can be acquired: @@ -64,9 +67,7 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure( - connection=connection, target_metadata=target_metadata - ) + context.configure(connection=connection, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py index 52b157f484a..62b550d5578 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -1,3 +1,7 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# """Add alembic Revision ID: d2a708351ba8 @@ -7,12 +11,11 @@ """ from typing import Sequence, Union -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. -revision: str = 'd2a708351ba8' +revision: str = "d2a708351ba8" down_revision: Union[str, None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index 62a5629dd45..d836708bb12 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -1,3 +1,7 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# """Add trial_runner_id column Revision ID: f83fb8ae7fc4 @@ -7,24 +11,23 @@ """ from typing import Sequence, Union -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. -revision: str = 'f83fb8ae7fc4' -down_revision: Union[str, None] = 'd2a708351ba8' +revision: str = "f83fb8ae7fc4" +down_revision: Union[str, None] = "d2a708351ba8" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column('trial', sa.Column('trial_runner_id', sa.Integer(), nullable=True)) + op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True)) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('trial', 'trial_runner_id') + op.drop_column("trial", "trial_runner_id") # ### end Alembic commands ### From 92796ef932bd9f272d4b986f80685c96e4e35df9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:48:11 +0000 Subject: [PATCH 05/43] add a new column --- mlos_bench/mlos_bench/storage/sql/schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index dbd17f72b8b..451f06d2517 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -143,6 +143,7 @@ def __init__(self, engine: Optional[Engine]): Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), + Column("trial_runner_id", Integer, nullable=True, default="NULL"), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: From 309d53065b5bf23ad5390f0740cb9885eb358f9e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:52:30 +0000 Subject: [PATCH 06/43] linting --- mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako | 6 ++++-- .../sql/alembic/versions/d2a708351ba8_add_alembic.py | 8 +++++--- .../versions/f83fb8ae7fc4_add_trial_runner_id_column.py | 2 ++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako index fbc4b07dcef..57995a4b132 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako +++ b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako @@ -19,8 +19,10 @@ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} def upgrade() -> None: - ${upgrades if upgrades else "pass"} + """The schema upgrade script for this revision.""" + ${upgrades if upgrades else "pass # pylint: disable=unnecessary-pass"} def downgrade() -> None: - ${downgrades if downgrades else "pass"} + """The schema downgrade script for this revision.""" + ${downgrades if downgrades else "pass # pylint: disable=unnecessary-pass"} diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py index 62b550d5578..43c797e5a16 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -5,7 +5,7 @@ """Add alembic Revision ID: d2a708351ba8 -Revises: +Revises: Create Date: 2025-01-03 21:21:13.978672+00:00 """ @@ -22,12 +22,14 @@ def upgrade() -> None: + """The schema upgrade script for this revision.""" # ### commands auto generated by Alembic - please adjust! ### - pass + pass # pylint: disable=unnecessary-pass # ### end Alembic commands ### def downgrade() -> None: + """The schema downgrade script for this revision.""" # ### commands auto generated by Alembic - please adjust! ### - pass + pass # pylint: disable=unnecessary-pass # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index d836708bb12..1484878da12 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -22,12 +22,14 @@ def upgrade() -> None: + """The schema upgrade script for this revision.""" # ### commands auto generated by Alembic - please adjust! ### op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True)) # ### end Alembic commands ### def downgrade() -> None: + """The schema downgrade script for this revision.""" # ### commands auto generated by Alembic - please adjust! ### op.drop_column("trial", "trial_runner_id") # ### end Alembic commands ### From f15da23871318c61822b866978a674f4c99e62fa Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 22:17:14 +0000 Subject: [PATCH 07/43] implement schema updates --- .../mlos_bench/storage/sql/alembic/env.py | 25 +++++++++++++------ mlos_bench/mlos_bench/storage/sql/schema.py | 18 ++++++++++++- mlos_bench/mlos_bench/storage/sql/storage.py | 7 +++++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 976a72b72c8..0fc26c69b2e 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -60,19 +60,28 @@ def run_migrations_online() -> None: and associate a connection with the context. """ - connectable = engine_from_config( - config.get_section(config.config_ini_section, {}), - prefix="sqlalchemy.", - poolclass=pool.NullPool, - ) + connectable = config.attributes.get("connection", None) + + if connectable is None: + # only create Engine if we don't have a Connection + # from the outside + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) - with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + else: + context.configure(connection=connectable, target_metadata=target_metadata) with context.begin_transaction(): context.run_migrations() - if context.is_offline_mode(): run_migrations_offline() else: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 451f06d2517..e0f7e04549d 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -15,8 +15,10 @@ """ import logging +import sys from typing import Any, List, Optional +from alembic import command, config from sqlalchemy import ( Column, DateTime, @@ -34,6 +36,13 @@ ) from sqlalchemy.engine import Engine +from mlos_bench.util import path_join + +if sys.version_info < (3, 10): + from importlib_resources import files +else: + from importlib.resources import files + _LOG = logging.getLogger(__name__) @@ -240,7 +249,14 @@ def create(self) -> "DbSchema": def update(self) -> "DbSchema": """Updates the DB schema to the latest version.""" - raise NotImplementedError("TODO: Schema updates are not supported yet.") + assert self._engine + alembic_cfg = config.Config( + path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) + ) + with self._engine.connect() as conn: + alembic_cfg.attributes["connection"] = conn + command.upgrade(alembic_cfg, "head") + return self def __repr__(self) -> str: """ diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 401e8e22a50..e29e6062e27 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -40,8 +40,10 @@ def __init__( self._engine = create_engine(self._url, echo=self._log_sql) self._db_schema = DbSchema(self._engine) self._schema_created = False + self._schema_updated = False if not lazy_schema_create: assert self._schema + self.update_schema() else: _LOG.info("Using lazy schema create for database: %s", self) @@ -52,11 +54,14 @@ def _schema(self) -> DbSchema: self._db_schema.create() if _LOG.isEnabledFor(logging.DEBUG): _LOG.debug("DDL statements:\n%s", self._schema) + self._schema_created = True return self._db_schema def update_schema(self) -> None: """Update the database schema.""" - self._db_schema.update() + if not self._schema_updated: + self._schema.update() + self._schema_updated = True def __repr__(self) -> str: return self._repr From 96b87f806c1b970c436150edaa02ef060fecb6e1 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 22:19:15 +0000 Subject: [PATCH 08/43] tweaks --- mlos_bench/mlos_bench/launcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 0eeee95de56..68bb9d3d75f 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -126,7 +126,7 @@ def __init__(self, description: str, long_text: str = "", argv: Optional[List[st self.storage = self._load_storage( args.storage or config.get("storage"), - lazy_schema_create=args.create_update_storage_schema_only or None, + lazy_schema_create=False if args.create_update_storage_schema_only else None, ) _LOG.info("Init storage: %s", self.storage) @@ -524,7 +524,7 @@ def _load_storage( ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, Dict) - if lazy_schema_create: + if lazy_schema_create is not None: class_config["lazy_schema_create"] = lazy_schema_create storage = self._config_loader.build_storage( service=self._parent_service, From 036a80bd5659912351e7b6eeca20bdf70e1a1026 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 22:43:11 +0000 Subject: [PATCH 09/43] format --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 0fc26c69b2e..dba8c2f4b26 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -82,6 +82,7 @@ def run_migrations_online() -> None: with context.begin_transaction(): context.run_migrations() + if context.is_offline_mode(): run_migrations_offline() else: From b44f205a9e3860b3cb89164cdb6823f415da5db1 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 16:54:20 -0600 Subject: [PATCH 10/43] syntax tweaks for python 3.10 --- mlos_bench/mlos_bench/launcher.py | 4 ++-- mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako | 8 ++++---- .../sql/alembic/versions/d2a708351ba8_add_alembic.py | 8 ++++---- .../versions/f83fb8ae7fc4_add_trial_runner_id_column.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 68bb9d3d75f..6dc47be902e 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -501,8 +501,8 @@ def _load_optimizer(self, args_optimizer: Optional[str]) -> Optimizer: def _load_storage( self, - args_storage: Optional[str], - lazy_schema_create: Optional[bool] = None, + args_storage: str | None, + lazy_schema_create: bool | None = None, ) -> Storage: """ Instantiate the Storage object from JSON file provided in the --storage command diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako index 57995a4b132..c8e8aee1e2b 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako +++ b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako @@ -5,7 +5,7 @@ Revises: ${down_revision | comma,n} Create Date: ${create_date} """ -from typing import Sequence, Union +from typing import Sequence from alembic import op import sqlalchemy as sa @@ -13,9 +13,9 @@ ${imports if imports else ""} # revision identifiers, used by Alembic. revision: str = ${repr(up_revision)} -down_revision: Union[str, None] = ${repr(down_revision)} -branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} -depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} +down_revision: str | None = ${repr(down_revision)} +branch_labels: str | Sequence[str] | None = ${repr(branch_labels)} +depends_on: str | Sequence[str] | None = ${repr(depends_on)} def upgrade() -> None: diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py index 43c797e5a16..a215bb37a7c 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -9,16 +9,16 @@ Create Date: 2025-01-03 21:21:13.978672+00:00 """ -from typing import Sequence, Union +from typing import Sequence import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. revision: str = "d2a708351ba8" -down_revision: Union[str, None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None +down_revision: str | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None def upgrade() -> None: diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index 1484878da12..c5596a91429 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -16,9 +16,9 @@ # revision identifiers, used by Alembic. revision: str = "f83fb8ae7fc4" -down_revision: Union[str, None] = "d2a708351ba8" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None +down_revision: str | None = "d2a708351ba8" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None def upgrade() -> None: From f8d82179a4a932c9783f4d7f248f4494e7f6fbf7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 16:57:12 -0600 Subject: [PATCH 11/43] more python 3.10 changes --- .../f83fb8ae7fc4_add_trial_runner_id_column.py | 2 +- mlos_bench/mlos_bench/storage/sql/schema.py | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index c5596a91429..12ed9124700 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -9,7 +9,7 @@ Create Date: 2025-01-03 21:25:48.848196+00:00 """ -from typing import Sequence, Union +from typing import Sequence import sqlalchemy as sa from alembic import op diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index e0f7e04549d..20e6418eb0e 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -15,8 +15,8 @@ """ import logging -import sys -from typing import Any, List, Optional +from importlib.resources import files +from typing import Any, List from alembic import command, config from sqlalchemy import ( @@ -38,11 +38,6 @@ from mlos_bench.util import path_join -if sys.version_info < (3, 10): - from importlib_resources import files -else: - from importlib.resources import files - _LOG = logging.getLogger(__name__) @@ -78,13 +73,13 @@ class DbSchema: _METRIC_VALUE_LEN = 255 _STATUS_LEN = 16 - def __init__(self, engine: Optional[Engine]): + def __init__(self, engine: Engine | None): """ Declare the SQLAlchemy schema for the database. Parameters ---------- - engine : sqlalchemy.engine.Engine + engine : sqlalchemy.engine.Engine | None The SQLAlchemy engine to use for the DB schema. Listed as optional for :external:mod:`alembic` schema migration purposes, but won't be functional without one. From 3be7c810d0036bf1e0ec3735655ec8716b9adb12 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 17:16:42 -0600 Subject: [PATCH 12/43] fixup --- mlos_bench/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/setup.py b/mlos_bench/setup.py index 6fe8b7c8b6a..9a109cd7db2 100644 --- a/mlos_bench/setup.py +++ b/mlos_bench/setup.py @@ -71,7 +71,7 @@ def _get_long_desc_from_readme(base_url: str) -> dict: "ssh": ["asyncssh<2.15.0"], # FIXME: asyncssh 2.15.0 has a bug that breaks the tests "storage-sql-duckdb": ["sqlalchemy", "alembic", "duckdb_engine"], "storage-sql-mysql": ["sqlalchemy", "alembic", "mysql-connector-python"], - "storage-sql-postgres": ["sqlalchemy", , "alembic", "psycopg2"], + "storage-sql-postgres": ["sqlalchemy", "alembic", "psycopg2"], # sqlite3 comes with python, so we don't need to install it. "storage-sql-sqlite": ["sqlalchemy", "alembic"], # Transitive extra_requires from mlos-core. From 780d98607a0d205e44205b030612fe4fcf11d2d7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 17:18:39 -0600 Subject: [PATCH 13/43] move those changes to a new PR --- ...f83fb8ae7fc4_add_trial_runner_id_column.py | 35 ------------------- mlos_bench/mlos_bench/storage/sql/schema.py | 1 - 2 files changed, 36 deletions(-) delete mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py deleted file mode 100644 index 12ed9124700..00000000000 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# -"""Add trial_runner_id column - -Revision ID: f83fb8ae7fc4 -Revises: d2a708351ba8 -Create Date: 2025-01-03 21:25:48.848196+00:00 - -""" -from typing import Sequence - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "f83fb8ae7fc4" -down_revision: str | None = "d2a708351ba8" -branch_labels: str | Sequence[str] | None = None -depends_on: str | Sequence[str] | None = None - - -def upgrade() -> None: - """The schema upgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade() -> None: - """The schema downgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("trial", "trial_runner_id") - # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 20e6418eb0e..a0c7e96440f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -147,7 +147,6 @@ def __init__(self, engine: Engine | None): Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), - Column("trial_runner_id", Integer, nullable=True, default="NULL"), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: From 320af9bf61c7ca53d72561f9c54758d89e9b2a21 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:10:38 +0000 Subject: [PATCH 14/43] fixups --- mlos_bench/mlos_bench/launcher.py | 2 +- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index d963cd7d65a..72701b83aa8 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -524,7 +524,7 @@ def _load_storage( }, ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) - assert isinstance(class_config, Dict) + assert isinstance(class_config, dict) if lazy_schema_create is not None: class_config["lazy_schema_create"] = lazy_schema_create storage = self._config_loader.build_storage( diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 1b6d92435c4..4d2a1120c54 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -3,7 +3,8 @@ [alembic] # path to migration scripts # Use forward slashes (/) also on windows to provide an os agnostic path -script_location = alembic +script_location = mlos_bench.storage.sql:alembic + # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s # Uncomment the line below if you want the files to be prepended with date and time From 7494ed4fd24e609bf237bc9cc5376fe660ecb2f5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:27:47 +0000 Subject: [PATCH 15/43] lint --- .../mlos_bench/storage/sql/alembic/env.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index dba8c2f4b26..0b4066cee07 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -2,11 +2,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # +"""Alembic environment script.""" +# pylint: disable=no-member + from logging.config import fileConfig from alembic import context from sqlalchemy import engine_from_config, pool +from mlos_bench.storage.sql.schema import DbSchema + # this is the Alembic Config object, which provides # access to the values within the .ini file in use. config = context.config @@ -18,10 +23,7 @@ # add your model's MetaData object here # for 'autogenerate' support -from mlos_bench.storage.sql.schema import DbSchema - target_metadata = DbSchema(engine=None).meta -# target_metadata = None # other values from the config, defined by the needs of env.py, # can be acquired: @@ -30,16 +32,14 @@ def run_migrations_offline() -> None: - """Run migrations in 'offline' mode. - - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. + """ + Run migrations in 'offline' mode. - Calls to context.execute() here emit the given string to the - script output. + This configures the context with just a URL and not an Engine, though an Engine is + acceptable here as well. By skipping the Engine creation we don't even need a DBAPI + to be available. + Calls to context.execute() here emit the given string to the script output. """ url = config.get_main_option("sqlalchemy.url") context.configure( @@ -54,11 +54,11 @@ def run_migrations_offline() -> None: def run_migrations_online() -> None: - """Run migrations in 'online' mode. - - In this scenario we need to create an Engine - and associate a connection with the context. + """ + Run migrations in 'online' mode. + In this scenario we need to create an Engine and associate a connection with the + context. """ connectable = config.attributes.get("connection", None) @@ -66,7 +66,7 @@ def run_migrations_online() -> None: # only create Engine if we don't have a Connection # from the outside connectable = engine_from_config( - config.get_section(config.config_ini_section), + config.get_section(config.config_ini_section) or {}, prefix="sqlalchemy.", poolclass=pool.NullPool, ) From 071d148aa24059fc0a5607add51831ddaa886fcd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:31:58 +0000 Subject: [PATCH 16/43] fixps --- mlos_bench/mlos_bench/storage/sql/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index cce9508f32e..b3bf63d0edb 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -52,9 +52,9 @@ def _schema(self) -> DbSchema: """Lazily create schema upon first access.""" if not self._schema_created: self._db_schema.create() - if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("DDL statements:\n%s", self._schema) self._schema_created = True + if _LOG.isEnabledFor(logging.DEBUG): + _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema def update_schema(self) -> None: From ffe7ce1961a568b9d9664383f4c538da55a479f9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:43:50 +0000 Subject: [PATCH 17/43] fixups --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index fd42321bb24..030b94db5e4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,7 @@ addopts = --ff --nf -n auto --doctest-modules + --ignore=mlos_bench/mlos_bench/storage/sql/alembic/ # --dist loadgroup # --log-level=DEBUG # Moved these to Makefile (coverage is expensive and we only need it in the pipelines generally). From de3da24d1ca0503ba63aa466b54974768cfb1ceb Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:48:19 +0000 Subject: [PATCH 18/43] stop overriding our log level --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 4d2a1120c54..3cfcfb0fe0c 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -94,8 +94,9 @@ keys = console keys = generic [logger_root] -level = WARNING -handlers = console +# Don't override the root logger's level, so that we can control it from mlos_bench configs. +#level = WARNING +#handlers = console qualname = [logger_sqlalchemy] From ba4fb35dd2fa4142f9ef9129a94e15f412d31088 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:50:05 +0000 Subject: [PATCH 19/43] another fixup --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 3cfcfb0fe0c..642970df638 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -96,7 +96,7 @@ keys = generic [logger_root] # Don't override the root logger's level, so that we can control it from mlos_bench configs. #level = WARNING -#handlers = console +handlers = qualname = [logger_sqlalchemy] From f49c3e17199f48e9941061382cca882fde7ae18f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:51:21 +0000 Subject: [PATCH 20/43] fixups --- .../sql/alembic/versions/d2a708351ba8_add_alembic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py index a215bb37a7c..562a27dadac 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -2,14 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Add alembic +""" +Add alembic. Revision ID: d2a708351ba8 Revises: Create Date: 2025-01-03 21:21:13.978672+00:00 - """ -from typing import Sequence +from collections.abc import Sequence import sqlalchemy as sa from alembic import op From 1e981bce8b965d8c0c3cf1a04eadd9e1a92d9c32 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 00:55:39 +0000 Subject: [PATCH 21/43] disable a check in devcontainer --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f054ddfdd11..12826d8dbcc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: rev: v5.0.0 hooks: - id: check-added-large-files - - id: check-executables-have-shebangs +# - id: check-executables-have-shebangs (issues in devcontainer) - id: check-merge-conflict - id: check-toml - id: check-yaml From ddd95f2acf3d5e0687c97a4d24fac98101fe5551 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 19:02:19 -0600 Subject: [PATCH 22/43] check for files --- Makefile | 2 ++ mlos_bench/pyproject.toml | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4bb0368dc89..1ffecdbe963 100644 --- a/Makefile +++ b/Makefile @@ -253,6 +253,8 @@ mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz ! ( tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 tests/ ) # Make sure the py.typed marker file exists. tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /py.typed + # Make sure the alembic scripts are included + tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /alembic/versions/.*py # Check to make sure the mlos_bench module has the config directory. [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 mlos_bench/config/ cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar.gz $(PACKAGE_NAME)-latest.tar.gz diff --git a/mlos_bench/pyproject.toml b/mlos_bench/pyproject.toml index 028aa785678..06bcbb18753 100644 --- a/mlos_bench/pyproject.toml +++ b/mlos_bench/pyproject.toml @@ -62,7 +62,6 @@ mlos_bench = [ "py.typed", "**/*.pyi", "storage/sql/alembic.ini", "storage/sql/alembic/env.py", - "storage/sql/alembic/script.py.mako", "storage/sql/alembic/revisions/*.py", "config/**/*.md", "config/**/*.jsonc", From 86b4ede7556a6f3a5f422bbf3a92289a2ee8590a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 19:07:06 -0600 Subject: [PATCH 23/43] fixups --- Makefile | 4 +++- mlos_bench/pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1ffecdbe963..cea73134afa 100644 --- a/Makefile +++ b/Makefile @@ -254,7 +254,9 @@ mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz # Make sure the py.typed marker file exists. tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /py.typed # Make sure the alembic scripts are included - tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /alembic/versions/.*py + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic.ini + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic/env.py + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic/versions/.*py # Check to make sure the mlos_bench module has the config directory. [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 mlos_bench/config/ cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar.gz $(PACKAGE_NAME)-latest.tar.gz diff --git a/mlos_bench/pyproject.toml b/mlos_bench/pyproject.toml index 06bcbb18753..bc712d3eae7 100644 --- a/mlos_bench/pyproject.toml +++ b/mlos_bench/pyproject.toml @@ -62,7 +62,7 @@ mlos_bench = [ "py.typed", "**/*.pyi", "storage/sql/alembic.ini", "storage/sql/alembic/env.py", - "storage/sql/alembic/revisions/*.py", + "storage/sql/alembic/versions/*.py", "config/**/*.md", "config/**/*.jsonc", "config/**/*.json", From 220a495ebba4337288e2c8c71d671e2f162a574c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 19:14:57 -0600 Subject: [PATCH 24/43] fixups --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 030b94db5e4..9f42f56b018 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,7 +40,7 @@ addopts = --ff --nf -n auto --doctest-modules - --ignore=mlos_bench/mlos_bench/storage/sql/alembic/ + --ignore-glob=**/alembic/env.py # --dist loadgroup # --log-level=DEBUG # Moved these to Makefile (coverage is expensive and we only need it in the pipelines generally). From 8357c3af352699c2ba73166d44520fd201e5325a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 19:36:48 -0600 Subject: [PATCH 25/43] fixups --- mlos_bench/mlos_bench/launcher.py | 5 ++++- .../storage/sql/alembic/versions/d2a708351ba8_add_alembic.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 72701b83aa8..0d76a25e6f3 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -380,7 +380,10 @@ def add_argument(self, *args: Any, **kwargs: Any) -> None: default=False, dest="create_update_storage_schema_only", action="store_true", - help="Makes sure that the storage schema is update to date for the current version of mlos_bench.", + help=( + "Makes sure that the storage schema is up to date " + "for the current version of mlos_bench." + ), ) # By default we use the command line arguments, but allow the caller to diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py index 562a27dadac..2974d805999 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -11,8 +11,8 @@ """ from collections.abc import Sequence -import sqlalchemy as sa -from alembic import op +# import sqlalchemy as sa +# from alembic import op # revision identifiers, used by Alembic. revision: str = "d2a708351ba8" From 908b5db40f2681bae520f6b172c56fc9d7151e2b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 19:37:39 -0600 Subject: [PATCH 26/43] missing package for pylint --- conda-envs/mlos-3.10.yml | 1 + conda-envs/mlos-3.11.yml | 1 + conda-envs/mlos-3.12.yml | 1 + conda-envs/mlos-3.13.yml | 1 + conda-envs/mlos-windows.yml | 1 + conda-envs/mlos.yml | 1 + 6 files changed, 6 insertions(+) diff --git a/conda-envs/mlos-3.10.yml b/conda-envs/mlos-3.10.yml index 8793b14838c..31e313f0bcc 100644 --- a/conda-envs/mlos-3.10.yml +++ b/conda-envs/mlos-3.10.yml @@ -28,6 +28,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.11.yml b/conda-envs/mlos-3.11.yml index 76866df3ab2..22a3390e6a3 100644 --- a/conda-envs/mlos-3.11.yml +++ b/conda-envs/mlos-3.11.yml @@ -28,6 +28,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.12.yml b/conda-envs/mlos-3.12.yml index 4b61bb750a3..18dc02293f6 100644 --- a/conda-envs/mlos-3.12.yml +++ b/conda-envs/mlos-3.12.yml @@ -30,6 +30,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.13.yml b/conda-envs/mlos-3.13.yml index fc32e92800b..5ff0e8e5a38 100644 --- a/conda-envs/mlos-3.13.yml +++ b/conda-envs/mlos-3.13.yml @@ -30,6 +30,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-windows.yml b/conda-envs/mlos-windows.yml index 906ba421c19..8e344d55924 100644 --- a/conda-envs/mlos-windows.yml +++ b/conda-envs/mlos-windows.yml @@ -31,6 +31,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos.yml b/conda-envs/mlos.yml index 21335fa32e1..4d8f82390c9 100644 --- a/conda-envs/mlos.yml +++ b/conda-envs/mlos.yml @@ -26,6 +26,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 From 90e34ad390628b0bfbfe01a87e4be7f5b0f96fd8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 20:22:55 -0600 Subject: [PATCH 27/43] doc fixups --- doc/source/conf.py | 5 +++++ mlos_bench/mlos_bench/storage/sql/schema.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index ffe0398ce8b..8bb6b555290 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -146,6 +146,7 @@ def is_on_github_actions(): ) intersphinx_mapping.update( { + "alembic": ("https://alembic.sqlalchemy.org/en/latest/", None), "dabl": ("https://dabl.github.io/stable/", None), } ) @@ -216,6 +217,7 @@ def setup(app: SphinxApp) -> None: # External classes that refuse to resolve: ("py:class", "contextlib.nullcontext"), ("py:class", "sqlalchemy.engine.Engine"), + ("py:class", "sqlalchemy.MetaData"), ("py:exc", "jsonschema.exceptions.SchemaError"), ("py:exc", "jsonschema.exceptions.ValidationError"), ] @@ -253,6 +255,9 @@ def setup(app: SphinxApp) -> None: # Don't document internal environment scripts that aren't part of a module. "*/mlos_bench/config/environments/*/*.py", "*/mlos_bench/config/services/*/*.py", + # Don't document schema evolution scripts. + "*/mlos_bench/storage/sql/alembic/*.py", + "*/mlos_bench/storage/sql/alembic/versions/*.py", ] autoapi_options = [ "members", diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bd7a650789..b15fe40e5f5 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -81,8 +81,10 @@ def __init__(self, engine: Engine | None): ---------- engine : sqlalchemy.engine.Engine | None The SQLAlchemy engine to use for the DB schema. - Listed as optional for :external:mod:`alembic` schema migration - purposes, but won't be functional without one. + Listed as optional for `alembic `_ + schema migration purposes so we can reference it inside it's ``env.py`` + config file for :attr:`~meta` data inspection, but won't generally be + functional without one. """ _LOG.info("Create the DB schema for: %s", engine) self._engine = engine From 2caf76c89de66800852cb9a81f7376793e0775b9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 20:36:11 -0600 Subject: [PATCH 28/43] fixups --- mlos_bench/mlos_bench/storage/sql/schema.py | 41 ++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index b15fe40e5f5..7d130dcd8fb 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -12,6 +12,9 @@ ``repr`` or ``str`` (e.g., via ``print()``) on this object. The ``mlos_bench`` CLI will do this automatically if the logging level is set to ``DEBUG``. + +Also see the `mlos_bench CLI usage <../../../mlos_bench.run.usage.html>`__ for +details on how to invoke only the schema creation/update routines. """ import logging @@ -100,6 +103,9 @@ def __init__(self, engine: Engine | None): Column("git_commit", String(40), nullable=False), PrimaryKeyConstraint("exp_id"), ) + """Table storing + :py:mod:`~mlos_bench.storage.base_experiment_data.ExperimentData` info. + """ self.objectives = Table( "objectives", @@ -115,6 +121,9 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("exp_id", "optimization_target"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ) + """Table storing :py:mod:`~mlos_bench.storage.base_storage.Storage.Experiment` + optimization objectives info. + """ # A workaround for SQLAlchemy issue with autoincrement in DuckDB: if engine and engine.dialect.name == "duckdb": @@ -142,6 +151,10 @@ def __init__(self, engine: Engine | None): col_config_id, Column("config_hash", String(64), nullable=False, unique=True), ) + """Table storing + :py:mod:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + info. + """ self.trial = Table( "trial", @@ -157,6 +170,9 @@ def __init__(self, engine: Engine | None): ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) + """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` + info. + """ # Values of the tunable parameters of the experiment, # fixed for a particular trial config. @@ -169,6 +185,10 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) + """Table storing + :py:mod:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + info. + """ # Values of additional non-tunable parameters of the trial, # e.g., scheduled execution time, VM name / location, number of repeats, etc. @@ -185,6 +205,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` + metadata info. + """ self.trial_status = Table( "trial_status", @@ -199,6 +222,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` status + info. + """ self.trial_result = Table( "trial_result", @@ -213,6 +239,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` result + info. + """ self.trial_telemetry = Table( "trial_telemetry", @@ -228,6 +257,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` + telemetry info. + """ _LOG.debug("Schema: %s", self._meta) @@ -244,7 +276,14 @@ def create(self) -> "DbSchema": return self def update(self) -> "DbSchema": - """Updates the DB schema to the latest version.""" + """ + Updates the DB schema to the latest version. + + Notes + ----- + Also see the `mlos_bench CLI usage <../../../mlos_bench.run.usage.html>`__ + for details on how to invoke only the schema creation/update routines. + """ assert self._engine alembic_cfg = config.Config( path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) From 059fccf94068d42f5abfc7baaf0446361a018a3c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 20:39:07 -0600 Subject: [PATCH 29/43] fixup links --- mlos_bench/mlos_bench/storage/sql/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 7d130dcd8fb..9b386651fc5 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -13,7 +13,7 @@ The ``mlos_bench`` CLI will do this automatically if the logging level is set to ``DEBUG``. -Also see the `mlos_bench CLI usage <../../../mlos_bench.run.usage.html>`__ for +Also see the `mlos_bench CLI usage <../../../../../mlos_bench.run.usage.html>`__ for details on how to invoke only the schema creation/update routines. """ @@ -281,7 +281,7 @@ def update(self) -> "DbSchema": Notes ----- - Also see the `mlos_bench CLI usage <../../../mlos_bench.run.usage.html>`__ + Also see the `mlos_bench CLI usage <../../../../../mlos_bench.run.usage.html>`__ for details on how to invoke only the schema creation/update routines. """ assert self._engine From 41221a4857d659899f5405a77fcaf548022b8e52 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 20:55:00 -0600 Subject: [PATCH 30/43] improved links --- mlos_bench/mlos_bench/storage/sql/schema.py | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 9b386651fc5..da1d715a4b1 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -104,7 +104,7 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("exp_id"), ) """Table storing - :py:mod:`~mlos_bench.storage.base_experiment_data.ExperimentData` info. + :py:class:`~mlos_bench.storage.base_experiment_data.ExperimentData` info. """ self.objectives = Table( @@ -121,7 +121,7 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("exp_id", "optimization_target"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ) - """Table storing :py:mod:`~mlos_bench.storage.base_storage.Storage.Experiment` + """Table storing :py:class:`~mlos_bench.storage.base_storage.Storage.Experiment` optimization objectives info. """ @@ -152,7 +152,7 @@ def __init__(self, engine: Engine | None): Column("config_hash", String(64), nullable=False, unique=True), ) """Table storing - :py:mod:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` info. """ @@ -170,7 +170,7 @@ def __init__(self, engine: Engine | None): ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) - """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` + """Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` info. """ @@ -186,7 +186,7 @@ def __init__(self, engine: Engine | None): ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) """Table storing - :py:mod:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` info. """ @@ -205,8 +205,8 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` - metadata info. + """Table storing :py:attr:`TrialData metadata + ` info. """ self.trial_status = Table( @@ -222,8 +222,8 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` status - info. + """Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:class:`~mlos_bench.environments.status.Status` info. """ self.trial_result = Table( @@ -239,8 +239,8 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` result - info. + """Table storing :py:attr:`TrialData result + ` info. """ self.trial_telemetry = Table( @@ -257,8 +257,8 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:mod:`~mlos_bench.storage.base_trial_data.TrialData` - telemetry info. + """Table storing :py:attr:`TrialData telemetry + ` info. """ _LOG.debug("Schema: %s", self._meta) From 8b2fe15ffe756150a3497103b6d7810e98c7eb71 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:11:46 -0600 Subject: [PATCH 31/43] more docstring fixes --- mlos_bench/mlos_bench/storage/sql/schema.py | 30 ++++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index da1d715a4b1..014e610a9f8 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -103,7 +103,7 @@ def __init__(self, engine: Engine | None): Column("git_commit", String(40), nullable=False), PrimaryKeyConstraint("exp_id"), ) - """Table storing + """The Table storing :py:class:`~mlos_bench.storage.base_experiment_data.ExperimentData` info. """ @@ -121,8 +121,9 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("exp_id", "optimization_target"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ) - """Table storing :py:class:`~mlos_bench.storage.base_storage.Storage.Experiment` - optimization objectives info. + """The Table storing + :py:class:`~mlos_bench.storage.base_storage.Storage.Experiment` optimization + objectives info. """ # A workaround for SQLAlchemy issue with autoincrement in DuckDB: @@ -151,7 +152,7 @@ def __init__(self, engine: Engine | None): col_config_id, Column("config_hash", String(64), nullable=False, unique=True), ) - """Table storing + """The Table storing :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` info. """ @@ -170,7 +171,7 @@ def __init__(self, engine: Engine | None): ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) - """Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` info. """ @@ -185,7 +186,7 @@ def __init__(self, engine: Engine | None): PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) - """Table storing + """The Table storing :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` info. """ @@ -205,8 +206,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:attr:`TrialData metadata - ` info. + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`metadata ` + info. """ self.trial_status = Table( @@ -222,7 +224,7 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` :py:class:`~mlos_bench.environments.status.Status` info. """ @@ -239,8 +241,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:attr:`TrialData result - ` info. + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`results ` + info. """ self.trial_telemetry = Table( @@ -257,8 +260,9 @@ def __init__(self, engine: Engine | None): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) - """Table storing :py:attr:`TrialData telemetry - ` info. + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`telemetry ` + info. """ _LOG.debug("Schema: %s", self._meta) From 8a01c3f7347dfc73399800e6d68f56792d776dad Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:35:51 -0600 Subject: [PATCH 32/43] Revert "move those changes to a new PR" This reverts commit 780d98607a0d205e44205b030612fe4fcf11d2d7. --- ...f83fb8ae7fc4_add_trial_runner_id_column.py | 35 +++++++++++++++++++ mlos_bench/mlos_bench/storage/sql/schema.py | 1 + 2 files changed, 36 insertions(+) create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py new file mode 100644 index 00000000000..12ed9124700 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -0,0 +1,35 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Add trial_runner_id column + +Revision ID: f83fb8ae7fc4 +Revises: d2a708351ba8 +Create Date: 2025-01-03 21:25:48.848196+00:00 + +""" +from typing import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "f83fb8ae7fc4" +down_revision: str | None = "d2a708351ba8" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """The schema upgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """The schema downgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("trial", "trial_runner_id") + # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 014e610a9f8..5af43c866dd 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -163,6 +163,7 @@ def __init__(self, engine: Engine | None): Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), + Column("trial_runner_id", Integer, nullable=True, default="NULL"), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: From f2c23097eee091befdc019d1d99f1692845ee00e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 3 Jan 2025 21:38:04 -0600 Subject: [PATCH 33/43] fixps --- .../versions/f83fb8ae7fc4_add_trial_runner_id_column.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index 12ed9124700..226c4149357 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -2,14 +2,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Add trial_runner_id column +""" +Add trial_runner_id column. Revision ID: f83fb8ae7fc4 Revises: d2a708351ba8 Create Date: 2025-01-03 21:25:48.848196+00:00 - """ -from typing import Sequence +# pylint: disable=no-member + +from collections.abc import Sequence import sqlalchemy as sa from alembic import op From 09125e1dad74c532031262ef6c088bd2350cc5e6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 04:04:22 +0000 Subject: [PATCH 34/43] add some checks to make sure we 'stamp' schemas on first create See Also: https://alembic.sqlalchemy.org/en/latest/cookbook.html#building-an-up-to-date-database-from-scratch --- mlos_bench/mlos_bench/storage/sql/schema.py | 26 +++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 5af43c866dd..d16ad8163f6 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -24,6 +24,7 @@ from alembic import command, config from sqlalchemy import ( Column, + Connection, DateTime, Dialect, Float, @@ -36,6 +37,7 @@ Table, UniqueConstraint, create_mock_engine, + inspect, ) from sqlalchemy.engine import Engine @@ -273,11 +275,30 @@ def meta(self) -> MetaData: """Return the SQLAlchemy MetaData object.""" return self._meta + def _get_alembic_cfg(self, conn: Connection) -> config.Config: # pylint: disable=no-self-use + alembic_cfg = config.Config( + path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) + ) + alembic_cfg.attributes["connection"] = conn + return alembic_cfg + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") assert self._engine self._meta.create_all(self._engine) + with self._engine.connect() as conn: + # If the trial table has the trial_runner_id column but no + # "alembic_version" table, then the schema is up to date and we should + # mark it as such to avoid trying to run the (non-idempotent) upgrade + # scripts. + if any( + column["name"] == "trial_runner_id" + for column in inspect(conn).get_columns(self.trial.name) + ) and not inspect(conn).has_table("alembic_version"): + # Mark the schema as up to date. + alembic_cfg = self._get_alembic_cfg(conn) + command.stamp(alembic_cfg, "head") return self def update(self) -> "DbSchema": @@ -290,11 +311,8 @@ def update(self) -> "DbSchema": for details on how to invoke only the schema creation/update routines. """ assert self._engine - alembic_cfg = config.Config( - path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) - ) with self._engine.connect() as conn: - alembic_cfg.attributes["connection"] = conn + alembic_cfg = self._get_alembic_cfg(conn) command.upgrade(alembic_cfg, "head") return self From 8fa28160855b4a8cc29d5c1c273a59dcc0da59bf Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 04:17:10 +0000 Subject: [PATCH 35/43] fixup --- mlos_bench/mlos_bench/storage/sql/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index d16ad8163f6..349340d9817 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -298,7 +298,9 @@ def create(self) -> "DbSchema": ) and not inspect(conn).has_table("alembic_version"): # Mark the schema as up to date. alembic_cfg = self._get_alembic_cfg(conn) - command.stamp(alembic_cfg, "head") + command.stamp(alembic_cfg, "heads") + #command.current(alembic_cfg) + conn.commit() return self def update(self) -> "DbSchema": From 1c100374909a3b64ab13e6b9a88fa3f133466691 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 04:20:26 +0000 Subject: [PATCH 36/43] update comment --- mlos_bench/mlos_bench/storage/sql/schema.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 349340d9817..4d051a497be 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -289,9 +289,13 @@ def create(self) -> "DbSchema": self._meta.create_all(self._engine) with self._engine.connect() as conn: # If the trial table has the trial_runner_id column but no - # "alembic_version" table, then the schema is up to date and we should - # mark it as such to avoid trying to run the (non-idempotent) upgrade - # scripts. + # "alembic_version" table, then the schema is up to date as of initial + # create and we should mark it as such to avoid trying to run the + # (non-idempotent) upgrade scripts. + # Otherwise, either we already have an alembic_version table and can + # safely run the necessary upgrades or we are missing the + # trial_runner_id column (the first to introduce schema updates) and + # should run the upgrades. if any( column["name"] == "trial_runner_id" for column in inspect(conn).get_columns(self.trial.name) From c1a61e8ad3a7751618638b81c7480f6f8fecc9b1 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 04:26:44 +0000 Subject: [PATCH 37/43] black --- mlos_bench/mlos_bench/storage/sql/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 4d051a497be..222d6f848c3 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -303,7 +303,7 @@ def create(self) -> "DbSchema": # Mark the schema as up to date. alembic_cfg = self._get_alembic_cfg(conn) command.stamp(alembic_cfg, "heads") - #command.current(alembic_cfg) + # command.current(alembic_cfg) conn.commit() return self From 53fadf8fbc32d02a8d020b84ec735b62ec06a207 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 21:06:16 +0000 Subject: [PATCH 38/43] fixup logging overrides --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 5 ++--- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 642970df638..4d2a1120c54 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -94,9 +94,8 @@ keys = console keys = generic [logger_root] -# Don't override the root logger's level, so that we can control it from mlos_bench configs. -#level = WARNING -handlers = +level = WARNING +handlers = console qualname = [logger_sqlalchemy] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 0b4066cee07..6944060b831 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -6,6 +6,7 @@ # pylint: disable=no-member from logging.config import fileConfig +import sys from alembic import context from sqlalchemy import engine_from_config, pool @@ -18,7 +19,8 @@ # Interpret the config file for Python logging. # This line sets up loggers basically. -if config.config_file_name is not None: +# Don't override the mlos_bench or pytest loggers though. +if config.config_file_name is not None and "alembic" in sys.argv[0]: fileConfig(config.config_file_name) # add your model's MetaData object here From 32fab2b6462a2790deba7accbf30d63fc372cc82 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 21:30:41 +0000 Subject: [PATCH 39/43] add tests --- .../mlos_bench/storage/sql/alembic/README.md | 2 ++ .../tests/storage/test_storage_schemas.py | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ac20a0a45d7..f544b2b14f9 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -38,6 +38,8 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. + 7. Merge that to the `main` branch. 8. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py new file mode 100644 index 00000000000..e11ff8613d9 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -0,0 +1,34 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Test sql schemas for mlos_bench storage.""" + +from alembic.migration import MigrationContext +from sqlalchemy import inspect + +from mlos_bench.storage.sql.storage import SqlStorage + +# NOTE: This value is hardcoded to the latest revision in the alembic versions directory. +# It could also be obtained programmatically using the "alembic heads" command or heads() API. +# See Also: schema.py for an example of programmatic alembic config access. +CURRENT_ALEMBIC_HEAD = "f83fb8ae7fc4" + + +def test_storage_schemas(storage: SqlStorage) -> None: + """Test storage schema creation.""" + eng = storage._engine # pylint: disable=protected-access + with eng.connect() as conn: # pylint: disable=protected-access + inspector = inspect(conn) + # Make sure the "trial_runner_id" column exists. + # (i.e., the latest schema has been applied) + assert any( + column["name"] == "trial_runner_id" for column in inspect(conn).get_columns("trial") + ) + # Make sure the "alembic_version" table exists and is appropriately stamped. + assert inspector.has_table("alembic_version") + context = MigrationContext.configure(conn) + current_rev = context.get_current_revision() + assert ( + current_rev == CURRENT_ALEMBIC_HEAD + ), f"Expected {CURRENT_ALEMBIC_HEAD}, got {current_rev}" From 414ec1be272945e59ae2dc763aeaac9d25f3d2c2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 21:31:05 +0000 Subject: [PATCH 40/43] speed improvement on licenseheaders pre-commit partial files run --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 12826d8dbcc..520aae0cbf5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,7 @@ repos: hooks: - id: licenseheaders files: '\.(sh|cmd|ps1|sql|py)$' - args: [-t, doc/mit-license.tmpl, -E, .py, .sh, .ps1, .sql, .cmd, -x, mlos_bench/setup.py, mlos_core/setup.py, mlos_viz/setup.py] + args: [-t, doc/mit-license.tmpl, -E, .py, .sh, .ps1, .sql, .cmd, -x, mlos_bench/setup.py, mlos_core/setup.py, mlos_viz/setup.py, -f] require_serial: true stages: [pre-commit, manual] - repo: https://github.com/asottile/pyupgrade From 9841ccf70240912a9b0056ddb45f91a25dde5de3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Sat, 4 Jan 2025 21:32:19 +0000 Subject: [PATCH 41/43] isort --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 6944060b831..fc186b8cb1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,8 +5,8 @@ """Alembic environment script.""" # pylint: disable=no-member -from logging.config import fileConfig import sys +from logging.config import fileConfig from alembic import context from sqlalchemy import engine_from_config, pool From 4766f61ba38bf65dfdc804839b2ebbf3ea0e53d8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 6 Jan 2025 17:01:11 +0000 Subject: [PATCH 42/43] additional test for default NULL column value --- ...f83fb8ae7fc4_add_trial_runner_id_column.py | 2 +- mlos_bench/mlos_bench/storage/sql/schema.py | 2 +- .../tests/storage/test_storage_schemas.py | 20 +++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py index 226c4149357..b49eb2bee9e 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/f83fb8ae7fc4_add_trial_runner_id_column.py @@ -26,7 +26,7 @@ def upgrade() -> None: """The schema upgrade script for this revision.""" # ### commands auto generated by Alembic - please adjust! ### - op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True)) + op.add_column("trial", sa.Column("trial_runner_id", sa.Integer(), nullable=True, default=None)) # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 222d6f848c3..f8374375fa0 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -165,7 +165,7 @@ def __init__(self, engine: Engine | None): Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), - Column("trial_runner_id", Integer, nullable=True, default="NULL"), + Column("trial_runner_id", Integer, nullable=True, default=None), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py index e11ff8613d9..3f3eb965eb9 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -7,6 +7,7 @@ from alembic.migration import MigrationContext from sqlalchemy import inspect +from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. @@ -32,3 +33,22 @@ def test_storage_schemas(storage: SqlStorage) -> None: assert ( current_rev == CURRENT_ALEMBIC_HEAD ), f"Expected {CURRENT_ALEMBIC_HEAD}, got {current_rev}" + + +# Note: this is a temporary test. It will be removed and replaced with a more +# properly integrated test in #702. +def test_trial_runner_id_default(storage: SqlStorage, exp_data: ExperimentData) -> None: + """Test that the new trial_runner_id column defaults to None.""" + assert exp_data.trials + eng = storage._engine # pylint: disable=protected-access + schema = storage._schema # pylint: disable=protected-access + with eng.connect() as conn: + trials = conn.execute( + schema.trial_result.select().with_only_columns( + schema.trial.c.trial_runner_id, + ) + ) + # trial_runner_id is not currently fully implemented + trial_row = trials.fetchone() + assert trial_row + assert trial_row.trial_runner_id is None From 33dd94c48b931fb1382d05be7a90dafe02df4df3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 6 Jan 2025 13:32:46 -0600 Subject: [PATCH 43/43] Apply suggestions from code review Co-authored-by: Sergiy Matusevych --- mlos_bench/mlos_bench/storage/sql/schema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f8374375fa0..f1c887713aa 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -287,7 +287,7 @@ def create(self) -> "DbSchema": _LOG.info("Create the DB schema") assert self._engine self._meta.create_all(self._engine) - with self._engine.connect() as conn: + with self._engine.begin() as conn: # If the trial table has the trial_runner_id column but no # "alembic_version" table, then the schema is up to date as of initial # create and we should mark it as such to avoid trying to run the @@ -304,7 +304,6 @@ def create(self) -> "DbSchema": alembic_cfg = self._get_alembic_cfg(conn) command.stamp(alembic_cfg, "heads") # command.current(alembic_cfg) - conn.commit() return self def update(self) -> "DbSchema":