diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f054ddfdd1..12826d8dbc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: rev: v5.0.0 hooks: - id: check-added-large-files - - id: check-executables-have-shebangs +# - id: check-executables-have-shebangs (issues in devcontainer) - id: check-merge-conflict - id: check-toml - id: check-yaml diff --git a/Makefile b/Makefile index d278816844..cef0d0fbdd 100644 --- a/Makefile +++ b/Makefile @@ -253,6 +253,10 @@ mlos_viz/dist/tmp/mlos_viz-latest.tar.gz: PACKAGE_NAME := mlos_viz ! ( tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 tests/ ) # Make sure the py.typed marker file exists. tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /py.typed + # Make sure the alembic scripts are included + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic.ini + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic/env.py + [ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 /storage/sql/alembic/versions/.*py # Check to make sure the mlos_bench module has the config directory. 
[ "$(MODULE_NAME)" != "mlos_bench" ] || tar tzf $(MODULE_NAME)/dist/$(PACKAGE_NAME)-*.tar.gz | grep -m1 mlos_bench/config/ cd $(MODULE_NAME)/dist/tmp && ln -s ../$(PACKAGE_NAME)-*.tar.gz $(PACKAGE_NAME)-latest.tar.gz diff --git a/conda-envs/mlos-3.10.yml b/conda-envs/mlos-3.10.yml index 8793b14838..31e313f0bc 100644 --- a/conda-envs/mlos-3.10.yml +++ b/conda-envs/mlos-3.10.yml @@ -28,6 +28,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.11.yml b/conda-envs/mlos-3.11.yml index 76866df3ab..22a3390e6a 100644 --- a/conda-envs/mlos-3.11.yml +++ b/conda-envs/mlos-3.11.yml @@ -28,6 +28,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.12.yml b/conda-envs/mlos-3.12.yml index 4b61bb750a..18dc02293f 100644 --- a/conda-envs/mlos-3.12.yml +++ b/conda-envs/mlos-3.12.yml @@ -30,6 +30,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-3.13.yml b/conda-envs/mlos-3.13.yml index fc32e92800..5ff0e8e5a3 100644 --- a/conda-envs/mlos-3.13.yml +++ b/conda-envs/mlos-3.13.yml @@ -30,6 +30,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos-windows.yml b/conda-envs/mlos-windows.yml index 906ba421c1..8e344d5592 100644 --- a/conda-envs/mlos-windows.yml +++ b/conda-envs/mlos-windows.yml @@ -31,6 +31,7 @@ dependencies: - pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/conda-envs/mlos.yml b/conda-envs/mlos.yml index 21335fa32e..4d8f82390c 100644 --- a/conda-envs/mlos.yml +++ b/conda-envs/mlos.yml @@ -26,6 +26,7 @@ dependencies: - 
pre-commit==4.0.1 - pycodestyle==2.12.1 - pylint==3.3.3 + - tomlkit - mypy==1.14.1 - pandas-stubs - types-beautifulsoup4 diff --git a/doc/source/conf.py b/doc/source/conf.py index ffe0398ce8..8bb6b55529 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -146,6 +146,7 @@ def is_on_github_actions(): ) intersphinx_mapping.update( { + "alembic": ("https://alembic.sqlalchemy.org/en/latest/", None), "dabl": ("https://dabl.github.io/stable/", None), } ) @@ -216,6 +217,7 @@ def setup(app: SphinxApp) -> None: # External classes that refuse to resolve: ("py:class", "contextlib.nullcontext"), ("py:class", "sqlalchemy.engine.Engine"), + ("py:class", "sqlalchemy.MetaData"), ("py:exc", "jsonschema.exceptions.SchemaError"), ("py:exc", "jsonschema.exceptions.ValidationError"), ] @@ -253,6 +255,9 @@ def setup(app: SphinxApp) -> None: # Don't document internal environment scripts that aren't part of a module. "*/mlos_bench/config/environments/*/*.py", "*/mlos_bench/config/services/*/*.py", + # Don't document schema evolution scripts. 
+ "*/mlos_bench/storage/sql/alembic/*.py", + "*/mlos_bench/storage/sql/alembic/versions/*.py", ] autoapi_options = [ "members", diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 47df4dad54..0d76a25e6f 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -125,6 +125,17 @@ def __init__(self, description: str, long_text: str = "", argv: list[str] | None self.global_config = DictTemplater(self.global_config).expand_vars(use_os_env=True) assert isinstance(self.global_config, dict) + self.storage = self._load_storage( + args.storage or config.get("storage"), + lazy_schema_create=False if args.create_update_storage_schema_only else None, + ) + _LOG.info("Init storage: %s", self.storage) + + if args.create_update_storage_schema_only: + _LOG.info("Create/update storage schema only.") + self.storage.update_schema() + sys.exit(0) + # --service cli args should override the config file values. service_files: list[str] = config.get("services", []) + (args.service or []) assert isinstance(self._parent_service, SupportsConfigLoading) @@ -159,9 +170,6 @@ def __init__(self, description: str, long_text: str = "", argv: list[str] | None self.optimizer = self._load_optimizer(args.optimizer or config.get("optimizer")) _LOG.info("Init optimizer: %s", self.optimizer) - self.storage = self._load_storage(args.storage or config.get("storage")) - _LOG.info("Init storage: %s", self.storage) - self.teardown: bool = ( bool(args.teardown) if args.teardown is not None @@ -366,6 +374,18 @@ def add_argument(self, *args: Any, **kwargs: Any) -> None: """, ) + parser.add_argument( + "--create-update-storage-schema-only", + required=False, + default=False, + dest="create_update_storage_schema_only", + action="store_true", + help=( + "Makes sure that the storage schema is up to date " + "for the current version of mlos_bench." 
+ ), + ) + # By default we use the command line arguments, but allow the caller to # provide some explicitly for testing purposes. if argv is None: @@ -483,7 +503,11 @@ def _load_optimizer(self, args_optimizer: str | None) -> Optimizer: ) return optimizer - def _load_storage(self, args_storage: str | None) -> Storage: + def _load_storage( + self, + args_storage: str | None, + lazy_schema_create: bool | None = None, + ) -> Storage: """ Instantiate the Storage object from JSON file provided in the --storage command line parameter. @@ -504,6 +528,8 @@ def _load_storage(self, args_storage: str | None) -> Storage: ) class_config = self._config_loader.load_config(args_storage, ConfigSchema.STORAGE) assert isinstance(class_config, dict) + if lazy_schema_create is not None: + class_config["lazy_schema_create"] = lazy_schema_create storage = self._config_loader.build_storage( service=self._parent_service, config=class_config, diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 5a7f97f831..b99f32ae17 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -65,6 +65,10 @@ def __init__( self._config = config.copy() self._global_config = global_config or {} + @abstractmethod + def update_schema(self) -> None: + """Update the schema of the storage backend if needed.""" + def _validate_json_config(self, config: dict) -> None: """Reconstructs a basic json config that this class might have been instantiated from in order to validate configs provided outside the file loading diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini new file mode 100644 index 0000000000..642970df63 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -0,0 +1,120 @@ +# A generic, single database configuration. 
+ +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = mlos_bench.storage.sql:alembic + + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +timezone = UTC + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# See README.md for details. +sqlalchemy.url = sqlite:///mlos_bench.sqlite + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +# Don't override the root logger's level, so that we can control it from mlos_bench configs. 
+#level = WARNING +handlers = +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md new file mode 100644 index 0000000000..ac20a0a45d --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -0,0 +1,43 @@ +# Schema Evolution with Alembic + +This document contains some notes on how to use [`alembic`](https://alembic.sqlalchemy.org/en/latest/) for schema evolution in the `mlos_bench` project. + +## Overview + +1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: + + ```sh + cd mlos_bench/storage/sql + rm mlos_bench.sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` + + > This allows `alembic` to automatically generate a migration script from the current schema. + +2. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + + > Keep each change small and atomic. + > For example, if you want to add a new column, do that in one change. + > If you want to rename a column, do that in another change. + +3. Generate a new migration script with the following command: + + ```sh + alembic revision --autogenerate -m "Descriptive text about the change." + ``` + +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. + +5. 
Verify that the migration script works by running the following command: + + ```sh + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` + + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure it will work with the `mlos_bench` command line interface as well. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + +7. Merge that to the `main` branch. + +8. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py new file mode 100644 index 0000000000..0b4066cee0 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -0,0 +1,89 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Alembic environment script.""" +# pylint: disable=no-member + +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +from mlos_bench.storage.sql.schema import DbSchema + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = DbSchema(engine=None).meta + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """ + Run migrations in 'offline' mode. + + This configures the context with just a URL and not an Engine, though an Engine is + acceptable here as well.
By skipping the Engine creation we don't even need a DBAPI + to be available. + + Calls to context.execute() here emit the given string to the script output. + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """ + Run migrations in 'online' mode. + + In this scenario we need to create an Engine and associate a connection with the + context. + """ + connectable = config.attributes.get("connection", None) + + if connectable is None: + # only create Engine if we don't have a Connection + # from the outside + connectable = engine_from_config( + config.get_section(config.config_ini_section) or {}, + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + else: + context.configure(connection=connectable, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako new file mode 100644 index 0000000000..c8e8aee1e2 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: str | None = ${repr(down_revision)} +branch_labels: str | Sequence[str] | None = ${repr(branch_labels)} +depends_on: str | Sequence[str] | None = ${repr(depends_on)} + + +def upgrade() -> None: + """The schema upgrade script for this revision.""" + ${upgrades if upgrades else "pass # pylint: disable=unnecessary-pass"} + + +def downgrade() -> None: + """The schema downgrade script for this revision.""" + ${downgrades if downgrades else "pass # pylint: disable=unnecessary-pass"} diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py new file mode 100644 index 0000000000..2974d80599 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/d2a708351ba8_add_alembic.py @@ -0,0 +1,35 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +""" +Add alembic. + +Revision ID: d2a708351ba8 +Revises: +Create Date: 2025-01-03 21:21:13.978672+00:00 +""" +from collections.abc import Sequence + +# import sqlalchemy as sa +# from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "d2a708351ba8" +down_revision: str | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """The schema upgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + pass # pylint: disable=unnecessary-pass + # ### end Alembic commands ### + + +def downgrade() -> None: + """The schema downgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! 
### + pass # pylint: disable=unnecessary-pass + # ### end Alembic commands ### diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 0ca194c917..014e610a9f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -12,11 +12,16 @@ ``repr`` or ``str`` (e.g., via ``print()``) on this object. The ``mlos_bench`` CLI will do this automatically if the logging level is set to ``DEBUG``. + +Also see the `mlos_bench CLI usage <../../../../../mlos_bench.run.usage.html>`__ for +details on how to invoke only the schema creation/update routines. """ import logging +from importlib.resources import files from typing import Any +from alembic import command, config from sqlalchemy import ( Column, DateTime, @@ -34,6 +39,8 @@ ) from sqlalchemy.engine import Engine +from mlos_bench.util import path_join + _LOG = logging.getLogger(__name__) @@ -69,11 +76,21 @@ class DbSchema: _METRIC_VALUE_LEN = 255 _STATUS_LEN = 16 - def __init__(self, engine: Engine): - """Declare the SQLAlchemy schema for the database.""" + def __init__(self, engine: Engine | None): + """ + Declare the SQLAlchemy schema for the database. + + Parameters + ---------- + engine : sqlalchemy.engine.Engine | None + The SQLAlchemy engine to use for the DB schema. + Listed as optional for `alembic <https://alembic.sqlalchemy.org/en/latest/>`_ + schema migration purposes so we can reference it inside its ``env.py`` + config file for :attr:`~meta` data inspection, but won't generally be + functional without one. + """ _LOG.info("Create the DB schema for: %s", engine) self._engine = engine - # TODO: bind for automatic schema updates? (#649) self._meta = MetaData() self.experiment = Table( @@ -86,6 +103,9 @@ def __init__(self, engine: Engine): Column("git_commit", String(40), nullable=False), PrimaryKeyConstraint("exp_id"), ) + """The Table storing + :py:class:`~mlos_bench.storage.base_experiment_data.ExperimentData` info.
+ """ self.objectives = Table( "objectives", @@ -101,9 +121,13 @@ def __init__(self, engine: Engine): PrimaryKeyConstraint("exp_id", "optimization_target"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ) + """The Table storing + :py:class:`~mlos_bench.storage.base_storage.Storage.Experiment` optimization + objectives info. + """ # A workaround for SQLAlchemy issue with autoincrement in DuckDB: - if engine.dialect.name == "duckdb": + if engine and engine.dialect.name == "duckdb": seq_config_id = Sequence("seq_config_id") col_config_id = Column( "config_id", @@ -128,6 +152,10 @@ def __init__(self, engine: Engine): col_config_id, Column("config_hash", String(64), nullable=False, unique=True), ) + """The Table storing + :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + info. + """ self.trial = Table( "trial", @@ -143,6 +171,9 @@ def __init__(self, engine: Engine): ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + info. + """ # Values of the tunable parameters of the experiment, # fixed for a particular trial config. @@ -155,6 +186,10 @@ def __init__(self, engine: Engine): PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) + """The Table storing + :py:class:`~mlos_bench.storage.base_tunable_config_data.TunableConfigData` + info. + """ # Values of additional non-tunable parameters of the trial, # e.g., scheduled execution time, VM name / location, number of repeats, etc. @@ -171,6 +206,10 @@ def __init__(self, engine: Engine): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`metadata ` + info. 
+ """ self.trial_status = Table( "trial_status", @@ -185,6 +224,9 @@ def __init__(self, engine: Engine): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:class:`~mlos_bench.environments.status.Status` info. + """ self.trial_result = Table( "trial_result", @@ -199,6 +241,10 @@ def __init__(self, engine: Engine): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`results ` + info. + """ self.trial_telemetry = Table( "trial_telemetry", @@ -214,15 +260,43 @@ def __init__(self, engine: Engine): [self.trial.c.exp_id, self.trial.c.trial_id], ), ) + """The Table storing :py:class:`~mlos_bench.storage.base_trial_data.TrialData` + :py:attr:`telemetry ` + info. + """ _LOG.debug("Schema: %s", self._meta) + @property + def meta(self) -> MetaData: + """Return the SQLAlchemy MetaData object.""" + return self._meta + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") + assert self._engine self._meta.create_all(self._engine) return self + def update(self) -> "DbSchema": + """ + Updates the DB schema to the latest version. + + Notes + ----- + Also see the `mlos_bench CLI usage <../../../../../mlos_bench.run.usage.html>`__ + for details on how to invoke only the schema creation/update routines. + """ + assert self._engine + alembic_cfg = config.Config( + path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) + ) + with self._engine.connect() as conn: + alembic_cfg.attributes["connection"] = conn + command.upgrade(alembic_cfg, "head") + return self + def __repr__(self) -> str: """ Produce a string with all SQL statements required to create the schema from @@ -237,6 +311,7 @@ def __repr__(self) -> str: sql : str A multi-line string with SQL statements to create the DB schema from scratch. 
""" + assert self._engine ddl = _DDL(self._engine.dialect) mock_engine = create_mock_engine(self._engine.url, executor=ddl) self._meta.create_all(mock_engine, checkfirst=False) diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index e3e170b438..b3bf63d0ed 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -38,21 +38,31 @@ def __init__( self._repr = f"{self._url.get_backend_name()}:{self._url.database}" _LOG.info("Connect to the database: %s", self) self._engine = create_engine(self._url, echo=self._log_sql) - self._db_schema: DbSchema + self._db_schema = DbSchema(self._engine) + self._schema_created = False + self._schema_updated = False if not lazy_schema_create: assert self._schema + self.update_schema() else: _LOG.info("Using lazy schema create for database: %s", self) @property def _schema(self) -> DbSchema: """Lazily create schema upon first access.""" - if not hasattr(self, "_db_schema"): - self._db_schema = DbSchema(self._engine).create() + if not self._schema_created: + self._db_schema.create() + self._schema_created = True if _LOG.isEnabledFor(logging.DEBUG): - _LOG.debug("DDL statements:\n%s", self._schema) + _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema + def update_schema(self) -> None: + """Update the database schema.""" + if not self._schema_updated: + self._schema.update() + self._schema_updated = True + def __repr__(self) -> str: return self._repr diff --git a/mlos_bench/pyproject.toml b/mlos_bench/pyproject.toml index d52ad69849..bc712d3eae 100644 --- a/mlos_bench/pyproject.toml +++ b/mlos_bench/pyproject.toml @@ -60,6 +60,9 @@ exclude = ["*.tests", "*.tests.*"] [tool.setuptools.package-data] mlos_bench = [ "py.typed", "**/*.pyi", + "storage/sql/alembic.ini", + "storage/sql/alembic/env.py", + "storage/sql/alembic/versions/*.py", "config/**/*.md", "config/**/*.jsonc", "config/**/*.json", diff --git 
a/mlos_bench/setup.py b/mlos_bench/setup.py index f42d1606c8..95d902105b 100644 --- a/mlos_bench/setup.py +++ b/mlos_bench/setup.py @@ -68,11 +68,11 @@ def _get_long_desc_from_readme(base_url: str) -> dict: # Additional tools for extra functionality. "azure": ["azure-storage-file-share", "azure-identity", "azure-keyvault"], "ssh": ["asyncssh<2.15.0"], # FIXME: asyncssh 2.15.0 has a bug that breaks the tests - "storage-sql-duckdb": ["sqlalchemy", "duckdb_engine"], - "storage-sql-mysql": ["sqlalchemy", "mysql-connector-python"], - "storage-sql-postgres": ["sqlalchemy", "psycopg2"], + "storage-sql-duckdb": ["sqlalchemy", "alembic", "duckdb_engine"], + "storage-sql-mysql": ["sqlalchemy", "alembic", "mysql-connector-python"], + "storage-sql-postgres": ["sqlalchemy", "alembic", "psycopg2"], # sqlite3 comes with python, so we don't need to install it. - "storage-sql-sqlite": ["sqlalchemy"], + "storage-sql-sqlite": ["sqlalchemy", "alembic"], # Transitive extra_requires from mlos-core. "flaml": ["flaml[blendsearch]"], "smac": ["smac"], diff --git a/setup.cfg b/setup.cfg index fd42321bb2..9f42f56b01 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,7 @@ addopts = --ff --nf -n auto --doctest-modules + --ignore-glob=**/alembic/env.py # --dist loadgroup # --log-level=DEBUG # Moved these to Makefile (coverage is expensive and we only need it in the pipelines generally).