From 9711511f438461ad1ab8b869d8c752cf17a9d48e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 21 Oct 2024 19:30:50 +0000 Subject: [PATCH 01/33] mypy updates --- mlos_bench/mlos_bench/environments/local/local_env.py | 2 +- .../mlos_bench/services/remote/azure/azure_fileshare.py | 2 +- mlos_bench/mlos_bench/storage/sql/common.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/environments/local/local_env.py b/mlos_bench/mlos_bench/environments/local/local_env.py index 989ae960398..754cdd34065 100644 --- a/mlos_bench/mlos_bench/environments/local/local_env.py +++ b/mlos_bench/mlos_bench/environments/local/local_env.py @@ -209,7 +209,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: ) data = pandas.DataFrame([data.value.to_list()], columns=data.metric.to_list()) # Try to convert string metrics to numbers. - data = data.apply( # type: ignore[assignment] # (false positive) + data = data.apply( pandas.to_numeric, errors="coerce", ).fillna(data) diff --git a/mlos_bench/mlos_bench/services/remote/azure/azure_fileshare.py b/mlos_bench/mlos_bench/services/remote/azure/azure_fileshare.py index 6fa447da225..29a3829a136 100644 --- a/mlos_bench/mlos_bench/services/remote/azure/azure_fileshare.py +++ b/mlos_bench/mlos_bench/services/remote/azure/azure_fileshare.py @@ -110,7 +110,7 @@ def download( data = file_client.download_file() with open(local_path, "wb") as output_file: _LOG.debug("Download file: %s -> %s", remote_path, local_path) - data.readinto(output_file) # type: ignore[no-untyped-call] + data.readinto(output_file) except ResourceNotFoundError as ex: # Translate into non-Azure exception: raise FileNotFoundError(f"Cannot download: {remote_path}") from ex diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py index 3b0c6c31fb0..918ed54ff2a 100644 --- a/mlos_bench/mlos_bench/storage/sql/common.py +++ b/mlos_bench/mlos_bench/storage/sql/common.py @@ -191,7 +191,7 @@ def get_results_df( columns="param", values="value", ) - configs_df = configs_df.apply( # type: ignore[assignment] # (fp) + configs_df = configs_df.apply( pandas.to_numeric, errors="coerce", ).fillna(configs_df) @@ -237,7 +237,7 @@ def get_results_df( columns="metric", values="value", ) - results_df = results_df.apply( # type: ignore[assignment] # (fp) + results_df = results_df.apply( pandas.to_numeric, errors="coerce", ).fillna(results_df) From e716db97f66bf831226a2825ec6c40b6fafb6b2c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 21 Oct 2024 22:16:21 +0000 Subject: [PATCH 02/33] parallel runner disambiguation --- .../tests/optimizers/mlos_core_opt_smac_test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/optimizers/mlos_core_opt_smac_test.py b/mlos_bench/mlos_bench/tests/optimizers/mlos_core_opt_smac_test.py index 23aa56e48cb..5f96b552e5b 100644 --- a/mlos_bench/mlos_bench/tests/optimizers/mlos_core_opt_smac_test.py +++ b/mlos_bench/mlos_bench/tests/optimizers/mlos_core_opt_smac_test.py @@ -70,9 +70,11 @@ def test_init_mlos_core_smac_relative_output_directory(tunable_groups: TunableGr """Test relative path output directory initialization of mlos_core SMAC optimizer. """ + uid = os.environ.get("PYTEST_XDIST_WORKER", "") + output_dir = _OUTPUT_DIR + "." + uid test_opt_config = { "optimizer_type": "SMAC", - "output_directory": _OUTPUT_DIR, + "output_directory": output_dir, "seed": SEED, } opt = MlosCoreOptimizer(tunable_groups, test_opt_config) @@ -82,7 +84,7 @@ def test_init_mlos_core_smac_relative_output_directory(tunable_groups: TunableGr assert path_join(str(opt._opt.base_optimizer.scenario.output_directory)).startswith( path_join(os.getcwd(), str(test_opt_config["output_directory"])) ) - shutil.rmtree(_OUTPUT_DIR) + shutil.rmtree(output_dir) def test_init_mlos_core_smac_relative_output_directory_with_run_name( @@ -91,9 +93,11 @@ def test_init_mlos_core_smac_relative_output_directory_with_run_name( """Test relative path output directory initialization of mlos_core SMAC optimizer. """ + uid = os.environ.get("PYTEST_XDIST_WORKER", "") + output_dir = _OUTPUT_DIR + "." + uid test_opt_config = { "optimizer_type": "SMAC", - "output_directory": _OUTPUT_DIR, + "output_directory": output_dir, "run_name": "test_run", "seed": SEED, } @@ -106,7 +110,7 @@ def test_init_mlos_core_smac_relative_output_directory_with_run_name( os.getcwd(), str(test_opt_config["output_directory"]), str(test_opt_config["run_name"]) ) ) - shutil.rmtree(_OUTPUT_DIR) + shutil.rmtree(output_dir) def test_init_mlos_core_smac_relative_output_directory_with_experiment_id( From f89eaba135a8a9c55045373c803df19477f14088 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 19:34:16 +0000 Subject: [PATCH 03/33] Basic MacOS CI checks --- .github/workflows/macos.yml | 162 ++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 .github/workflows/macos.yml diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml new file mode 100644 index 00000000000..0a19bbb96f2 --- /dev/null +++ b/.github/workflows/macos.yml @@ -0,0 +1,162 @@ +# Note: this file is based on the linux.yml + +name: MLOS MacOS + +on: + workflow_dispatch: + inputs: + tags: + description: Manual MLOS MacOS run + push: + branches: [ main ] + pull_request: + branches: [ main ] + merge_group: + types: [checks_requested] + schedule: + - cron: "1 0 * * *" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + +jobs: + MacOS: + runs-on: macos-latest + + permissions: + contents: read + + # Test multiple versions of python. + strategy: + fail-fast: false + matrix: + python_version: + # Empty string is the floating most recent version of python + # (useful to catch new compatibility issues in nightly builds) + - "" + # For now we only test the latest version of python on MacOS. + #- "3.8" + #- "3.9" + #- "3.10" + #- "3.11" + #- "3.12" + #- "3.13" + + env: + cache_cur_date: unset + cache_cur_hour: unset + cache_prev_hour: unset + CONDA_ENV_NAME: unset + # See notes about $CONDA below. + CONDA_DIR: unset + # When parallel jobs are used, group the output to make debugging easier. + MAKEFLAGS: -Oline + + steps: + - uses: actions/checkout@v4 + + - uses: conda-incubator/setup-miniconda@v3 + + - name: Set cache timestamp variables + id: set_cache_vars + run: | + set -x + if [ -z "${{ matrix.python_version }}" ]; then + CONDA_ENV_NAME=mlos + else + CONDA_ENV_NAME="mlos-${{ matrix.python_version }}" + fi + echo "CONDA_ENV_NAME=$CONDA_ENV_NAME" >> $GITHUB_ENV + echo "cache_cur_date=$(date -u +%Y-%m-%d)" >> $GITHUB_ENV + echo "cache_cur_hour=$(date -u +%H)" >> $GITHUB_ENV + echo "cache_prev_hour=$(date -u -d'1 hour ago' +%H)" >> $GITHUB_ENV + # $CONDA should be set by the setup-miniconda action. + # We set a separate environment variable to allow the dependabot tool + # to parse this file since it expects all env vars to be declared above. + echo "CONDA_DIR=$CONDA" >> $GITHUB_ENV + echo "PIP_CACHE_DIR=$(conda run -n base pip cache dir)" >> $GITHUB_ENV + + #- name: Restore cached conda environment + - name: Restore cached conda packages + id: restore-conda-cache + if: ${{ github.event_name != 'schedule' }} + uses: actions/cache@v4 + with: + #path: ${{ env.CONDA_DIR }}/envs/${{ env.CONDA_ENV_NAME }} + path: ${{ env.CONDA_DIR }}/pkgs + key: conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }}-${{ env.cache_cur_hour }} + restore-keys: | + conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }}-${{ env.cache_prev_hour }} + conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }} + + - name: Restore cached pip packages + id: restore-pip-cache + if: ${{ github.event_name != 'schedule' }} + uses: actions/cache@v4 + with: + path: ${{ env.PIP_CACHE_DIR }} + key: conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }}-${{ env.cache_cur_hour }} + restore-keys: | + conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }}-${{ env.cache_prev_hour }} + conda-${{ runner.os }}-${{ env.CONDA_ENV_NAME }}-${{ hashFiles('conda-envs/${{ env.CONDA_ENV_NAME }}.yml') }}-${{ hashFiles('mlos_*/pyproject.toml') }}-${{ hashFiles('mlos_*/setup.py') }}-${{ env.cache_cur_date }} + + - name: Log some environment variables for debugging + run: | + set -x + printenv + echo "cache_cur_date: $cache_cur_date" + echo "cache_cur_hour: $cache_cur_hour" + echo "cache_prev_hour: $cache_prev_hour" + echo "cache-hit: ${{ steps.restore-conda-cache.outputs.cache-hit }}" + + - name: Update and configure conda + run: | + set -x + conda config --set channel_priority strict + conda update -v -y -n base -c defaults --all + + # Try and speed up the pipeline by using a faster solver: + - name: Install and default to mamba solver + run: | + set -x + conda install -v -y -n base conda-libmamba-solver + # Try to set either of the configs for the solver. + conda config --set experimental_solver libmamba || true + conda config --set solver libmamba || true + echo "CONDA_EXPERIMENTAL_SOLVER=libmamba" >> $GITHUB_ENV + echo "EXPERIMENTAL_SOLVER=libmamba" >> $GITHUB_ENV + + - name: Create/update mlos conda environment + run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v conda-env + + - name: Log conda info + run: | + conda info + conda config --show + conda config --show-sources + conda list -n $CONDA_ENV_NAME + ls -l $CONDA_DIR/envs/$CONDA_ENV_NAME/lib/python*/site-packages/ + conda run -n $CONDA_ENV_NAME pip cache dir + conda run -n $CONDA_ENV_NAME pip cache info + + - name: Verify expected version of python in conda env + if: ${{ matrix.python_version == '' }} + timeout-minutes: 2 + run: | + set -x + conda run -n mlos python -c \ + 'from sys import version_info as vers; assert (vers.major, vers.minor) == (3, 13), f"Unexpected python version: {vers}"' + + # This is moreso about code cleanliness, which is a dev thing, not a + # functionality thing, and the rules for that change between python versions, + # so only do this for the default in the devcontainer. + #- name: Run lint checks + # run: make CONDA_ENV_NAME=$CONDA_ENV_NAME check + + # Only run the coverage checks on the devcontainer job. + - name: Run tests + run: make CONDA_ENV_NAME=$CONDA_ENV_NAME SKIP_COVERAGE=true test + + - name: Generate and test binary distribution files + run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v dist dist-test From 6829634b33d80922446a3e87630daedc94989713 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 15:39:20 -0500 Subject: [PATCH 04/33] Add a run script for Windows --- .devcontainer/scripts/run-devcontainer.ps1 | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .devcontainer/scripts/run-devcontainer.ps1 diff --git a/.devcontainer/scripts/run-devcontainer.ps1 b/.devcontainer/scripts/run-devcontainer.ps1 new file mode 100644 index 00000000000..2e8c490e835 --- /dev/null +++ b/.devcontainer/scripts/run-devcontainer.ps1 @@ -0,0 +1,45 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# Quick hacky script to start a devcontainer in a non-vscode shell for testing. +# See Also: +# - ../build/build-devcontainer +# - "devcontainer open" subcommand from + +#Set-PSDebug -Trace 2 +$ErrorActionPreference = 'Stop' + +# Move to repo root. +Set-Location "$PSScriptRoot/../.." +$repo_root = (Get-Item . | Select-Object -ExpandProperty FullName) +$repo_name = (Get-Item . | Select-Object -ExpandProperty Name) +$repo_root_id = $repo_root.GetHashCode() +$container_name = "$repo_name.$repo_root_id" + +# Be sure to use the host workspace folder if available. +$workspace_root = $repo_root + +$docker_gid = 0 + +New-Item -Type Directory -ErrorAction Ignore "${env:TMP}/$container_name/dc/shellhistory" + +docker run -it --rm ` + --name "$container_name" ` + --user vscode ` + --env USER=vscode ` + --group-add $docker_gid ` + -v "${env:USERPROFILE}/.azure:/dc/azure" ` + -v "${env:TMP}/$container_name/dc/shellhistory:/dc/shellhistory" ` + -v "/var/run/docker.sock:/var/run/docker.sock" ` + -v "${workspace_root}:/workspaces/$repo_name" ` + --workdir "/workspaces/$repo_name" ` + --env CONTAINER_WORKSPACE_FOLDER="/workspaces/$repo_name" ` + --env LOCAL_WORKSPACE_FOLDER="$workspace_root" ` + --env http_proxy="${env:http_proxy:-}" ` + --env https_proxy="${env:https_proxy:-}" ` + --env no_proxy="${env:no_proxy:-}" ` + mlos-devcontainer ` + $args From ab7c4e668f11a9bcfcdbfbc26508cb4e3365ee5f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 15:45:10 -0500 Subject: [PATCH 05/33] Add basic devcontainer build/run checks --- .github/workflows/macos.yml | 8 ++++++++ .github/workflows/windows.yml | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 0a19bbb96f2..90dc4ed5b90 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -160,3 +160,11 @@ jobs: - name: Generate and test binary distribution files run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v dist dist-test + + - name: Build the devcontainer + run: | + .devcontainer/build/build-devcontainer.sh + + - name: Basic test of the devcontainer + run: | + .devcontainer/script/run-devcontainer.sh conda run -n mlos python --version | grep "Python 3.13" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 88f6bb15ddf..aec93f03af2 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -123,3 +123,11 @@ jobs: - name: Generate and test binary distribution files run: | .github/workflows/build-dist-test.ps1 + + - name: Build the devcontainer + run: | + .devcontainer/build/build-devcontainer.ps1 + + - name: Basic test of the devcontainer + run: | + .devcontainer/script/run-devcontainer.ps1 conda run -n mlos python --version From 57813dcdaa5341f3962247477357a0e3f213d181 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 21:07:49 +0000 Subject: [PATCH 06/33] fixups --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 672a1ce7c22..93bd4fd0be1 100644 --- a/Makefile +++ b/Makefile @@ -549,7 +549,7 @@ dist-test-env: dist build/dist-test-env.$(PYTHON_VERSION).build-stamp build/dist-test-env.$(PYTHON_VERSION).build-stamp: build/conda-env.${CONDA_ENV_NAME}.build-stamp # Use the same version of python as the one we used to build the wheels. -build/dist-test-env.$(PYTHON_VERSION).build-stamp: PYTHON_VERS_REQ=$(shell conda list -n ${CONDA_ENV_NAME} | egrep '^python\s+' | sed -r -e 's/^python\s+//' | cut -d' ' -f1 | cut -d. -f1-2) +build/dist-test-env.$(PYTHON_VERSION).build-stamp: PYTHON_VERS_REQ=$(shell conda list -n ${CONDA_ENV_NAME} | egrep '^python\s+' | sed -r -e 's/^python[ \t]+//' | cut -d' ' -f1 | cut -d. -f1-2) build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_core/dist/tmp/mlos_core-latest-py3-none-any.whl build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_bench/dist/tmp/mlos_bench-latest-py3-none-any.whl build/dist-test-env.$(PYTHON_VERSION).build-stamp: mlos_viz/dist/tmp/mlos_viz-latest-py3-none-any.whl From 3a63e6a7982ff1b1eff90b118447f35dd854534c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 21:22:55 +0000 Subject: [PATCH 07/33] test --- .github/workflows/macos.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 90dc4ed5b90..8366a043956 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -161,6 +161,10 @@ jobs: - name: Generate and test binary distribution files run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v dist dist-test + - name: Check docker + run: | + sudo docker info + - name: Build the devcontainer run: | .devcontainer/build/build-devcontainer.sh From d4b378fdae98ef8d4a845915a88d795ff9ba725f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 31 Oct 2024 21:23:17 +0000 Subject: [PATCH 08/33] check docker --- .github/workflows/windows.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index aec93f03af2..c94bf059b7f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -124,6 +124,10 @@ jobs: run: | .github/workflows/build-dist-test.ps1 + - name: Check docker + run: | + docker info + - name: Build the devcontainer run: | .devcontainer/build/build-devcontainer.ps1 From 53eb0fc9c15fb55f0f05b0523dfa19671400c01b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 16:36:05 +0000 Subject: [PATCH 09/33] refactor jobs --- .github/workflows/devcontainer.yml | 10 +++++++--- .github/workflows/linux.yml | 4 +++- .github/workflows/macos.yml | 31 ++++++++++++++++++++++++++++-- .github/workflows/windows.yml | 15 ++++++++++++++- 4 files changed, 53 insertions(+), 7 deletions(-) diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index 1db506ce66a..be9ea82589a 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -26,7 +26,9 @@ concurrency: cancel-in-progress: true jobs: - DevContainer: + DevContainerLintBuildTestPublish: + name: DevContainer Lint/Build/Test/Publish + runs-on: ubuntu-latest permissions: @@ -259,9 +261,11 @@ jobs: docker tag mlos-devcontainer:latest ${{ secrets.ACR_LOGINURL }}/mlos-devcontainer:$image_tag docker push ${{ secrets.ACR_LOGINURL }}/mlos-devcontainer:$image_tag - DeployDocs: + PublishDocs: + name: Publish Documentation + if: github.ref == 'refs/heads/main' - needs: DevContainer + needs: DevContainerLintBuildTestPublish runs-on: ubuntu-latest # Required for github-pages-deploy-action to push to the gh-pages branch. diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7e280901875..c9ba2bdcaa0 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -19,7 +19,9 @@ concurrency: cancel-in-progress: true jobs: - Linux: + LinuxCondaBuildTest: + name: Linux Build/Test with Conda + runs-on: ubuntu-latest permissions: diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 8366a043956..bd1b7531244 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -21,7 +21,9 @@ concurrency: cancel-in-progress: true jobs: - MacOS: + MacOSCondaBuildTest: + name: MacOS Build/Test with Conda + runs-on: macos-latest permissions: @@ -161,9 +163,34 @@ jobs: - name: Generate and test binary distribution files run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v dist dist-test + MacOSDevContainerBuildTest: + name: MacOS DevContainer Build/Test + runs-on: macos-latest + + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Install docker + run: | + brew install docker + brew install docker-machine + docker --version + docker-machine create default + docker-machine start default + docker-machine ls + docker-machine env default + eval "$(docker-machine env default)" + docker image ls + echo "eval \$(docker-machine env default)" >> ~/.bash_profile + echo "export DOCKER_HOST="tcp://0.0.0.0:2375"" >> ~/.bashrc + cat ~/.bash_profile + - name: Check docker run: | - sudo docker info + docker info - name: Build the devcontainer run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c94bf059b7f..c7981916cde 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -20,7 +20,9 @@ concurrency: cancel-in-progress: true jobs: - Windows: + WindowsCondaBuildTest: + name: Windows Build/Test with Conda + runs-on: windows-latest permissions: @@ -124,6 +126,17 @@ jobs: run: | .github/workflows/build-dist-test.ps1 + WindowsDevContainerBuildTest: + name: Windows DevContainer Build/Test + + runs-on: windows-latest + + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + - name: Check docker run: | docker info From bcf6dfd12a4dea133799c136fd64c79bae23a487 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 18:33:30 +0000 Subject: [PATCH 10/33] more info --- .github/workflows/windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c7981916cde..09fcede86f8 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -140,6 +140,8 @@ jobs: - name: Check docker run: | docker info + docker builder ls + docker builder inspect - name: Build the devcontainer run: | From a733564fecb789a58935247db0d12401dd05c9ab Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 19:16:23 +0000 Subject: [PATCH 11/33] debug --- .github/workflows/windows.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 09fcede86f8..dbfdc62b24c 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -140,6 +140,7 @@ jobs: - name: Check docker run: | docker info + docker system info docker builder ls docker builder inspect From 60f7aea0e8be05613b84c11411adcd71c1a1b51f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 19:34:41 +0000 Subject: [PATCH 12/33] debug --- .github/workflows/macos.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index bd1b7531244..5e4b0a9d4d3 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -175,9 +175,11 @@ jobs: - name: Install docker run: | - brew install docker + brew install --cask docker + brew install --cask virtualbox brew install docker-machine docker --version + docker builder ls docker-machine create default docker-machine start default docker-machine ls From e4d3cabe884d46526afb6b6fbd038b5eb70e5afe Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 19:34:46 +0000 Subject: [PATCH 13/33] disable --- .github/workflows/windows.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index dbfdc62b24c..ace9c631ebd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -128,9 +128,15 @@ jobs: WindowsDevContainerBuildTest: name: Windows DevContainer Build/Test + # Skipped for now since building Linux containers on Windows Github Action Runners is not yet supported. + if: false runs-on: windows-latest + defaults: + run: + shell: pwsh + permissions: contents: read @@ -140,8 +146,7 @@ jobs: - name: Check docker run: | docker info - docker system info - docker builder ls + docker builder ls | Select-String linux # current returns '' (not yet supported) docker builder inspect - name: Build the devcontainer From 5512c1588cdb5c146758d8d3ed073a6be588eea9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 19:42:48 +0000 Subject: [PATCH 14/33] tweaks --- .github/workflows/macos.yml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 5e4b0a9d4d3..b372dcfada4 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -176,19 +176,11 @@ jobs: - name: Install docker run: | brew install --cask docker - brew install --cask virtualbox - brew install docker-machine + brew install docker-buildx + export DOCKER_BUILDKIT=1 docker --version docker builder ls - docker-machine create default - docker-machine start default - docker-machine ls - docker-machine env default - eval "$(docker-machine env default)" - docker image ls - echo "eval \$(docker-machine env default)" >> ~/.bash_profile - echo "export DOCKER_HOST="tcp://0.0.0.0:2375"" >> ~/.bashrc - cat ~/.bash_profile + docker ps - name: Check docker run: | From 4060880ad9d31c74faf47105fde4aa23cb588d20 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 19:43:49 +0000 Subject: [PATCH 15/33] whitespace --- .github/workflows/devcontainer.yml | 1 + .github/workflows/macos.yml | 1 + .github/workflows/windows.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index be9ea82589a..c0d41a3d72c 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -261,6 +261,7 @@ jobs: docker tag mlos-devcontainer:latest ${{ secrets.ACR_LOGINURL }}/mlos-devcontainer:$image_tag docker push ${{ secrets.ACR_LOGINURL }}/mlos-devcontainer:$image_tag + PublishDocs: name: Publish Documentation diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index b372dcfada4..47f0a61439b 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -163,6 +163,7 @@ jobs: - name: Generate and test binary distribution files run: make CONDA_ENV_NAME=$CONDA_ENV_NAME CONDA_INFO_LEVEL=-v dist dist-test + MacOSDevContainerBuildTest: name: MacOS DevContainer Build/Test runs-on: macos-latest diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index ace9c631ebd..41adb19cfa9 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -126,6 +126,7 @@ jobs: run: | .github/workflows/build-dist-test.ps1 + WindowsDevContainerBuildTest: name: Windows DevContainer Build/Test # Skipped for now since building Linux containers on Windows Github Action Runners is not yet supported. From 48788a88b4a0200b0cc7a18a25d970b88f3fdae2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 20:15:11 +0000 Subject: [PATCH 16/33] try again --- .github/workflows/macos.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 47f0a61439b..d189f90332e 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -174,14 +174,21 @@ jobs: steps: - uses: actions/checkout@v4 + # Note: no linux platform build support due to lack of nested virtualization on M series chips. + # https://github.com/orgs/community/discussions/69211#discussioncomment-7242133 - name: Install docker run: | brew install --cask docker brew install docker-buildx - export DOCKER_BUILDKIT=1 + mkdir -p ~/.docker + cat ~/.docker/config.json || true + echo '"cliPluginsExtraDirs": "cliPluginsExtraDirs": ["/opt/homebrew/lib/docker/cli-plugins"]' | tee -a ~/.docker/config.json + cat ~/.docker/config.json docker --version - docker builder ls + docker info + docker system info || true docker ps + DOCKER_BUILDKIT=1 docker builder ls - name: Check docker run: | From a557cd46f7098ab8a3fab73a8a8c5dd1e9272200 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 20:27:36 +0000 Subject: [PATCH 17/33] fixupo --- .github/workflows/macos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index d189f90332e..8a0e28f6a27 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -182,7 +182,7 @@ jobs: brew install docker-buildx mkdir -p ~/.docker cat ~/.docker/config.json || true - echo '"cliPluginsExtraDirs": "cliPluginsExtraDirs": ["/opt/homebrew/lib/docker/cli-plugins"]' | tee -a ~/.docker/config.json + echo '"cliPluginsExtraDirs": ["/opt/homebrew/lib/docker/cli-plugins"]' | tee -a ~/.docker/config.json cat ~/.docker/config.json docker --version docker info From 4e53cb4f5aea7b2100a832e64e5f1466f4d5e86e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 20:39:41 +0000 Subject: [PATCH 18/33] fixups --- .github/workflows/macos.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 8a0e28f6a27..1caebd76840 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -180,9 +180,12 @@ jobs: run: | brew install --cask docker brew install docker-buildx + brew install jq mkdir -p ~/.docker - cat ~/.docker/config.json || true - echo '"cliPluginsExtraDirs": ["/opt/homebrew/lib/docker/cli-plugins"]' | tee -a ~/.docker/config.json + (cat ~/.docker/config.json || echo "{}") \ + | jq '.cliPluginsExtraDirs = ((.cliPluginsExtraDirs // []) + ["/opt/homebrew/lib/docker-cli-plugins"])' \ + | tee ~/.docker/config.json.new + mv ~/.docker/config.json.new ~/.docker/config.json cat ~/.docker/config.json docker --version docker info From 1ca1be43a6802f9c3073c73cef9a068d96072dd6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 20:58:23 +0000 Subject: [PATCH 19/33] more debugging --- .github/workflows/macos.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 1caebd76840..cb9853125ad 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -177,16 +177,19 @@ jobs: # Note: no linux platform build support due to lack of nested virtualization on M series chips. # https://github.com/orgs/community/discussions/69211#discussioncomment-7242133 - name: Install docker + timeout-minutes: 15 run: | brew install --cask docker brew install docker-buildx brew install jq mkdir -p ~/.docker - (cat ~/.docker/config.json || echo "{}") \ + (cat ~/.docker/config.json 2>/dev/null || echo "{}") \ | jq '.cliPluginsExtraDirs = ((.cliPluginsExtraDirs // []) + ["/opt/homebrew/lib/docker-cli-plugins"])' \ | tee ~/.docker/config.json.new mv ~/.docker/config.json.new ~/.docker/config.json cat ~/.docker/config.json + osascript -e 'quit app "Docker"' || true; open -a Docker; while [ -z "$(docker info 2> /dev/null )" ]; do printf "."; sleep 1; done; echo "" + ls -l /var/run/docker.sock || true docker --version docker info docker system info || true From 22f478c50ae51521d7dd0fd2c626099b57bdbed9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 1 Nov 2024 22:15:33 +0000 Subject: [PATCH 20/33] Disable the devcontainer rules for now --- .github/workflows/macos.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index cb9853125ad..37bd8e39e40 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -168,38 +168,47 @@ jobs: name: MacOS DevContainer Build/Test runs-on: macos-latest + # Skip this for now. + # Note: no linux platform build support due to lack of nested virtualization on M series chips. + # https://github.com/orgs/community/discussions/69211#discussioncomment-7242133 + if: false + permissions: contents: read steps: - uses: actions/checkout@v4 - # Note: no linux platform build support due to lack of nested virtualization on M series chips. - # https://github.com/orgs/community/discussions/69211#discussioncomment-7242133 - name: Install docker timeout-minutes: 15 run: | + # Install the docker desktop app. brew install --cask docker brew install docker-buildx brew install jq + # Make sure the cli knows where to find the buildx plugin. mkdir -p ~/.docker (cat ~/.docker/config.json 2>/dev/null || echo "{}") \ | jq '.cliPluginsExtraDirs = ((.cliPluginsExtraDirs // []) + ["/opt/homebrew/lib/docker-cli-plugins"])' \ | tee ~/.docker/config.json.new mv ~/.docker/config.json.new ~/.docker/config.json cat ~/.docker/config.json + # Restart docker service. + ps auxwww | grep -i docker || true osascript -e 'quit app "Docker"' || true; open -a Docker; while [ -z "$(docker info 2> /dev/null )" ]; do printf "."; sleep 1; done; echo "" - ls -l /var/run/docker.sock || true + + - name: Check docker + run: | + # Check and see if it's running. + ps auxwww | grep -i docker || true + ls -l /var/run/docker.sock + # Dump some debug info. docker --version docker info docker system info || true docker ps DOCKER_BUILDKIT=1 docker builder ls - - name: Check docker - run: | - docker info - - name: Build the devcontainer run: | .devcontainer/build/build-devcontainer.sh From e6681399ebc350afda15a7fc97e95df961af4383 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 17:22:01 +0000 Subject: [PATCH 21/33] debugging --- mlos_bench/mlos_bench/tests/conftest.py | 20 +++++++++++++++++++ .../tests/services/remote/ssh/__init__.py | 5 +++++ .../tests/services/remote/ssh/fixtures.py | 8 ++++++-- .../remote/ssh/test_ssh_host_service.py | 2 ++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index bc0a8aa1897..b681c8d312b 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -5,6 +5,8 @@ """Common fixtures for mock TunableGroups and Environment objects.""" import os +from logging import warning +from time import time_ns from typing import Any, Generator, List import pytest @@ -143,11 +145,16 @@ def locked_docker_services( """A locked version of the docker_services fixture to implement xdist single instance locking. """ + worker_id = os.environ.get("PYTEST_XDIST_WORKER") # pylint: disable=too-many-arguments,too-many-positional-arguments # Mark the services as in use with the reader lock. + warning(f"{time_ns()} {worker_id}: Acquiring read lock on {docker_services_lock.path}") docker_services_lock.acquire_read_lock() + warning(f"{time_ns()} {worker_id}: Acquired read lock on {docker_services_lock.path}") # Acquire the setup lock to prevent multiple setup operations at once. + warning(f"{time_ns()} {worker_id}: Acquiring lock on {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.acquire() + warning(f"{time_ns()} {worker_id}: Acquired lock on {docker_setup_teardown_lock.path}") # This "with get_docker_services(...)"" pattern is in the default fixture. # We call it instead of docker_services() to avoid pytest complaints about # calling fixtures directly. @@ -160,22 +167,35 @@ def locked_docker_services( ) as docker_services: # Release the setup/tear down lock in order to let the setup operation # continue for other workers (should be a no-op at this point). + warning(f"{time_ns()} {worker_id}: Releasing {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.release() + warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") # Yield the services so that tests within this worker can use them. + warning(f"{time_ns()} {worker_id}: Yielding services") yield docker_services # Now tests that use those services get to run on this worker... # Once the tests are done, release the read lock that marks the services as in use. + warning(f"{time_ns()} {worker_id}: Releasing read lock on {docker_services_lock.path}") docker_services_lock.release_read_lock() + warning(f"{time_ns()} {worker_id}: Released read lock on {docker_services_lock.path}") # Now as we prepare to execute the cleanup code on context exit we need # to acquire the setup/teardown lock again. # First we attempt to get the write lock so that we wait for other # readers to finish and guard against a lock inversion possibility. + warning(f"{time_ns()} {worker_id}: Acquiring write lock on {docker_services_lock.path}") docker_services_lock.acquire_write_lock() + warning(f"{time_ns()} {worker_id}: Acquired write lock on {docker_services_lock.path}") # Next, acquire the setup/teardown lock # First one here is the one to do actual work, everyone else is basically a no-op. # Upon context exit, we should execute the docker_cleanup code. # And try to get the setup/tear down lock again. + warning(f"{time_ns()} {worker_id}: Acquiring {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.acquire() + warning(f"{time_ns()} {worker_id}: Acquired {docker_setup_teardown_lock.path}") # Finally, after the docker_cleanup code has finished, remove both locks. + warning(f"{time_ns()} {worker_id}: Releasing {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.release() + warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") + warning(f"{time_ns()} {worker_id}: Releasing write lock on {docker_services_lock.path}") docker_services_lock.release_write_lock() + warning(f"{time_ns()} {worker_id}: Released write lock on {docker_services_lock.path}") diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index 78bd4b1baba..da09a7eee0b 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -5,9 +5,12 @@ """Common data classes for the SSH service tests.""" from dataclasses import dataclass +from logging import warning from subprocess import run from typing import Optional +import os + from pytest_docker.plugin import Services as DockerServices from mlos_bench.tests import check_socket @@ -75,6 +78,8 @@ def to_connect_params(self, uncached: bool = False) -> dict: def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" + worker_id = os.environ.get("PYTEST_XDIST_WORKER") + warning(f"{worker_id} Waiting for {hostname}:{port} to be ready.") docker_services.wait_until_responsive( check=lambda: check_socket(hostname, port), timeout=30.0, diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index f4042cf62f9..1047f2cd518 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -71,7 +71,9 @@ def ssh_test_server( id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() + locked_docker_services, + ssh_test_server_info.hostname, + ssh_test_server_info.get_port(), ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -116,7 +118,9 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() + locked_docker_services, + alt_test_server_info.hostname, + alt_test_server_info.get_port(), ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 003a8e64339..23135f8f062 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -141,6 +141,7 @@ def test_ssh_service_remote_exec( # Make sure the cache is cleaned up on context exit. assert len(SshHostService._EVENT_LOOP_THREAD_SSH_CLIENT_CACHE) == 0 + assert False, "Testing" def check_ssh_service_reboot( @@ -249,3 +250,4 @@ def test_ssh_service_reboot( ssh_host_service, graceful=False, ) + assert False, "Testing" From 92dba3baa19986940324e78d00597db69f129376 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 17:22:08 +0000 Subject: [PATCH 22/33] fixups --- .../mlos_bench/tests/services/remote/ssh/test_ssh_service.py | 2 +- mlos_bench/mlos_bench/tests/services/remote/ssh/up.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_service.py index 5b335477a98..d06516b3fe3 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_service.py @@ -132,4 +132,4 @@ def test_ssh_service_context_handler() -> None: if __name__ == "__main__": # For debugging in Windows which has issues with pytest detection in vscode. - pytest.main(["-n1", "--dist=no", "-k", "test_ssh_service_background_thread"]) + pytest.main(["-n0", "--dist=no", "-k", "test_ssh_service_context_handler"]) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/up.sh b/mlos_bench/mlos_bench/tests/services/remote/ssh/up.sh index 42bc984e6e5..f0f152975dc 100755 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/up.sh +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/up.sh @@ -28,3 +28,5 @@ echo "OK: private key available at '$scriptdir/id_rsa'. Connect to the ssh-serve docker compose -p "$PROJECT_NAME" port ssh-server ${PORT:-2254} | cut -d: -f2 echo "INFO: And this port for the alt-server container:" docker compose -p "$PROJECT_NAME" port alt-server ${PORT:-2254} | cut -d: -f2 +echo "INFO: And this port for the reboot-server container:" +docker compose -p "$PROJECT_NAME" port reboot-server ${PORT:-2254} | cut -d: -f2 From 9b77d069596f71abb5dc128711a06c0b7c31cb27 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 17:36:01 +0000 Subject: [PATCH 23/33] more debugging --- mlos_bench/mlos_bench/tests/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index b681c8d312b..a25a142596b 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -6,6 +6,7 @@ import os from logging import warning +from subprocess import run from time import time_ns from typing import Any, Generator, List @@ -172,9 +173,13 @@ def locked_docker_services( warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") # Yield the services so that tests within this worker can use them. warning(f"{time_ns()} {worker_id}: Yielding services") + out = run("docker ps", shell=True, check=True, capture_output=True) + warning(f"{time_ns()} {worker_id}: pre locked yield: " + out.stdout.decode()) yield docker_services # Now tests that use those services get to run on this worker... # Once the tests are done, release the read lock that marks the services as in use. + out = run("docker ps", shell=True, check=True, capture_output=True) + warning(f"{time_ns()} {worker_id}: post locked yield: " + out.stdout.decode()) warning(f"{time_ns()} {worker_id}: Releasing read lock on {docker_services_lock.path}") docker_services_lock.release_read_lock() warning(f"{time_ns()} {worker_id}: Released read lock on {docker_services_lock.path}") From 17aed27722e9f6d952cc882c485ae12ada835b31 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 18:56:18 +0000 Subject: [PATCH 24/33] Fixups for odd rebuild behavior in MacOS --- .devcontainer/scripts/prep-container-build | 5 +++++ mlos_bench/mlos_bench/tests/conftest.py | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.devcontainer/scripts/prep-container-build b/.devcontainer/scripts/prep-container-build index 80d0a9f84c3..34f7213ec56 100755 --- a/.devcontainer/scripts/prep-container-build +++ b/.devcontainer/scripts/prep-container-build @@ -18,6 +18,11 @@ if [ ! -f .env ]; then echo "Creating empty .env file for devcontainer." touch .env fi +# Add some info about the host OS to the .env file. +egrep -v '^HOST_OSTYPE=' .env > .env.tmp +echo "HOST_OSTYPE=$OSTYPE" >> .env.tmp +mv .env.tmp .env + # Also prep the random NGINX_PORT for the docker-compose command. if ! [ -e .devcontainer/.env ] || ! egrep -q "^NGINX_PORT=[0-9]+$" .devcontainer/.env; then RANDOM=$$ diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index a25a142596b..d4e44b064a7 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -8,7 +8,9 @@ from logging import warning from subprocess import run from time import time_ns -from typing import Any, Generator, List +from typing import Any, Generator, List, Union + +import sys import pytest from fasteners import InterProcessLock, InterProcessReaderWriterLock @@ -61,6 +63,17 @@ def mock_env_no_noise(tunable_groups: TunableGroups) -> MockEnv: # Fixtures to configure the pytest-docker plugin. +@pytest.fixture(scope="session") +def docker_setup() -> Union[List[str], str]: + """Setup for docker services.""" + if sys.platform == "darwin" or os.environ.get("HOST_OSTYPE", "").lower().startswith("darwin"): + # Workaround an oddity on macOS where the "docker-compose up" + # command always recreates the containers. + # That leads to races when multiple workers are trying to + # start and use the same services. + return ["up --build -d --no-recreate"] + else: + return ["up --build -d"] @pytest.fixture(scope="session") From 383144f882e09ef87d1bcbfb0d2965f5d1257b1c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 18:56:49 +0000 Subject: [PATCH 25/33] Revert "more debugging" This reverts commit 9b77d069596f71abb5dc128711a06c0b7c31cb27. --- mlos_bench/mlos_bench/tests/conftest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index d4e44b064a7..c40edbe0c3e 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -6,7 +6,6 @@ import os from logging import warning -from subprocess import run from time import time_ns from typing import Any, Generator, List, Union @@ -186,13 +185,9 @@ def locked_docker_services( warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") # Yield the services so that tests within this worker can use them. warning(f"{time_ns()} {worker_id}: Yielding services") - out = run("docker ps", shell=True, check=True, capture_output=True) - warning(f"{time_ns()} {worker_id}: pre locked yield: " + out.stdout.decode()) yield docker_services # Now tests that use those services get to run on this worker... # Once the tests are done, release the read lock that marks the services as in use. - out = run("docker ps", shell=True, check=True, capture_output=True) - warning(f"{time_ns()} {worker_id}: post locked yield: " + out.stdout.decode()) warning(f"{time_ns()} {worker_id}: Releasing read lock on {docker_services_lock.path}") docker_services_lock.release_read_lock() warning(f"{time_ns()} {worker_id}: Released read lock on {docker_services_lock.path}") From 0adc4a4eda1a3c9297701ff9280573008e0aaa6e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 18:58:20 +0000 Subject: [PATCH 26/33] Revert "debugging" This reverts commit e6681399ebc350afda15a7fc97e95df961af4383. --- mlos_bench/mlos_bench/tests/conftest.py | 20 ------------------- .../tests/services/remote/ssh/__init__.py | 5 ----- .../tests/services/remote/ssh/fixtures.py | 8 ++------ .../remote/ssh/test_ssh_host_service.py | 2 -- 4 files changed, 2 insertions(+), 33 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index c40edbe0c3e..ee05cea56ce 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -5,8 +5,6 @@ """Common fixtures for mock TunableGroups and Environment objects.""" import os -from logging import warning -from time import time_ns from typing import Any, Generator, List, Union import sys @@ -158,16 +156,11 @@ def locked_docker_services( """A locked version of the docker_services fixture to implement xdist single instance locking. """ - worker_id = os.environ.get("PYTEST_XDIST_WORKER") # pylint: disable=too-many-arguments,too-many-positional-arguments # Mark the services as in use with the reader lock. - warning(f"{time_ns()} {worker_id}: Acquiring read lock on {docker_services_lock.path}") docker_services_lock.acquire_read_lock() - warning(f"{time_ns()} {worker_id}: Acquired read lock on {docker_services_lock.path}") # Acquire the setup lock to prevent multiple setup operations at once. - warning(f"{time_ns()} {worker_id}: Acquiring lock on {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.acquire() - warning(f"{time_ns()} {worker_id}: Acquired lock on {docker_setup_teardown_lock.path}") # This "with get_docker_services(...)"" pattern is in the default fixture. # We call it instead of docker_services() to avoid pytest complaints about # calling fixtures directly. @@ -180,35 +173,22 @@ def locked_docker_services( ) as docker_services: # Release the setup/tear down lock in order to let the setup operation # continue for other workers (should be a no-op at this point). - warning(f"{time_ns()} {worker_id}: Releasing {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.release() - warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") # Yield the services so that tests within this worker can use them. - warning(f"{time_ns()} {worker_id}: Yielding services") yield docker_services # Now tests that use those services get to run on this worker... # Once the tests are done, release the read lock that marks the services as in use. - warning(f"{time_ns()} {worker_id}: Releasing read lock on {docker_services_lock.path}") docker_services_lock.release_read_lock() - warning(f"{time_ns()} {worker_id}: Released read lock on {docker_services_lock.path}") # Now as we prepare to execute the cleanup code on context exit we need # to acquire the setup/teardown lock again. # First we attempt to get the write lock so that we wait for other # readers to finish and guard against a lock inversion possibility. - warning(f"{time_ns()} {worker_id}: Acquiring write lock on {docker_services_lock.path}") docker_services_lock.acquire_write_lock() - warning(f"{time_ns()} {worker_id}: Acquired write lock on {docker_services_lock.path}") # Next, acquire the setup/teardown lock # First one here is the one to do actual work, everyone else is basically a no-op. # Upon context exit, we should execute the docker_cleanup code. # And try to get the setup/tear down lock again. - warning(f"{time_ns()} {worker_id}: Acquiring {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.acquire() - warning(f"{time_ns()} {worker_id}: Acquired {docker_setup_teardown_lock.path}") # Finally, after the docker_cleanup code has finished, remove both locks. - warning(f"{time_ns()} {worker_id}: Releasing {docker_setup_teardown_lock.path}") docker_setup_teardown_lock.release() - warning(f"{time_ns()} {worker_id}: Released {docker_setup_teardown_lock.path}") - warning(f"{time_ns()} {worker_id}: Releasing write lock on {docker_services_lock.path}") docker_services_lock.release_write_lock() - warning(f"{time_ns()} {worker_id}: Released write lock on {docker_services_lock.path}") diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index da09a7eee0b..78bd4b1baba 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -5,12 +5,9 @@ """Common data classes for the SSH service tests.""" from dataclasses import dataclass -from logging import warning from subprocess import run from typing import Optional -import os - from pytest_docker.plugin import Services as DockerServices from mlos_bench.tests import check_socket @@ -78,8 +75,6 @@ def to_connect_params(self, uncached: bool = False) -> dict: def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" - worker_id = os.environ.get("PYTEST_XDIST_WORKER") - warning(f"{worker_id} Waiting for {hostname}:{port} to be ready.") docker_services.wait_until_responsive( check=lambda: check_socket(hostname, port), timeout=30.0, diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 1047f2cd518..f4042cf62f9 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -71,9 +71,7 @@ def ssh_test_server( id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, - ssh_test_server_info.hostname, - ssh_test_server_info.get_port(), + locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -118,9 +116,7 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, - alt_test_server_info.hostname, - alt_test_server_info.get_port(), + locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 23135f8f062..003a8e64339 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -141,7 +141,6 @@ def test_ssh_service_remote_exec( # Make sure the cache is cleaned up on context exit. assert len(SshHostService._EVENT_LOOP_THREAD_SSH_CLIENT_CACHE) == 0 - assert False, "Testing" def check_ssh_service_reboot( @@ -250,4 +249,3 @@ def test_ssh_service_reboot( ssh_host_service, graceful=False, ) - assert False, "Testing" From a3a1f724838c2180cfc72aaedff5f194c5efeead Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 19:05:12 +0000 Subject: [PATCH 27/33] fixup --- .devcontainer/scripts/prep-container-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/scripts/prep-container-build b/.devcontainer/scripts/prep-container-build index 34f7213ec56..6c545cda67a 100755 --- a/.devcontainer/scripts/prep-container-build +++ b/.devcontainer/scripts/prep-container-build @@ -19,7 +19,7 @@ if [ ! -f .env ]; then touch .env fi # Add some info about the host OS to the .env file. -egrep -v '^HOST_OSTYPE=' .env > .env.tmp +egrep -v '^HOST_OSTYPE=' .env > .env.tmp || true echo "HOST_OSTYPE=$OSTYPE" >> .env.tmp mv .env.tmp .env From d15e18fd34f597391cfc0ac8b15dd0980d2a7d8f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 19:38:27 +0000 Subject: [PATCH 28/33] more portability things --- .devcontainer/scripts/prep-container-build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/scripts/prep-container-build b/.devcontainer/scripts/prep-container-build index 6c545cda67a..f6bbb01248e 100755 --- a/.devcontainer/scripts/prep-container-build +++ b/.devcontainer/scripts/prep-container-build @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. @@ -25,7 +25,7 @@ mv .env.tmp .env # Also prep the random NGINX_PORT for the docker-compose command. if ! [ -e .devcontainer/.env ] || ! egrep -q "^NGINX_PORT=[0-9]+$" .devcontainer/.env; then - RANDOM=$$ + RANDOM=${RANDOM:-$$} NGINX_PORT=$((($RANDOM % 30000) + 1 + 80)) echo "NGINX_PORT=$NGINX_PORT" > .devcontainer/.env fi From 50948139902125d5f22db552a846d032eb8f05e6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 19:43:38 +0000 Subject: [PATCH 29/33] format --- mlos_bench/mlos_bench/tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index ee05cea56ce..2aa2138dabf 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -5,9 +5,8 @@ """Common fixtures for mock TunableGroups and Environment objects.""" import os -from typing import Any, Generator, List, Union - import sys +from typing import Any, Generator, List, Union import pytest from fasteners import InterProcessLock, InterProcessReaderWriterLock From a9f1a6b8576551275d6b1c3b810feea564b5cf65 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 20:11:54 +0000 Subject: [PATCH 30/33] naming tweaks --- .github/workflows/devcontainer.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macos.yml | 4 ++-- .github/workflows/windows.yml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index c0d41a3d72c..2505d225d65 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -27,7 +27,7 @@ concurrency: jobs: DevContainerLintBuildTestPublish: - name: DevContainer Lint/Build/Test/Publish + name: Lint/Build/Test/Publish runs-on: ubuntu-latest diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c9ba2bdcaa0..d94ab2565c5 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -20,7 +20,7 @@ concurrency: jobs: LinuxCondaBuildTest: - name: Linux Build/Test with Conda + name: Build/Test with Conda runs-on: ubuntu-latest diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 37bd8e39e40..ddf5d492f8c 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -22,7 +22,7 @@ concurrency: jobs: MacOSCondaBuildTest: - name: MacOS Build/Test with Conda + name: Build/Test with Conda runs-on: macos-latest @@ -165,7 +165,7 @@ jobs: MacOSDevContainerBuildTest: - name: MacOS DevContainer Build/Test + name: DevContainer Build/Test runs-on: macos-latest # Skip this for now. diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 41adb19cfa9..9ff97e7497b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -21,7 +21,7 @@ concurrency: jobs: WindowsCondaBuildTest: - name: Windows Build/Test with Conda + name: Build/Test with Conda runs-on: windows-latest @@ -128,7 +128,7 @@ jobs: WindowsDevContainerBuildTest: - name: Windows DevContainer Build/Test + name: DevContainer Build/Test # Skipped for now since building Linux containers on Windows Github Action Runners is not yet supported. if: false From cf8313519423d88f6f7a4e05d68f2425ff953423 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 4 Nov 2024 20:20:49 +0000 Subject: [PATCH 31/33] Add MacOS status badge to the README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 31cc6da0213..2c1160dc938 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![MLOS DevContainer](https://github.com/microsoft/MLOS/actions/workflows/devcontainer.yml/badge.svg)](https://github.com/microsoft/MLOS/actions/workflows/devcontainer.yml) [![MLOS Linux](https://github.com/microsoft/MLOS/actions/workflows/linux.yml/badge.svg)](https://github.com/microsoft/MLOS/actions/workflows/linux.yml) +[![MLOS MacOS](https://github.com/microsoft/MLOS/actions/workflows/macos.yml/badge.svg)](https://github.com/microsoft/MLOS/actions/workflows/macos.yml) [![MLOS Windows](https://github.com/microsoft/MLOS/actions/workflows/windows.yml/badge.svg)](https://github.com/microsoft/MLOS/actions/workflows/windows.yml) [![Code Coverage Status](https://microsoft.github.io/MLOS/_images/coverage.svg)](https://microsoft.github.io/MLOS/htmlcov/index.html) From 132663557915e7e0030abc558a979dc258a4f297 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 5 Nov 2024 17:03:43 +0000 Subject: [PATCH 32/33] naming --- .github/workflows/markdown-link-check.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/markdown-link-check.yml b/.github/workflows/markdown-link-check.yml index 5edfe706bb0..2ee81a651fd 100644 --- a/.github/workflows/markdown-link-check.yml +++ b/.github/workflows/markdown-link-check.yml @@ -19,6 +19,7 @@ concurrency: jobs: # Check in-repo markdown links markdown-link-check: + name: Check markdown links runs-on: ubuntu-latest permissions: contents: read From 568654f6b9f9062769c1f66ba1362170b7acb3cd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 5 Nov 2024 17:07:55 +0000 Subject: [PATCH 33/33] naming --- .github/workflows/devcontainer.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macos.yml | 4 ++-- .github/workflows/markdown-link-check.yml | 2 +- .github/workflows/windows.yml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index 2505d225d65..c0d41a3d72c 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -27,7 +27,7 @@ concurrency: jobs: DevContainerLintBuildTestPublish: - name: Lint/Build/Test/Publish + name: DevContainer Lint/Build/Test/Publish runs-on: ubuntu-latest diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index d94ab2565c5..c9ba2bdcaa0 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -20,7 +20,7 @@ concurrency: jobs: LinuxCondaBuildTest: - name: Build/Test with Conda + name: Linux Build/Test with Conda runs-on: ubuntu-latest diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index ddf5d492f8c..37bd8e39e40 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -22,7 +22,7 @@ concurrency: jobs: MacOSCondaBuildTest: - name: Build/Test with Conda + name: MacOS Build/Test with Conda runs-on: macos-latest @@ -165,7 +165,7 @@ jobs: MacOSDevContainerBuildTest: - name: DevContainer Build/Test + name: MacOS DevContainer Build/Test runs-on: macos-latest # Skip this for now. diff --git a/.github/workflows/markdown-link-check.yml b/.github/workflows/markdown-link-check.yml index 2ee81a651fd..ebf9ed24454 100644 --- a/.github/workflows/markdown-link-check.yml +++ b/.github/workflows/markdown-link-check.yml @@ -19,7 +19,7 @@ concurrency: jobs: # Check in-repo markdown links markdown-link-check: - name: Check markdown links + name: Check Markdown links runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 9ff97e7497b..41adb19cfa9 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -21,7 +21,7 @@ concurrency: jobs: WindowsCondaBuildTest: - name: Build/Test with Conda + name: Windows Build/Test with Conda runs-on: windows-latest @@ -128,7 +128,7 @@ jobs: WindowsDevContainerBuildTest: - name: DevContainer Build/Test + name: Windows DevContainer Build/Test # Skipped for now since building Linux containers on Windows Github Action Runners is not yet supported. if: false