From 2215516d7ee3d9bd40dcbd594bd41a8c1e7c6544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Mon, 3 Mar 2025 16:44:08 -0600 Subject: [PATCH 1/2] fix: bump ops to 2.19 and cleanup NHC installation --- charms/sackd/requirements.txt | 2 +- charms/slurmctld/requirements.txt | 2 +- charms/slurmd/dispatch | 11 --------- charms/slurmd/requirements.txt | 2 +- charms/slurmd/src/utils/nhc.py | 36 +++++++++++++++++++++--------- charms/slurmdbd/requirements.txt | 2 +- charms/slurmrestd/requirements.txt | 2 +- repository.py | 7 ++++++ 8 files changed, 37 insertions(+), 27 deletions(-) delete mode 100755 charms/slurmd/dispatch diff --git a/charms/sackd/requirements.txt b/charms/sackd/requirements.txt index c5cedba..be6eba5 100644 --- a/charms/sackd/requirements.txt +++ b/charms/sackd/requirements.txt @@ -1 +1 @@ -ops==2.17.1 +ops~=2.19 diff --git a/charms/slurmctld/requirements.txt b/charms/slurmctld/requirements.txt index 1971872..7435fd2 100644 --- a/charms/slurmctld/requirements.txt +++ b/charms/slurmctld/requirements.txt @@ -1,4 +1,4 @@ -ops==2.17.1 +ops~=2.19 slurmutils<1.0.0,>=0.11.0 influxdb==5.3.2 netifaces-plus==0.12.4 diff --git a/charms/slurmd/dispatch b/charms/slurmd/dispatch deleted file mode 100755 index 2b6f3b8..0000000 --- a/charms/slurmd/dispatch +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -e - -if ! [[ -f '.installed' ]] -then - # Necessary to compile and install NHC - apt-get install --assume-yes make - touch .installed -fi - -JUJU_DISPATCH_PATH="${JUJU_DISPATCH_PATH:-$0}" PYTHONPATH=lib:venv /usr/bin/env python3 ./src/charm.py diff --git a/charms/slurmd/requirements.txt b/charms/slurmd/requirements.txt index bc54308..c551b40 100644 --- a/charms/slurmd/requirements.txt +++ b/charms/slurmd/requirements.txt @@ -1,4 +1,4 @@ -ops==2.17.1 +ops~=2.19 slurmutils<1.0.0,>=0.12.0 nvidia-ml-py==12.560.30 git+https://github.com/canonical/ubuntu-drivers-common@554b91edfd3699625dbed90f679abb31a897b76e#egg=ubuntu-drivers-common diff --git a/charms/slurmd/src/utils/nhc.py b/charms/slurmd/src/utils/nhc.py index c6a59e1..ed222d8 100644 --- a/charms/slurmd/src/utils/nhc.py +++ b/charms/slurmd/src/utils/nhc.py @@ -22,25 +22,39 @@ from constants import NHC_CONFIG +import charms.operator_libs_linux.v0.apt as apt + _logger = logging.getLogger(__name__) -class Error(Exception): - """Exception raised when a nhc operation failed.""" +class NHCOpsError(Exception): + """Exception raised when a NHC operation failed.""" + + @property + def message(self) -> str: + """Return message passed as argument to exception.""" + return self.args[0] def install() -> None: - """Install nhc on compute node. + """Install NHC on compute node. Raises: - subprocess.CalledProcessError: Raised if error is encountered during nhc install. + subprocess.CalledProcessError: Raised if error is encountered during NHC install. """ - _logger.info("installing node health check (nhc)") + _logger.info("installing required packages to install Node Health Check (NHC)") + + try: + apt.add_package("make") + except (apt.PackageNotFoundError, apt.PackageError) as e: + raise NHCOpsError(f"failed to install package `make`. reason: {e}") + + _logger.info("installing NHC") with tempfile.TemporaryDirectory() as tmpdir: try: env = {"LC_ALL": "C", "LANG": "C.UTF-8"} - _logger.info("extracting nhc tarball") + _logger.info("extracting NHC tarball") r = subprocess.check_output( [ "tar", @@ -57,7 +71,7 @@ def install() -> None: ) _logger.debug(r) - _logger.info("building nhc with autotools") + _logger.info("building NHC with autotools") r = subprocess.check_output( ["./autogen.sh", "--prefix=/usr", "--sysconfdir=/etc", "--libexecdir=/usr/lib"], cwd=tmpdir, @@ -67,22 +81,22 @@ def install() -> None: ) _logger.debug(r) - _logger.info("testing nhc build") + _logger.info("testing NHC build") r = subprocess.check_output( ["make", "test"], cwd=tmpdir, env=env, stderr=subprocess.STDOUT, text=True ) _logger.debug(r) - _logger.info("installing nhc") + _logger.info("installing NHC") r = subprocess.check_output( ["make", "install"], cwd=tmpdir, env=env, stderr=subprocess.STDOUT, text=True ) _logger.debug(r) except subprocess.CalledProcessError as e: - _logger.error("failed to install nhc. reason: %s", e) + _logger.error("failed to install NHC. reason: %s", e) raise - # Write the nhc.conf following nhc installation. + # Write the nhc.conf following NHC installation. generate_config() diff --git a/charms/slurmdbd/requirements.txt b/charms/slurmdbd/requirements.txt index d70c278..d0114ed 100644 --- a/charms/slurmdbd/requirements.txt +++ b/charms/slurmdbd/requirements.txt @@ -1,2 +1,2 @@ -ops==2.17.1 +ops~=2.19 slurmutils<1.0.0,>=0.11.0 diff --git a/charms/slurmrestd/requirements.txt b/charms/slurmrestd/requirements.txt index d70c278..d0114ed 100644 --- a/charms/slurmrestd/requirements.txt +++ b/charms/slurmrestd/requirements.txt @@ -1,2 +1,2 @@ -ops==2.17.1 +ops~=2.19 slurmutils<1.0.0,>=0.11.0 diff --git a/repository.py b/repository.py index b967461..cdac559 100755 --- a/repository.py +++ b/repository.py @@ -194,6 +194,13 @@ def clean_charm( """ logger.debug(f"Removing {charm.build_path}") if not dry_run: + try: + subprocess.run(["charmcraft", "clean"], cwd=charm.build_path, check=True) + except FileNotFoundError as e: + logger.info("ignoring charm %s which is not staged", charm.path.name) + except subprocess.CalledProcessError as e: + logger.warning("`charmcraft clean` failed for charm %s. cause: %s", charm.path.name, e) + logger.warning("some LXD instances may remain on the system") shutil.rmtree(charm.build_path, ignore_errors=True) charm.charm_path.unlink(missing_ok=True) From d9a878745ea23306f0bc145f303d1b1aa901700a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Tue, 4 Mar 2025 11:13:56 -0600 Subject: [PATCH 2/2] ci: free image space --- .github/workflows/ci.yaml | 6 ++++++ .github/workflows/release.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 57a49a2..deaa699 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -79,6 +79,12 @@ jobs: - unit-test - type-check steps: + - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - name: Checkout uses: actions/checkout@v4 - name: Install dependencies diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f79b716..1dea11f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -33,6 +33,12 @@ jobs: matrix: charm: [slurmctld, slurmd, slurmdbd, slurmrestd, sackd] steps: + - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" - name: Checkout uses: actions/checkout@v3 - name: Select Charmhub channel