Skip to content

Commit 7ad5c0e

Browse files
authored
Merge branch 'develop' into compilation-gpu-burn-with-hip
2 parents 79046da + 001ac57 commit 7ad5c0e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+4826
-2114
lines changed
+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Pseudo-cluster used for the scheduler backend unit tests.
#
# Topology: one Munge key generator, one Slurm controller, three compute
# nodes and a "frontend" login container that is built from the local
# sources. All containers share the `shared-home` and/or `shared-scratch`
# named volumes declared at the bottom of this file.
services:
  # Reported healthy once /scratch/munge.key exists on the shared scratch
  # volume; every other service waits on this health check.
  munge-key-generator:
    image: ghcr.io/reframe-hpc/munge-ubuntu:20.04
    hostname: munge-host
    healthcheck:
      test: ["CMD-SHELL", "test -f /scratch/munge.key"]
      interval: 10s
      timeout: 10s
      retries: 5
    volumes:
      - shared-scratch:/scratch

  # Login node built from the repository sources. It is started only after
  # the Munge key is available and the controller and all compute nodes
  # have been started.
  frontend:
    image: slurm-reframe
    container_name: frontend
    build:
      dockerfile: .github/pseudo-cluster/reframe/Dockerfile
      # Build context is resolved relative to this compose file.
      context: ../../
    hostname: login
    user: admin
    init: true
    volumes:
      - shared-home:/home/admin:rw
      - shared-scratch:/scratch:rw
    links:
      - slurm-master
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
      node0:
        condition: service_started
      node1:
        condition: service_started
      node2:
        condition: service_started
    environment:
      - SLURM_CPUS_ON_NODE=1
      # Forwarded from the caller's environment; falls back to `squeue`.
      - BACKEND=${BACKEND:-squeue}

  # Slurm controller.
  slurm-master:
    image: ghcr.io/reframe-hpc/slurm-master-ubuntu:20.04
    hostname: slurm-master
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    depends_on:
      munge-key-generator:
        condition: service_healthy
    environment:
      - SLURM_CPUS_ON_NODE=1

  # Compute nodes: identical apart from hostname, container name and
  # SLURM_NODENAME.
  node0:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid00
    container_name: slurm-node0
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid00
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

  node1:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid01
    container_name: slurm-node1
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid01
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

  node2:
    image: ghcr.io/reframe-hpc/slurm-node-ubuntu:20.04
    hostname: nid02
    container_name: slurm-node2
    user: admin
    volumes:
      - shared-home:/home/admin
      - shared-scratch:/scratch:rw
    environment:
      - SLURM_NODENAME=nid02
      - SLURM_CPUS_ON_NODE=1
    depends_on:
      munge-key-generator:
        condition: service_healthy
      slurm-master:
        condition: service_started
    links:
      - slurm-master

# Named volumes with default driver settings.
volumes:
  shared-home: {}
  shared-scratch: {}
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Image for the pseudo-cluster "frontend" container.
#
# All COPY sources below are relative to the build context, which the
# compose file sets to two directories above itself.
FROM ubuntu:20.04

# Build-time only: keeps package installation from prompting.
ARG DEBIAN_FRONTEND=noninteractive

# `apt-get` is used instead of `apt` because the latter does not offer a
# stable command-line interface for scripted use.
RUN apt-get update -y && \
    apt-get install -y \
    build-essential \
    clang jq libomp-dev tree vim \
    git \
    mariadb-client \
    munge \
    slurm-client \
    slurm-wlm-torque \
    sudo \
    python3 \
    python3-pip \
    wget \
    curl \
    mpich \
    libmpich-dev && \
    rm -rf /var/lib/apt/lists/*

# Create the `admin` user with password-less sudo; the entrypoint script
# invokes `sudo` for its privileged steps.
RUN useradd -m admin -s /usr/bin/bash -d /home/admin && \
    echo "admin:admin" | chpasswd && adduser admin sudo && \
    echo "admin ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

COPY .github/pseudo-cluster/reframe/slurm.conf /etc/slurm-llnl/
COPY .github/pseudo-cluster/reframe/cgroup.conf /etc/slurm-llnl/
COPY .github/pseudo-cluster/reframe/docker-entrypoint.sh /etc/slurm-llnl/
# Whole build context; the entrypoint copies it from here at start-up.
COPY . /usr/local/share/reframe

RUN mkdir /scratch && \
    chown -R admin:admin /scratch

RUN chmod +rx /etc/slurm-llnl/docker-entrypoint.sh

WORKDIR /home/admin

# `ENV key=value` replaces the legacy whitespace-separated form.
ENV USER=admin
ENV SHELL=bash

ENTRYPOINT ["/etc/slurm-llnl/docker-entrypoint.sh"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../examples/tutorial/dockerfiles/slurm-cluster/reframe/cgroup.conf
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
#
# Entrypoint of the frontend container: waits for the shared Munge key,
# starts munge, patches the Slurm configuration, then bootstraps the
# sources and runs the unit tests against the scheduler backend selected
# through $BACKEND (default: squeue).
#
# The exit status of this script is that of the final test run.

# Exit with status 0 on SIGINT. The action must be quoted: unquoted,
# `trap exit 0 INT` registers `exit` for both EXIT (0) and INT instead.
trap 'exit 0' INT

# Block until the key file shows up on the shared scratch volume.
while [ ! -f /scratch/munge.key ]
do
    sleep 1
done

sudo cp /scratch/munge.key /etc/munge/munge.key
sudo service munge start
# Substitute the REPLACE_IT placeholder with the CPU count of this node.
sudo sed -i "s/REPLACE_IT/CPUs=${SLURM_CPUS_ON_NODE}/g" /etc/slurm-llnl/slurm.conf

# Needs to be copied in the shared home directory
cp -r /usr/local/share/reframe .
# Abort rather than bootstrap and test from the wrong directory.
cd reframe || exit 1
./bootstrap.sh

# Resolve the default once so the log line and the system name agree.
backend=${BACKEND:-squeue}
echo "Running unittests with backend scheduler: ${backend}"

# Temporary files go to a fresh directory under /scratch.
tempdir=$(mktemp -d -p /scratch)
TMPDIR="$tempdir" ./test_reframe.py -v \
    --rfm-user-config=ci-scripts/configs/ci-cluster.py \
    --rfm-user-system="pseudo-cluster:compute-${backend}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../examples/tutorial/dockerfiles/slurm-cluster/reframe/slurm.conf
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Runs the unit tests inside the Docker Compose pseudo-cluster, once per
# scheduler backend listed in the matrix.
name: ReFrame CI / Scheduler backend tests
on:
  pull_request: []

jobs:
  scheduler-test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        scheduler: ['pbs', 'squeue', 'torque']
    steps:
      - uses: actions/checkout@v4
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build Images
        run: |
          docker compose -f .github/pseudo-cluster/docker-compose.yml build
      # BACKEND is picked up by the compose file; the job result is the
      # exit code of the `frontend` container.
      - name: Run Unittests with ${{ matrix.scheduler }} scheduler
        run: |
          BACKEND=${{ matrix.scheduler }} docker compose -f .github/pseudo-cluster/docker-compose.yml up --abort-on-container-exit --exit-code-from frontend

Jenkinsfile

-169
This file was deleted.

0 commit comments

Comments
 (0)