From bf0a45aabcd7267fe6d21be2b0891100b78b34f1 Mon Sep 17 00:00:00 2001 From: Chloe Martin Date: Tue, 10 Dec 2024 13:34:31 -0500 Subject: [PATCH 1/3] chore(CI): Refactor and re-enable simulator nightly --- .github/workflows/simulator_nightly.yml | 175 ++++-------------------- 1 file changed, 29 insertions(+), 146 deletions(-) diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml index 234a9013788..36ea6ee8778 100644 --- a/.github/workflows/simulator_nightly.yml +++ b/.github/workflows/simulator_nightly.yml @@ -20,159 +20,42 @@ concurrency: group: ${{ github.workflow }} env: - IOTA_REF: "${{ github.event.inputs.iota_ref || 'main' }}" + IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}" TEST_NUM: "${{ github.event.inputs.test_num || '30' }}" + CARGO_TERM_COLOR: always + # Disable incremental compilation. + # + # Incremental compilation is useful as part of an edit-build-test-edit cycle, + # as it lets the compiler avoid recompiling code that hasn't changed. However, + # on CI, we're not making small edits; we're almost always building the entire + # project from scratch. Thus, incremental compilation on CI actually + # introduces *additional* overhead to support making future builds + # faster...but no future builds will ever occur in any given CI environment. + # + # See https://matklad.github.io/2021/09/04/fast-rust-builds.html#ci-workflow + # for details. + CARGO_INCREMENTAL: 0 + # Allow more retries for network requests in cargo (downloading crates) and + # rustup (installing toolchains). This should help to reduce flaky CI failures + # from transient network timeouts or other issues. + CARGO_NET_RETRY: 10 + RUSTUP_MAX_RETRIES: 10 + # Don't emit giant backtraces in the CI logs. + RUST_BACKTRACE: short + RUST_LOG: off + NUM_CPUS: 24 jobs: simtest: timeout-minutes: 240 - permissions: - # The "id-token: write" permission is required or Machine ID will not be - # able to authenticate with the cluster. - id-token: write - contents: read - runs-on: self-hosted + runs-on: [self-hosted] steps: - - name: Install Teleport - uses: teleport-actions/setup@176c25dfcd19cd31a252f275d579822b243e7b9c # v1.0.6 + - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: - version: 11.3.1 - - name: Authorize against Teleport - id: auth - uses: teleport-actions/auth@685adaf480dc79262a99220eb158a92136d5abd9 # v2.0.3 + ref: ${{ env.IOTA_REF }} + - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38 with: - # Specify the publically accessible address of your Teleport proxy. - proxy: proxy.iota-int.com:443 - # Specify the name of the join token for your bot. - token: iota-simtest-token - # Specify the length of time that the generated credentials should be - # valid for. This is optional and defaults to "1h" - certificate-ttl: 2h - - # Cargo clean and git restore on any left over files from git checkout, and deletes all remote tracking branches - - name: Environment clean - run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && rm -rf ~/iota" - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/ && git clone git@github.com:iotaledger/iota.git" - - # Deleting files in tmpfs that usually fill up pretty quickly after each run. Cargo clean to free up space as well. - - name: Tmpfs and cargo clean - run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "sudo rm -rf /tmp/*" - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && cargo clean" - - # Checkout out the latest iota repo - - name: Checkout iota repo - run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 10 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && git fetch origin && git checkout ${{ env.IOTA_REF }}" - - # Setting up cargo and simtest - - name: Install simtest - run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 10 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && ./scripts/simtest/install.sh" - - # Run simulator tests + tool: nextest - name: Run simtest - run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 120 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && RUSTUP_MAX_RETRIES=10 CARGO_TERM_COLOR=always CARGO_INCREMENTAL=0 CARGO_NET_RETRY=10 RUST_BACKTRACE=short RUST_LOG=off NUM_CPUS=24 TEST_NUM=${{ env.TEST_NUM }} ./scripts/simtest/simtest-run.sh" - - notify: - name: Notify - needs: [simtest] - runs-on: self-hosted - if: github.event_name == 'schedule' && failure() - - steps: - - uses: technote-space/workflow-conclusion-action@45ce8e0eb155657ab8ccf346ade734257fd196a5 # v3.0.3 - - - name: Checkout iota repo develop branch - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - - - name: Get iota commit - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - export iota_sha=$(git rev-parse HEAD) - echo "iota_sha=${iota_sha}" >> $GITHUB_ENV - - - name: Get link to logs - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh_job_link=$(gh api -X GET 'repos/iotaledger/iota/actions/runs/${{ github.run_id }}/jobs' --jq '.jobs.[0].html_url') - echo "gh_job_link=${gh_job_link}" >> $GITHUB_ENV - - - name: Get current oncall - run: | - export current_oncall=$(curl -s --request GET \ - --url 'https://api.pagerduty.com/oncalls?schedule_ids[]=PGCQ3YS' \ - --header 'Accept: application/json' \ - --header 'Authorization: Token token=${{ secrets.PAGERDUTY_ACCESS_KEY }}' \ - --header 'Content-Type: application/json' \ - | jq '.oncalls[].user.summary' | tr -d '"') - echo "current_oncall=$(echo ${current_oncall})" >> $GITHUB_ENV - - export oncall_name=$(curl -s --request GET \ - --url 'https://api.pagerduty.com/oncalls?schedule_ids[]=PGCQ3YS' \ - --header 'Accept: application/json' \ - --header 'Authorization: Token token=${{ secrets.PAGERDUTY_ACCESS_KEY }}' \ - --header 'Content-Type: application/json' \ - | jq '.oncalls[].escalation_policy.summary' | tr -d '"') - echo "oncall_name=$(echo ${oncall_name})" >> $GITHUB_ENV - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - - name: Get slack id for the oncall - run: | - export slack_id=$(aws s3 cp s3://iota-employees-dir/employees.json - | jq --arg ONCALL "${{ env.current_oncall }}" '.[] | if .name == $ONCALL then .slack_id else empty end') - echo "slack_id=$(echo ${slack_id} | tr -d '"')" >> $GITHUB_ENV - - - name: Post to slack - uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0 - env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - IOTA_SHA: ${{ env.iota_sha }} - GH_JOB_LINK: ${{ env.gh_job_link }} - SLACK_ID: ${{ env.slack_id }} - ONCALL_NAME: ${{ env.oncall_name }} - with: - channel-id: "simtest-nightly" - payload: | - { - "text": "*${{ github.workflow }}* workflow status: `${{ env.WORKFLOW_CONCLUSION }}`", - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "*${{ github.workflow }}* workflow status: `${{ env.WORKFLOW_CONCLUSION }}`" - } - }, - { - "type": "divider" - }, - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "IOTA commit: \nRun: <${{ env.GH_JOB_LINK }}|${{ github.run_id }}>" - } - }, - { - "type": "divider" - }, - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "<@${{ env.SLACK_ID }}>, current `${{ env.ONCALL_NAME }}` oncall, please debug failures: `tsh ssh ubuntu@simtest-01` and look in the `/home/ubuntu/simtest_logs/{date}` folder for test results" - } - } - ] - } + run: scripts/simtest/simtest-run.sh From 1ef1b73b5717e21aeca2a7eb885626512fb06e1b Mon Sep 17 00:00:00 2001 From: Chloe Martin Date: Tue, 10 Dec 2024 13:37:21 -0500 Subject: [PATCH 2/3] change default ref --- .github/workflows/simulator_nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml index 36ea6ee8778..8084dbc229c 100644 --- a/.github/workflows/simulator_nightly.yml +++ b/.github/workflows/simulator_nightly.yml @@ -9,7 +9,7 @@ on: description: "Branch / commit to test" type: string required: true - default: main + default: develop test_num: description: "MSIM_TEST_NUM (test iterations)" type: string From a7af5bb560c421ea882d34157423e3db22f48660 Mon Sep 17 00:00:00 2001 From: Chloe Martin Date: Thu, 12 Dec 2024 13:40:02 -0500 Subject: [PATCH 3/3] Combine nightly workflows --- .github/workflows/_rust_tests.yml | 4 ++ .github/workflows/nightly.yml | 30 ++++++++++++ .github/workflows/simulator_nightly.yml | 61 ------------------------- 3 files changed, 34 insertions(+), 61 deletions(-) delete mode 100644 .github/workflows/simulator_nightly.yml diff --git a/.github/workflows/_rust_tests.yml b/.github/workflows/_rust_tests.yml index da2303c56e4..cda9c4a5bfd 100644 --- a/.github/workflows/_rust_tests.yml +++ b/.github/workflows/_rust_tests.yml @@ -6,6 +6,9 @@ on: changedCrates: type: string required: false + runSimtest: + type: boolean + default: true concurrency: group: rust-tests-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -118,6 +121,7 @@ jobs: simtest: name: Simtest rust + if: inputs.runSimtest timeout-minutes: 45 runs-on: [self-hosted] env: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 27268340ad9..3e02cfc233f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -4,6 +4,17 @@ on: schedule: - cron: "0 0 * * *" # every day at midnight workflow_dispatch: + inputs: + iota_ref: + description: "Branch / commit to simtest" + type: string + required: true + default: develop + test_num: + description: "MSIM_TEST_NUM (test iterations)" + type: string + required: false + default: "30" env: BINARY_LIST_FILE: "./binary-build-list.json" @@ -27,6 +38,8 @@ env: RUSTUP_MAX_RETRIES: 10 # Don't emit giant backtraces in the CI logs. RUST_BACKTRACE: short + IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}" + TEST_NUM: "${{ github.event.inputs.test_num || '30' }}" jobs: release: @@ -61,6 +74,9 @@ jobs: tests: uses: ./.github/workflows/_rust_tests.yml + with: + # simtest job below runs a superset of these tests + runSimtest: false external-tests: uses: ./.github/workflows/_external_rust_tests.yml @@ -84,3 +100,17 @@ jobs: split-cluster: uses: ./.github/workflows/split_cluster.yml + + simtest: + timeout-minutes: 240 + runs-on: [self-hosted] + + steps: + - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + with: + ref: ${{ env.IOTA_REF }} + - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38 + with: + tool: nextest + - name: Run simtest + run: scripts/simtest/simtest-run.sh diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml deleted file mode 100644 index 8084dbc229c..00000000000 --- a/.github/workflows/simulator_nightly.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Simulator Tests - -on: - schedule: - - cron: "0 9 * * *" # UTC timing is every day at 1am PST - workflow_dispatch: - inputs: - iota_ref: - description: "Branch / commit to test" - type: string - required: true - default: develop - test_num: - description: "MSIM_TEST_NUM (test iterations)" - type: string - required: false - default: "30" - -concurrency: - group: ${{ github.workflow }} - -env: - IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}" - TEST_NUM: "${{ github.event.inputs.test_num || '30' }}" - CARGO_TERM_COLOR: always - # Disable incremental compilation. - # - # Incremental compilation is useful as part of an edit-build-test-edit cycle, - # as it lets the compiler avoid recompiling code that hasn't changed. However, - # on CI, we're not making small edits; we're almost always building the entire - # project from scratch. Thus, incremental compilation on CI actually - # introduces *additional* overhead to support making future builds - # faster...but no future builds will ever occur in any given CI environment. - # - # See https://matklad.github.io/2021/09/04/fast-rust-builds.html#ci-workflow - # for details. - CARGO_INCREMENTAL: 0 - # Allow more retries for network requests in cargo (downloading crates) and - # rustup (installing toolchains). This should help to reduce flaky CI failures - # from transient network timeouts or other issues. - CARGO_NET_RETRY: 10 - RUSTUP_MAX_RETRIES: 10 - # Don't emit giant backtraces in the CI logs. - RUST_BACKTRACE: short - RUST_LOG: off - NUM_CPUS: 24 - -jobs: - simtest: - timeout-minutes: 240 - runs-on: [self-hosted] - - steps: - - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - with: - ref: ${{ env.IOTA_REF }} - - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38 - with: - tool: nextest - - name: Run simtest - run: scripts/simtest/simtest-run.sh