From bf0a45aabcd7267fe6d21be2b0891100b78b34f1 Mon Sep 17 00:00:00 2001
From: Chloe Martin <chloedaughterofmars@gmail.com>
Date: Tue, 10 Dec 2024 13:34:31 -0500
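Subject: [PATCH 1/3] chore(CI): Refactor and re-enable simulator nightly

Run the simulator tests directly on the self-hosted runner instead of
driving a remote host over Teleport/SSH: check out the requested ref,
install nextest, and call scripts/simtest/simtest-run.sh. The cargo/rust
settings that used to be passed inline to the script now live in the
workflow-level `env`, and the fallback ref used when no input is given
becomes `develop`.

The `notify` job (PagerDuty on-call lookup and Slack message on failed
scheduled runs) is removed.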

---
 .github/workflows/simulator_nightly.yml | 175 ++++--------------------
 1 file changed, 29 insertions(+), 146 deletions(-)

diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml
index 234a9013788..36ea6ee8778 100644
--- a/.github/workflows/simulator_nightly.yml
+++ b/.github/workflows/simulator_nightly.yml
@@ -20,159 +20,42 @@ concurrency:
   group: ${{ github.workflow }}
 
 env:
-  IOTA_REF: "${{ github.event.inputs.iota_ref || 'main' }}"
+  IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}"
   TEST_NUM: "${{ github.event.inputs.test_num || '30' }}"
+  CARGO_TERM_COLOR: always
+  # Disable incremental compilation.
+  #
+  # Incremental compilation is useful as part of an edit-build-test-edit cycle,
+  # as it lets the compiler avoid recompiling code that hasn't changed. However,
+  # on CI, we're not making small edits; we're almost always building the entire
+  # project from scratch. Thus, incremental compilation on CI actually
+  # introduces *additional* overhead to support making future builds
+  # faster...but no future builds will ever occur in any given CI environment.
+  #
+  # See https://matklad.github.io/2021/09/04/fast-rust-builds.html#ci-workflow
+  # for details.
+  CARGO_INCREMENTAL: 0
+  # Allow more retries for network requests in cargo (downloading crates) and
+  # rustup (installing toolchains). This should help to reduce flaky CI failures
+  # from transient network timeouts or other issues.
+  CARGO_NET_RETRY: 10
+  RUSTUP_MAX_RETRIES: 10
+  # Don't emit giant backtraces in the CI logs.
+  RUST_BACKTRACE: short
+  RUST_LOG: off
+  NUM_CPUS: 24
 
 jobs:
   simtest:
     timeout-minutes: 240
-    permissions:
-      # The "id-token: write" permission is required or Machine ID will not be
-      # able to authenticate with the cluster.
-      id-token: write
-      contents: read
-    runs-on: self-hosted
+    runs-on: [self-hosted]
 
     steps:
-      - name: Install Teleport
-        uses: teleport-actions/setup@176c25dfcd19cd31a252f275d579822b243e7b9c # v1.0.6
+      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
         with:
-          version: 11.3.1
-      - name: Authorize against Teleport
-        id: auth
-        uses: teleport-actions/auth@685adaf480dc79262a99220eb158a92136d5abd9 # v2.0.3
+          ref: ${{ env.IOTA_REF }}
+      - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38
         with:
-          # Specify the publically accessible address of your Teleport proxy.
-          proxy: proxy.iota-int.com:443
-          # Specify the name of the join token for your bot.
-          token: iota-simtest-token
-          # Specify the length of time that the generated credentials should be
-          # valid for. This is optional and defaults to "1h"
-          certificate-ttl: 2h
-
-      # Cargo clean and git restore on any left over files from git checkout, and deletes all remote tracking branches
-      - name: Environment clean
-        run: |
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && rm -rf ~/iota"
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/ && git clone git@github.com:iotaledger/iota.git"
-
-      # Deleting files in tmpfs that usually fill up pretty quickly after each run. Cargo clean to free up space as well.
-      - name: Tmpfs and cargo clean
-        run: |
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "sudo rm -rf /tmp/*"
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && cargo clean"
-
-      # Checkout out the latest iota repo
-      - name: Checkout iota repo
-        run: |
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 10 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && git fetch origin && git checkout ${{ env.IOTA_REF }}"
-
-      # Setting up cargo and simtest
-      - name: Install simtest
-        run: |
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 10 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && ./scripts/simtest/install.sh"
-
-      # Run simulator tests
+          tool: nextest
       - name: Run simtest
-        run: |
-          tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 120 ssh ubuntu@simtest-01 "source ~/.bashrc && source ~/.cargo/env && cd ~/iota && RUSTUP_MAX_RETRIES=10 CARGO_TERM_COLOR=always CARGO_INCREMENTAL=0 CARGO_NET_RETRY=10 RUST_BACKTRACE=short RUST_LOG=off NUM_CPUS=24 TEST_NUM=${{ env.TEST_NUM }} ./scripts/simtest/simtest-run.sh"
-
-  notify:
-    name: Notify
-    needs: [simtest]
-    runs-on: self-hosted
-    if: github.event_name == 'schedule' && failure()
-
-    steps:
-      - uses: technote-space/workflow-conclusion-action@45ce8e0eb155657ab8ccf346ade734257fd196a5 # v3.0.3
-
-      - name: Checkout iota repo develop branch
-        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-
-      - name: Get iota commit
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          export iota_sha=$(git rev-parse HEAD)
-          echo "iota_sha=${iota_sha}" >> $GITHUB_ENV
-
-      - name: Get link to logs
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh_job_link=$(gh api -X GET 'repos/iotaledger/iota/actions/runs/${{ github.run_id }}/jobs' --jq '.jobs.[0].html_url')
-          echo "gh_job_link=${gh_job_link}" >> $GITHUB_ENV
-
-      - name: Get current oncall
-        run: |
-          export current_oncall=$(curl -s --request GET \
-            --url 'https://api.pagerduty.com/oncalls?schedule_ids[]=PGCQ3YS' \
-            --header 'Accept: application/json' \
-            --header 'Authorization: Token token=${{ secrets.PAGERDUTY_ACCESS_KEY }}' \
-            --header 'Content-Type: application/json' \
-            | jq '.oncalls[].user.summary' | tr -d '"')
-          echo "current_oncall=$(echo ${current_oncall})" >> $GITHUB_ENV
-
-          export oncall_name=$(curl -s --request GET \
-            --url 'https://api.pagerduty.com/oncalls?schedule_ids[]=PGCQ3YS' \
-            --header 'Accept: application/json' \
-            --header 'Authorization: Token token=${{ secrets.PAGERDUTY_ACCESS_KEY }}' \
-            --header 'Content-Type: application/json' \
-            | jq '.oncalls[].escalation_policy.summary' | tr -d '"')
-          echo "oncall_name=$(echo ${oncall_name})" >> $GITHUB_ENV
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: us-west-2
-
-      - name: Get slack id for the oncall
-        run: |
-          export slack_id=$(aws s3 cp s3://iota-employees-dir/employees.json - | jq --arg ONCALL "${{ env.current_oncall }}" '.[] | if .name == $ONCALL then .slack_id else empty end')
-          echo "slack_id=$(echo ${slack_id} | tr -d '"')" >> $GITHUB_ENV
-
-      - name: Post to slack
-        uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0
-        env:
-          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-          IOTA_SHA: ${{ env.iota_sha }}
-          GH_JOB_LINK: ${{ env.gh_job_link }}
-          SLACK_ID: ${{ env.slack_id }}
-          ONCALL_NAME: ${{ env.oncall_name }}
-        with:
-          channel-id: "simtest-nightly"
-          payload: |
-            {
-              "text": "*${{ github.workflow }}* workflow status: `${{ env.WORKFLOW_CONCLUSION }}`",
-              "blocks": [
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "*${{ github.workflow }}* workflow status: `${{ env.WORKFLOW_CONCLUSION }}`"
-                  }
-                },
-                {
-                  "type": "divider"
-                },
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "IOTA commit: <https://github.com/iotaledger/iota/commit/${{ env.IOTA_SHA }}|${{ env.IOTA_SHA }}> \nRun: <${{ env.GH_JOB_LINK }}|${{ github.run_id }}>"
-                  }
-                },
-                {
-                  "type": "divider"
-                },
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "<@${{ env.SLACK_ID }}>, current `${{ env.ONCALL_NAME }}` oncall, please debug failures: `tsh ssh ubuntu@simtest-01` and look in the `/home/ubuntu/simtest_logs/{date}` folder for test results"
-                  }
-                }
-              ]
-            }
+        run: scripts/simtest/simtest-run.sh

From 1ef1b73b5717e21aeca2a7eb885626512fb06e1b Mon Sep 17 00:00:00 2001
From: Chloe Martin <chloedaughterofmars@gmail.com>
Date: Tue, 10 Dec 2024 13:37:21 -0500
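Subject: [PATCH 2/3] chore(CI): Default simulator nightly iota_ref input to develop

Change the `workflow_dispatch` default of the `iota_ref` input from
`main` to `develop`.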

---
 .github/workflows/simulator_nightly.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml
index 36ea6ee8778..8084dbc229c 100644
--- a/.github/workflows/simulator_nightly.yml
+++ b/.github/workflows/simulator_nightly.yml
@@ -9,7 +9,7 @@ on:
         description: "Branch / commit to test"
         type: string
         required: true
-        default: main
+        default: develop
       test_num:
         description: "MSIM_TEST_NUM (test iterations)"
         type: string

From a7af5bb560c421ea882d34157423e3db22f48660 Mon Sep 17 00:00:00 2001
From: Chloe Martin <chloedaughterofmars@gmail.com>
Date: Thu, 12 Dec 2024 13:40:02 -0500
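Subject: [PATCH 3/3] chore(CI): Combine nightly workflows

Move the simtest job from simulator_nightly.yml into nightly.yml, together
with its `iota_ref` / `test_num` dispatch inputs, and delete
simulator_nightly.yml.

Add a `runSimtest` input (default: true) to _rust_tests.yml so that the
nightly `tests` job can skip its own simtest job.

The simtest now follows the nightly.yml schedule (00:00 UTC) instead of
the previous 09:00 UTC cron.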

---
 .github/workflows/_rust_tests.yml       |  4 ++
 .github/workflows/nightly.yml           | 30 ++++++++++++
 .github/workflows/simulator_nightly.yml | 61 -------------------------
 3 files changed, 34 insertions(+), 61 deletions(-)
 delete mode 100644 .github/workflows/simulator_nightly.yml

diff --git a/.github/workflows/_rust_tests.yml b/.github/workflows/_rust_tests.yml
index da2303c56e4..cda9c4a5bfd 100644
--- a/.github/workflows/_rust_tests.yml
+++ b/.github/workflows/_rust_tests.yml
@@ -6,6 +6,9 @@ on:
       changedCrates:
         type: string
         required: false
+      runSimtest:
+        type: boolean
+        default: true
 
 concurrency:
   group: rust-tests-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -118,6 +121,7 @@ jobs:
 
   simtest:
     name: Simtest rust
+    if: inputs.runSimtest
     timeout-minutes: 45
     runs-on: [self-hosted]
     env:
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 27268340ad9..3e02cfc233f 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -4,6 +4,17 @@ on:
   schedule:
     - cron: "0 0 * * *" # every day at midnight
   workflow_dispatch:
+    inputs:
+      iota_ref:
+        description: "Branch / commit to simtest"
+        type: string
+        required: true
+        default: develop
+      test_num:
+        description: "MSIM_TEST_NUM (test iterations)"
+        type: string
+        required: false
+        default: "30"
 
 env:
   BINARY_LIST_FILE: "./binary-build-list.json"
@@ -27,6 +38,8 @@ env:
   RUSTUP_MAX_RETRIES: 10
   # Don't emit giant backtraces in the CI logs.
   RUST_BACKTRACE: short
+  IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}"
+  TEST_NUM: "${{ github.event.inputs.test_num || '30' }}"
 
 jobs:
   release:
@@ -61,6 +74,9 @@ jobs:
 
   tests:
     uses: ./.github/workflows/_rust_tests.yml
+    with:
+      # simtest job below runs a superset of these tests
+      runSimtest: false
 
   external-tests:
     uses: ./.github/workflows/_external_rust_tests.yml
@@ -84,3 +100,17 @@ jobs:
 
   split-cluster:
     uses: ./.github/workflows/split_cluster.yml
+
+  simtest:
+    timeout-minutes: 240
+    runs-on: [self-hosted]
+
+    steps:
+      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+        with:
+          ref: ${{ env.IOTA_REF }}
+      - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38
+        with:
+          tool: nextest
+      - name: Run simtest
+        run: RUST_LOG=off NUM_CPUS=24 scripts/simtest/simtest-run.sh  # env dropped with simulator_nightly.yml
diff --git a/.github/workflows/simulator_nightly.yml b/.github/workflows/simulator_nightly.yml
deleted file mode 100644
index 8084dbc229c..00000000000
--- a/.github/workflows/simulator_nightly.yml
+++ /dev/null
@@ -1,61 +0,0 @@
-name: Simulator Tests
-
-on:
-  schedule:
-    - cron: "0 9 * * *" # UTC timing is every day at 1am PST
-  workflow_dispatch:
-    inputs:
-      iota_ref:
-        description: "Branch / commit to test"
-        type: string
-        required: true
-        default: develop
-      test_num:
-        description: "MSIM_TEST_NUM (test iterations)"
-        type: string
-        required: false
-        default: "30"
-
-concurrency:
-  group: ${{ github.workflow }}
-
-env:
-  IOTA_REF: "${{ github.event.inputs.iota_ref || 'develop' }}"
-  TEST_NUM: "${{ github.event.inputs.test_num || '30' }}"
-  CARGO_TERM_COLOR: always
-  # Disable incremental compilation.
-  #
-  # Incremental compilation is useful as part of an edit-build-test-edit cycle,
-  # as it lets the compiler avoid recompiling code that hasn't changed. However,
-  # on CI, we're not making small edits; we're almost always building the entire
-  # project from scratch. Thus, incremental compilation on CI actually
-  # introduces *additional* overhead to support making future builds
-  # faster...but no future builds will ever occur in any given CI environment.
-  #
-  # See https://matklad.github.io/2021/09/04/fast-rust-builds.html#ci-workflow
-  # for details.
-  CARGO_INCREMENTAL: 0
-  # Allow more retries for network requests in cargo (downloading crates) and
-  # rustup (installing toolchains). This should help to reduce flaky CI failures
-  # from transient network timeouts or other issues.
-  CARGO_NET_RETRY: 10
-  RUSTUP_MAX_RETRIES: 10
-  # Don't emit giant backtraces in the CI logs.
-  RUST_BACKTRACE: short
-  RUST_LOG: off
-  NUM_CPUS: 24
-
-jobs:
-  simtest:
-    timeout-minutes: 240
-    runs-on: [self-hosted]
-
-    steps:
-      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-        with:
-          ref: ${{ env.IOTA_REF }}
-      - uses: taiki-e/install-action@375e0c7f08a66b8c2ba7e7eef31a6f91043a81b0 # v2.44.38
-        with:
-          tool: nextest
-      - name: Run simtest
-        run: scripts/simtest/simtest-run.sh