Skip to content

[WIP] Datafusion 46 support #4956

[WIP] Datafusion 46 support

[WIP] Datafusion 46 support #4956

Workflow file for this run

name: PR Benchmarks

# Triggers: pull requests targeting `develop` (when a label is added or new
# commits are pushed to the PR), plus manual dispatch with no inputs.
on:
  pull_request:
    types:
      - labeled
      - synchronize
    branches:
      - develop
  workflow_dispatch: {}
# Token scopes granted to every job in this workflow.
permissions:
  # Read-only repository access (the bench job checks out the code).
  contents: read
  # The bench job posts a PR comment and the label_trigger job removes the
  # `benchmark` label; presumably both rely on this scope -- confirm.
  pull-requests: write
  # The bench job assumes an AWS IAM role via
  # aws-actions/configure-aws-credentials; presumably through OIDC, which
  # requires an ID token -- confirm.
  id-token: write
  # NOTE(review): no step visible in this file obviously needs this scope;
  # it may be required by the reusable sql-benchmarks workflow -- confirm.
  actions: write
jobs:
  # Gate job: runs only when the trigger condition holds, and removes the
  # `benchmark` label. The other jobs depend on it through `needs`.
  label_trigger:
    runs-on: ubuntu-latest
    # The parentheses only make the default precedence explicit (`&&` binds
    # tighter than `||`); the condition itself is unchanged.
    # NOTE(review): `github.event.head_commit` is a field of the `push` event
    # payload, and this workflow declares only `pull_request` and
    # `workflow_dispatch` triggers, so the `[benchmark]` commit-message clause
    # likely never matches. A manual dispatch also appears unable to satisfy
    # either clause. Confirm the intent before relying on those paths.
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'benchmark' && github.event_name == 'pull_request') }}
    steps:
      # We remove the benchmark label first so that the workflow can be re-triggered.
      - uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: benchmark

  # Runs each Rust benchmark binary from the matrix on a self-hosted runner,
  # compares the output with stored results for the PR base commit, and
  # posts the comparison as a PR comment.
  bench:
    needs: label_trigger
    runs-on: [self-hosted, gcp]
    # Same gate as label_trigger (see the review note on that job).
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'benchmark' && github.event_name == 'pull_request') }}
    strategy:
      matrix:
        # `id` is the cargo binary name and the stem of the result file;
        # `name` is the human-readable label used in the step title.
        benchmark:
          - id: random_access
            name: Random Access
          - id: compress
            name: Compression
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      # The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
      # NOTE(review): this step is skipped on self-hosted runners, which is
      # what `runs-on` selects; presumably DuckDB is preinstalled there.
      - name: Install DuckDB
        uses: opt-nc/setup-duckdb-action@v1.0.11
        if: runner.environment != 'self-hosted'
        with:
          version: v1.0.0
      - name: Set tempdir
        if: runner.environment == 'self-hosted'
        # Exports TMPDIR=/work to all subsequent steps of this job.
        run: |
          echo "TMPDIR=/work" >> "$GITHUB_ENV"
      - name: Run ${{ matrix.benchmark.name }} benchmark
        shell: bash
        env:
          RUSTFLAGS: '-C target-cpu=native'
        # The benchmark output is shown in the log and also saved to
        # <id>.json for the comparison step below.
        run: |
          cargo run --bin ${{ matrix.benchmark.id }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Compare results
        shell: bash
        # Context values are passed through the environment instead of being
        # spliced into the script text, so the shell only ever sees them as data.
        env:
          BASE_COMMIT_SHA: ${{ github.event.pull_request.base.sha }}
          BENCHMARK_ID: ${{ matrix.benchmark.id }}
        run: |
          set -Eeu -o pipefail -x

          # Keep only the stored result lines that mention the PR base commit.
          # `-F` matches the SHA literally. The step still fails when the
          # download fails or nothing matches, but now says why.
          if ! aws s3 cp s3://vortex-benchmark-results-database/data.json - \
            | grep -F -- "$BASE_COMMIT_SHA" \
            > base.json; then
            echo "::error::Could not load benchmark results for base commit ${BASE_COMMIT_SHA} (download failed or no results recorded)"
            exit 1
          fi

          # Build the PR comment: a collapsible section wrapping the output
          # of the comparison script.
          echo "# Benchmarks: ${BENCHMARK_ID}" > comment.md
          echo '<details>' >> comment.md
          echo '<summary>Table of Results</summary>' >> comment.md
          echo '' >> comment.md
          uv run scripts/compare-benchmark-jsons.py base.json "${BENCHMARK_ID}.json" \
            >> comment.md
          echo '</details>' >> comment.md
      - name: Comment PR
        uses: thollander/actions-comment-pull-request@v3
        with:
          file-path: comment.md
          # One tag per matrix entry, so each benchmark keeps its own comment.
          comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}

  # Delegates to the reusable SQL benchmark workflow in 'pr' mode, passing
  # the caller's secrets through.
  sql:
    needs: label_trigger
    uses: ./.github/workflows/sql-benchmarks.yml
    secrets: inherit
    with:
      mode: 'pr'