Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[datasets] Update AnghaBench to v1. #242

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions compiler_gym/envs/llvm/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,25 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
site_data_base = site_data_base or site_data_path("llvm-v0")

yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0)
# Add the legacy version of AnghaBench (v0), using its old manifest.
anghabench_v0_manifest_url, anghabench_v0_manifest_sha256 = {
"darwin": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
"39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
),
"linux": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
"a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
),
}[sys.platform]
yield AnghaBenchDataset(
name="benchmark://anghabench-v0",
site_data_base=site_data_base,
sort_order=0,
manifest_url=anghabench_v0_manifest_url,
manifest_sha256=anghabench_v0_manifest_sha256,
deprecated="Please use anghabench-v1",
)
yield BlasDataset(site_data_base=site_data_base, sort_order=0)
yield CLgenDataset(site_data_base=site_data_base, sort_order=0)
yield CBenchDataset(site_data_base=site_data_base, sort_order=-1)
Expand Down
28 changes: 19 additions & 9 deletions compiler_gym/envs/llvm/datasets/anghabench.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
from concurrent.futures import as_completed
from pathlib import Path
from typing import Optional

from compiler_gym.datasets import Benchmark, TarDatasetWithManifest
from compiler_gym.datasets.benchmark import BenchmarkWithSource
Expand Down Expand Up @@ -38,28 +39,36 @@ class AnghaBenchDataset(TarDatasetWithManifest):
overhead of compiling it from C to bitcode. This is a one-off cost.
"""

def __init__(self, site_data_base: Path, sort_order: int = 0):
manifest_url, manifest_sha256 = {
def __init__(
self,
site_data_base: Path,
sort_order: int = 0,
manifest_url: Optional[str] = None,
manifest_sha256: Optional[str] = None,
deprecated: Optional[str] = None,
name: Optional[str] = None,
):
manifest_url_, manifest_sha256_ = {
"darwin": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
"39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-macos-manifest.bz2",
"96ead63da5f8efa07fd0370f0c6e452b59bed840828b8b19402102b1ce3ee109",
),
"linux": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
"a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-linux-manifest.bz2",
"14df85f650199498cf769715e9f0d7841d09f9fa62a95b8ecc242bdaf227f33a",
),
}[sys.platform]
super().__init__(
name="benchmark://anghabench-v0",
name=name or "benchmark://anghabench-v1",
description="Compile-only C/C++ functions extracted from GitHub",
references={
"Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
"Homepage": "http://cuda.dcc.ufmg.br/angha/",
},
license="Unknown. See: https://github.com/brenocfg/AnghaBench/issues/1",
site_data_base=site_data_base,
manifest_urls=[manifest_url],
manifest_sha256=manifest_sha256,
manifest_urls=[manifest_url or manifest_url_],
manifest_sha256=manifest_sha256 or manifest_sha256_,
tar_urls=[
"https://github.com/brenocfg/AnghaBench/archive/d8034ac8562b8c978376008f4b33df01b8887b19.tar.gz"
],
Expand All @@ -68,6 +77,7 @@ def __init__(self, site_data_base: Path, sort_order: int = 0):
tar_compression="gz",
benchmark_file_suffix=".bc",
sort_order=sort_order,
deprecated=deprecated,
)

def benchmark(self, uri: str) -> Benchmark:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/llvm/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ We provide several datasets of open-source LLVM-IR benchmarks for use:
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
| Dataset | Num. Benchmarks [#f1]_ | Description | Validatable [#f2]_ |
+============================+==========================+====================================================================================================================================================================================================================+======================+
| benchmark://anghabench-v0 | 1,042,976 | Compile-only C/C++ functions extracted from GitHub [`Homepage <http://cuda.dcc.ufmg.br/angha/>`__, `Paper <https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf>`__] | No |
| benchmark://anghabench-v1 | 1,041,333 | Compile-only C/C++ functions extracted from GitHub [`Homepage <http://cuda.dcc.ufmg.br/angha/>`__, `Paper <https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf>`__] | No |
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
| benchmark://blas-v0 | 300 | Basic linear algebra kernels [`Homepage <http://www.netlib.org/blas/>`__, `Paper <https://strum355.netsoc.co/books/PDF/Basic%20Linear%20Algebra%20Subprograms%20for%20Fortran%20Usage%20-%20BLAS%20(1979).pdf>`__] | No |
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
Expand Down
14 changes: 7 additions & 7 deletions tests/llvm/datasets/anghabench_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,33 @@
def anghabench_dataset() -> AnghaBenchDataset:
env = gym.make("llvm-v0")
try:
ds = env.datasets["anghabench-v0"]
ds = env.datasets["anghabench-v1"]
finally:
env.close()
yield ds


def test_anghabench_size(anghabench_dataset: AnghaBenchDataset):
if sys.platform == "darwin":
assert anghabench_dataset.size == 1042908
assert anghabench_dataset.size == 1041265
else:
assert anghabench_dataset.size == 1042976
assert anghabench_dataset.size == 1041333


def test_missing_benchmark_name(anghabench_dataset: AnghaBenchDataset, mocker):
# Mock install() so that on CI it doesn't download and unpack the tarfile.
mocker.patch.object(anghabench_dataset, "install")

with pytest.raises(
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0$"
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1$"
):
anghabench_dataset.benchmark("benchmark://anghabench-v0")
anghabench_dataset.benchmark("benchmark://anghabench-v1")
anghabench_dataset.install.assert_called_once()

with pytest.raises(
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0/$"
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1/$"
):
anghabench_dataset.benchmark("benchmark://anghabench-v0/")
anghabench_dataset.benchmark("benchmark://anghabench-v1/")
assert anghabench_dataset.install.call_count == 2


Expand Down
2 changes: 1 addition & 1 deletion tests/llvm/datasets/llvm_datasets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_default_dataset_list():
try:
assert list(d.name for d in env.datasets) == [
"benchmark://cbench-v1",
"benchmark://anghabench-v0",
"benchmark://anghabench-v1",
"benchmark://blas-v0",
"benchmark://clgen-v0",
"benchmark://github-v0",
Expand Down