Skip to content

Commit

Permalink
fix: Improved readability, ran experiment with API
Browse files Browse the repository at this point in the history
and fixed errors
  • Loading branch information
apokryphosx committed Mar 3, 2025
1 parent 436b588 commit 0fc67b7
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions examples/benchmarks/math_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,24 @@

# Set up the agent to be benchmarked
agent = ChatAgent()
data_dir = Path("MATHDataset")
save_to = data_dir / "MATHResults.jsonl"

# Set up the Hendrycks MATH Benchmark
benchmark = MATHBenchmark(data_dir=str(data_dir), save_to=str(save_to))
benchmark = MATHBenchmark(
data_dir="MATH-Data", save_to="MATHResults"
)
benchmark.download()

# TODO run benchmark with API Key to get the value for correct answers
result = benchmark.run(agent, on="test", subset=20, mode=Mode("pass@k", 1))
print("Total:", result["total"])
print("Correct:", result["correct"])
# Run the benchmark to get results
benchmark = benchmark.run(agent, on="test", subset=10)

total_answers = len(benchmark.results)
correct_answers = sum(row["correct"] for row in benchmark.results)

print("Total:", total_answers)
print("Correct:", correct_answers)
'''
===============================================================================
Total: 20
Correct: ?
Total: 10
Correct: 9
===============================================================================
'''
'''

0 comments on commit 0fc67b7

Please sign in to comment.