forked from darshan-hpc/darshan
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* this is a Python/pandas-only version of doing some simple derived metrics; I don't think we'll actually do this, but I was exploring a bit because of the difficulties in darshan-hpc gh-839 * this matches pretty well with the `perl` based reports for total bytes, but even simple cases can sometimes disagree on bandwidth per darshan-hpc gh-847, so now I'm curious what is going on * one problem with doing this is that we'd have the same algorithms implemented in two different languages; the advantages include: - not reading all the records in a second time, crossing the CFFI boundary each time - easier to debug/maintain because bounds checking/no segfaults, etc.
- Loading branch information
1 parent
9731f44
commit 67dd29e
Showing
2 changed files
with
44 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
def perf_estimate(report, mod_name: str) -> str:
    """Build a one-line I/O performance summary for a single module.

    The records for ``mod_name`` are converted to dataframes, the
    bytes read/written counters are summed and expressed in MiB, and
    that total is divided by the summed read and write times to get
    an aggregate bandwidth.

    Parameters
    ----------
    report:
        Object exposing ``report.data["records"][mod_name].to_df()``,
        where the result is indexable by ``"counters"`` and
        ``"fcounters"``.
    mod_name:
        Module name used to look up the records; hyphens are removed
        from it to form the counter column prefix.

    Returns
    -------
    str
        A string of the form ``"I/O performance estimate (at the
        <mod_name> layer): transferred X.X MiB at Y.YY MiB/s"``.

    Raises
    ------
    KeyError
        If ``mod_name`` has no records in the report, or the expected
        counter columns are absent.
    """
    data = report.data["records"][mod_name].to_df()
    counters_df = data["counters"]
    fcounters_df = data["fcounters"]
    # the old perl reports used MiB so doing
    # that here for consistency, though I note
    # that it might be more natural to use a library
    # like humanize to automatically select i.e., GiB
    # depending on magnitude
    mod_name_adjusted = mod_name.replace("-", "")
    total_bytes = (counters_df[f"{mod_name_adjusted}_BYTES_WRITTEN"].sum()
                   + counters_df[f"{mod_name_adjusted}_BYTES_READ"].sum())
    total_mebibytes = total_bytes / (2 ** 20)
    total_rw_time = (fcounters_df[f"{mod_name_adjusted}_F_READ_TIME"].sum()
                     + fcounters_df[f"{mod_name_adjusted}_F_WRITE_TIME"].sum())
    # with no recorded read/write time the ratio is undefined; the
    # unguarded division would yield nan/inf (and a RuntimeWarning),
    # so report a zero rate instead
    if total_rw_time > 0:
        mebibytes_per_sec = total_mebibytes / total_rw_time
    else:
        mebibytes_per_sec = 0.0
    # construct a string similar to the one used in perl reports,
    # matching in precision of reported values
    # TODO: resolve discrepancy noted in gh-847 vs. perl
    # reports on the bandwidth calculation (even for single record logs!)
    io_perf_string = (f"I/O performance estimate (at the {mod_name} layer): "
                      f"transferred {total_mebibytes:.1f} MiB at {mebibytes_per_sec:.2f} MiB/s")
    return io_perf_string
22 changes: 22 additions & 0 deletions
22
darshan-util/pydarshan/darshan/tests/test_derived_metrics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import darshan | ||
from darshan.log_utils import get_log_path | ||
from darshan import derived_metrics | ||
|
||
import pytest | ||
|
||
|
||
@pytest.mark.parametrize(
    "log_name, module, expected",
    [
        # the reference strings below were copied verbatim from
        # the output of the old perl-based reports
        (
            "imbalanced-io.darshan",
            "STDIO",
            "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s",
        ),
        (
            "laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan",
            "STDIO",
            "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s",
        ),
    ],
)
def test_perf_estimate(log_name, module, expected):
    """Check the performance summary string against the perl report text."""
    # load the full log so that the module records are available
    loaded_report = darshan.DarshanReport(get_log_path(log_name), read_all=True)
    summary = derived_metrics.perf_estimate(report=loaded_report,
                                            mod_name=module)
    assert summary == expected