Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPMI: Avoid duplicate example diffs in diffs summary #110619

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions src/coreclr/scripts/superpmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,7 @@ def aggregate_diff_metrics(details_file):
# Project out these fields for the saved diffs, to use for further
# processing. Saving everything into memory is costly on memory when there
# are a large number of diffs.
diffs_fields = ["Context", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
diffs_fields = ["Context", "Method full name", "Context size", "Base ActualCodeBytes", "Diff ActualCodeBytes", "Base PerfScore", "Diff PerfScore"]
diffs = []

for row in read_csv(details_file):
Expand Down Expand Up @@ -2725,12 +2725,40 @@ def diff_pct(r):

display_subset("Smallest {} zero sized diffs:", smallest_zero_size_contexts)

by_diff_size_pct_examples = [diff for diff in by_diff_size_pct if abs(int(diff['Diff ActualCodeBytes']) - int(diff['Base ActualCodeBytes'])) < 50]
if len(by_diff_size_pct_examples) == 0:
by_diff_size_pct_examples = by_diff_size_pct
# Prefer to show small diffs over large percentage wise diffs; sort by this additionally.
# sorted is stable, so for multiple small diffs this will keep them in order of percentage wise improvement/regression.
def is_small_diff(row):
if abs(int(row['Diff ActualCodeBytes']) - int(row['Base ActualCodeBytes'])) < 50:
return 0

return 1

by_small_then_improvement = sorted(by_diff_size_pct, key=is_small_diff)
by_small_then_regression = sorted(reversed(by_diff_size_pct), key=is_small_diff)

def pick_examples(diffs, picked_contexts):
seen = set()
def try_add_seen(row):
len_before = len(seen)
seen.add(row["Method full name"])
return len_before < len(seen)

result = []
for row in diffs:
if row["Context"] in picked_contexts or not try_add_seen(row):
continue

result.append(row)
if len(result) >= 3:
break

return result

example_improvements = pick_examples(by_small_then_improvement, set())

example_improvement_contexts = set(row["Context"] for row in example_improvements)
example_regressions = pick_examples(by_small_then_regression, example_improvement_contexts)

example_improvements = by_diff_size_pct_examples[:3]
example_regressions = by_diff_size_pct_examples[3:][-3:]
contexts = smallest_contexts + top_improvements + top_regressions + top_improvements_pct + top_regressions_pct + smallest_zero_size_contexts + example_improvements + example_regressions
examples = example_improvements + example_regressions

Expand Down
Loading