Skip to content

Commit ea369e9

Browse files
authored
Merge pull request ESMCI#4520 from ESMCI/azamat/baselines/update-perf-info
Update performance baseline checks

Update performance baseline checks:
- add more details to TestStatus.log for base, comp, threshold tput+mem values
- append (not overwrite) to perf-baselines to keep history
- record sha, date, value of new blesses

Test suite: PFS.ne30pg2_r05_oECv3.F2010.chrysalis_intel.bench-noio
Test baseline: same
Test namelist changes: none
Test status: bit for bit

User interface changes?: N
Update gh-pages html (Y/N)?: N
2 parents 1c7eda6 + ce0ae65 commit ea369e9

File tree

5 files changed

+181
-144
lines changed

5 files changed

+181
-144
lines changed

CIME/SystemTests/system_tests_common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from CIME.locked_files import LOCKED_DIR, lock_file, is_locked
2929
from CIME.baselines.performance import (
3030
get_latest_cpl_logs,
31-
_perf_get_memory,
31+
perf_get_memory_list,
3232
perf_compare_memory_baseline,
3333
perf_compare_throughput_baseline,
3434
perf_write_baseline,
@@ -806,7 +806,7 @@ def perf_check_for_memory_leak(case, tolerance):
806806

807807
for cpllog in latestcpllogs:
808808
try:
809-
memlist = _perf_get_memory(case, cpllog)
809+
memlist = perf_get_memory_list(case, cpllog)
810810
except RuntimeError:
811811
return False, "insufficient data for memleak test"
812812

CIME/baselines/performance.py

+115-50
Original file line numberDiff line numberDiff line change
@@ -121,25 +121,25 @@ def perf_write_baseline(case, basegen_dir, throughput=True, memory=True):
121121

122122
if throughput:
123123
try:
124-
tput = perf_get_throughput(case, config)
124+
tput, mode = perf_get_throughput(case, config)
125125
except RuntimeError as e:
126126
logger.debug("Could not get throughput: {0!s}".format(e))
127127
else:
128128
baseline_file = os.path.join(basegen_dir, "cpl-tput.log")
129129

130-
write_baseline_file(baseline_file, tput)
130+
write_baseline_file(baseline_file, tput, mode)
131131

132132
logger.info("Updated throughput baseline to {!s}".format(tput))
133133

134134
if memory:
135135
try:
136-
mem = perf_get_memory(case, config)
136+
mem, mode = perf_get_memory(case, config)
137137
except RuntimeError as e:
138138
logger.info("Could not get memory usage: {0!s}".format(e))
139139
else:
140140
baseline_file = os.path.join(basegen_dir, "cpl-mem.log")
141141

142-
write_baseline_file(baseline_file, mem)
142+
write_baseline_file(baseline_file, mem, mode)
143143

144144
logger.info("Updated memory usage baseline to {!s}".format(mem))
145145

@@ -184,16 +184,11 @@ def perf_get_throughput(case, config):
184184
Model throughput.
185185
"""
186186
try:
187-
tput = config.perf_get_throughput(case)
187+
tput, mode = config.perf_get_throughput(case)
188188
except AttributeError:
189-
tput = _perf_get_throughput(case)
189+
tput, mode = _perf_get_throughput(case)
190190

191-
if tput is None:
192-
raise RuntimeError("Could not get default throughput") from None
193-
194-
tput = str(tput)
195-
196-
return tput
191+
return tput, mode
197192

198193

199194
def perf_get_memory(case, config):
@@ -215,19 +210,14 @@ def perf_get_memory(case, config):
215210
Model memory usage.
216211
"""
217212
try:
218-
mem = config.perf_get_memory(case)
213+
mem, mode = config.perf_get_memory(case)
219214
except AttributeError:
220-
mem = _perf_get_memory(case)
221-
222-
if mem is None:
223-
raise RuntimeError("Could not get default memory usage") from None
224-
225-
mem = str(mem[-1][1])
215+
mem, mode = _perf_get_memory(case)
226216

227-
return mem
217+
return mem, mode
228218

229219

230-
def write_baseline_file(baseline_file, value):
220+
def write_baseline_file(baseline_file, value, mode="a"):
231221
"""
232222
Writes value to `baseline_file`.
233223
@@ -237,13 +227,10 @@ def write_baseline_file(baseline_file, value):
237227
Path to the baseline file.
238228
value : str
239229
Value to write.
230+
mode : str
231+
Mode to open file with.
240232
"""
241-
commit_hash = get_current_commit(repo=get_src_root())
242-
243-
timestamp = get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S")
244-
245-
with open(baseline_file, "w") as fd:
246-
fd.write(f"# sha:{commit_hash} date: {timestamp}\n")
233+
with open(baseline_file, mode) as fd:
247234
fd.write(value)
248235

249236

@@ -270,6 +257,17 @@ def _perf_get_memory(case, cpllog=None):
270257
RuntimeError
271258
If not enough sample were found.
272259
"""
260+
memlist = perf_get_memory_list(case, cpllog)
261+
262+
if memlist is None:
263+
raise RuntimeError("Could not get default memory usage") from None
264+
265+
value = _format_baseline(memlist[-1][1])
266+
267+
return value, "a"
268+
269+
270+
def perf_get_memory_list(case, cpllog):
273271
if cpllog is None:
274272
cpllog = get_latest_cpl_logs(case)
275273
else:
@@ -317,7 +315,12 @@ def _perf_get_throughput(case):
317315

318316
logger.debug("Could not parse throughput from coupler log")
319317

320-
return tput
318+
if tput is None:
319+
raise RuntimeError("Could not get default throughput") from None
320+
321+
value = _format_baseline(tput)
322+
323+
return value, "a"
321324

322325

323326
def get_latest_cpl_logs(case):
@@ -429,7 +432,7 @@ def read_baseline_file(baseline_file):
429432
Value stored in baseline file without comments.
430433
"""
431434
with open(baseline_file) as fd:
432-
lines = [x.strip() for x in fd.readlines() if not x.startswith("#")]
435+
lines = [x.strip() for x in fd.readlines() if not x.startswith("#") and x != ""]
433436

434437
return "\n".join(lines)
435438

@@ -456,13 +459,20 @@ def _perf_compare_throughput_baseline(case, baseline, tolerance):
456459
comment : str
457460
provides explanation from comparison.
458461
"""
459-
current = _perf_get_throughput(case)
462+
current, _ = _perf_get_throughput(case)
463+
464+
try:
465+
current = float(_parse_baseline(current))
466+
except (ValueError, TypeError):
467+
comment = "Could not compare throughput to baseline, as baseline had no value."
468+
469+
return None, comment
460470

461471
try:
462472
# default baseline is stored as single float
463-
baseline = float(baseline)
464-
except ValueError:
465-
comment = "Could not compare throughput to baseline, as basline had no value."
473+
baseline = float(_parse_baseline(baseline))
474+
except (ValueError, TypeError):
475+
comment = "Could not compare throughput to baseline, as baseline had no value."
466476

467477
return None, comment
468478

@@ -474,14 +484,13 @@ def _perf_compare_throughput_baseline(case, baseline, tolerance):
474484
if diff is not None:
475485
below_tolerance = diff < tolerance
476486

487+
info = "Throughput changed by {:.2f}%: baseline={:.3f} sypd, tolerance={:d}%, current={:.3f} sypd".format(
488+
diff * 100, baseline, int(tolerance * 100), current
489+
)
477490
if below_tolerance:
478-
comment = "TPUTCOMP: Computation time changed by {:.2f}% relative to baseline".format(
479-
diff * 100
480-
)
491+
comment = "TPUTCOMP: " + info
481492
else:
482-
comment = "Error: TPUTCOMP: Computation time increase > {:d}% from baseline".format(
483-
int(tolerance * 100)
484-
)
493+
comment = "Error: TPUTCOMP: " + info
485494

486495
return below_tolerance, comment
487496

@@ -509,16 +518,21 @@ def _perf_compare_memory_baseline(case, baseline, tolerance):
509518
provides explanation from comparison.
510519
"""
511520
try:
512-
current = _perf_get_memory(case)
521+
current, _ = _perf_get_memory(case)
513522
except RuntimeError as e:
514523
return None, str(e)
515-
else:
516-
current = current[-1][1]
524+
525+
try:
526+
current = float(_parse_baseline(current))
527+
except (ValueError, TypeError):
528+
comment = "Could not compare throughput to baseline, as baseline had no value."
529+
530+
return None, comment
517531

518532
try:
519533
# default baseline is stored as single float
520-
baseline = float(baseline)
521-
except ValueError:
534+
baseline = float(_parse_baseline(baseline))
535+
except (ValueError, TypeError):
522536
baseline = 0.0
523537

524538
try:
@@ -533,13 +547,64 @@ def _perf_compare_memory_baseline(case, baseline, tolerance):
533547
if diff is not None:
534548
below_tolerance = diff < tolerance
535549

550+
info = "Memory usage highwater changed by {:.2f}%: baseline={:.3f} MB, tolerance={:d}%, current={:.3f} MB".format(
551+
diff * 100, baseline, int(tolerance * 100), current
552+
)
536553
if below_tolerance:
537-
comment = "MEMCOMP: Memory usage highwater has changed by {:.2f}% relative to baseline".format(
538-
diff * 100
539-
)
554+
comment = "MEMCOMP: " + info
540555
else:
541-
comment = "Error: Memory usage increase >{:d}% from baseline's {:f} to {:f}".format(
542-
int(tolerance * 100), baseline, current
543-
)
556+
comment = "Error: MEMCOMP: " + info
544557

545558
return below_tolerance, comment
559+
560+
561+
def _format_baseline(value):
562+
"""
563+
Encodes value with default baseline format.
564+
565+
Default format:
566+
sha:<commit sha> date:<date of bless> <value>
567+
568+
Parameters
569+
----------
570+
value : str
571+
Baseline value to encode.
572+
573+
Returns
574+
-------
575+
value : str
576+
Baseline entry.
577+
"""
578+
commit_hash = get_current_commit(repo=get_src_root())
579+
580+
timestamp = get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S")
581+
582+
return f"sha:{commit_hash} date:{timestamp} {value}\n"
583+
584+
585+
def _parse_baseline(data):
586+
"""
587+
Parses default baseline format.
588+
589+
Default format:
590+
sha:<commit sha> date:<date of bless> <value>
591+
592+
Parameters
593+
----------
594+
data : str
595+
Containing contents of baseline file.
596+
597+
Returns
598+
-------
599+
value : str
600+
Value of the latest blessed baseline.
601+
"""
602+
lines = data.split("\n")
603+
lines = [x for x in lines if x != ""]
604+
605+
try:
606+
value = lines[-1].strip().split(" ")[-1]
607+
except IndexError:
608+
value = None
609+
610+
return value

0 commit comments

Comments
 (0)