From 0d32ecb221baa10b45b72110d71feee149c400db Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 3 Sep 2022 15:41:10 -0600 Subject: [PATCH] WIP, ENH: DXT plot threshold * related to gh-729 and gh-692 * draft infrastructure for skipping the processing of DXT data above a certain module compressed size threshold, in cases where runtime `HEATMAP` data is available * note that for the vast majority of log files that have been provided/problematic in this regard, including the large ones from NERSC, this is of no help, because there is no `HEATMAP` data to fall back on * for a case where this does help, on this branch: `time python -m darshan summary e3sm_io_heatmap_and_dxt.darshan` `real 0m12.415s` vs. `main`: `real 0m47.470s` * so, that's not a bad improvement, but there are still many things to decide/do here: - [ ] test the size threshold empirically with more appropriate logs - [ ] decide if we want to use the current approach of summing the sizes of each DXT module together vs. having per-DXT module thresholds - [ ] decide if we'd also want a way to disable DXT handling even if `HEATMAP` is not available (otherwise, all the sample NERSC logs will use > 100 GB memory and be unusable with current report generation machinery) - [ ] add a warning mechanism/message somewhere on the report when the threshold is reached to disable DXT parsing - [ ] add a command line argument to force an override of the disable (e.g., if the user is working on a high-memory node and really wants to see DXT results) - [ ] add regression tests for the new machinery --- darshan-util/pydarshan/darshan/cli/summary.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index 8732a2df0..1aaf7ab00 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -345,6 +345,18 @@ def register_figures(self): ) modules_avail = 
set(self.report.modules) hmap_modules = ["HEATMAP", "DXT_POSIX", "DXT_MPIIO"] + # see gh-729 and gh-692 + # don't ingest DXT data if it appears to be + # too large as a compressed entity + if "HEATMAP" in self.report.modules: + max_allowed_dxt_mib = 2 + dxt_mod_size_mib = 0 + for mod in self.report.modules: + if "DXT" in mod: + dxt_mod_size_mib += (self.report.modules[mod]["len"] / (2 ** 20)) + if dxt_mod_size_mib > max_allowed_dxt_mib: + hmap_modules = ["HEATMAP"] + hmap_grid = OrderedDict([["HEATMAP_MPIIO", None], ["DXT_MPIIO", None], ["HEATMAP_POSIX", None],