Merge pull request ESMCI#4491 from jasonb5/performance-baselines
Updates performance baselines

Updates how performance baselines are created and compared.

Before:
- Baseline generation stores the whole cpl.log
- Comparison parses both the baseline and the testcase cpl.log and compares throughput/memory
- --check-throughput and --check-memory will PASS even if MEMCOMP or TPUTCOMP fail
- bless_test_results --hist-only will update the baseline cpl.log

After:
- Baseline generation stores throughput in cpl-tput.log and memory usage in cpl-mem.log
- Comparison parses the testcase cpl.log and compares it against the baseline's respective files (see the sketch after this list)
- --check-throughput and --check-memory will DIFF if MEMCOMP or TPUTCOMP fail
- bless_test_results now has --tput-only and --mem-only to bless individual performance DIFFs
- Fixes logging of throughput/memory comparison results in TestStatus.log
- Allows generation/comparison to be customized by the coupler (a sketch of this hook follows the checklist below)
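To make the new flow concrete, here is a minimal illustrative sketch (not the actual CIME/baselines/performance.py code; the cpl-tput.log format and the helper names below are assumptions): generation parses the throughput out of the cpl.log and stores just that number, and comparison re-parses the current testcase cpl.log and DIFFs when the slowdown relative to the stored baseline exceeds the tolerance.

```python
import gzip
import os
import re


def read_throughput(cpl_log_path):
    """Parse simulated years per day from a (possibly gzipped) cpl log."""
    opener = gzip.open if cpl_log_path.endswith(".gz") else open
    with opener(cpl_log_path, "rb") as fd:
        text = fd.read().decode("utf-8")
    match = re.search(r"# simulated years / cmp-day =\s+(\d+\.\d+)\s", text)
    return float(match.group(1)) if match else None


def write_tput_baseline(baseline_dir, cpl_log_path):
    """Generation: store the parsed number instead of copying the whole cpl.log."""
    tput = read_throughput(cpl_log_path)  # assumes the throughput line is present
    with open(os.path.join(baseline_dir, "cpl-tput.log"), "w") as fd:
        fd.write("{:f}\n".format(tput))


def compare_tput_baseline(baseline_dir, cpl_log_path, tolerance=0.1):
    """Comparison: DIFF when throughput drops more than `tolerance` below baseline."""
    with open(os.path.join(baseline_dir, "cpl-tput.log")) as fd:
        baseline = float(fd.read().strip())
    current = read_throughput(cpl_log_path)
    diff = (baseline - current) / baseline  # positive means slower than the baseline
    below_tolerance = diff < tolerance
    comment = "TPUTCOMP: Computation time changed by {:.2f}% relative to baseline".format(
        diff * 100
    )
    return below_tolerance, comment
```

Storing the parsed numbers rather than whole coupler logs keeps baselines independent of log names and dates, and lets bless_test_results update throughput and memory separately via --tput-only and --mem-only.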

Test suite: pytest -vvv CIME/tests
Test baseline: n/a
Test namelist changes: n/a
Test status: n/a

Fixes E3SM-Project/E3SM#5885

User interface changes?: N
Update gh-pages html (Y/N)?: N
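The coupler-customization item works through an attribute lookup with a fallback to the generic implementation, as the _check_for_memleak diff below shows (config.perf_check_for_memory_leak is tried first, then the module-level default on AttributeError). A coupler-side hook could therefore look roughly like the following; this is hypothetical, the module location and how load_coupler_customization discovers it are not shown here, and the body is a placeholder.

```python
# Hypothetical customization module picked up by load_coupler_customization();
# the hook must return (leak_detected, comment), matching the default's contract.
def perf_check_for_memory_leak(case, tolerance):
    rundir = case.get_value("RUNDIR")
    # ... parse this coupler's own performance/memory output under rundir ...
    leak_detected = False
    comment = "no memory leak detected (coupler-specific check under {})".format(rundir)
    return leak_detected, comment
```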
jgfouca authored Oct 16, 2023
2 parents af9b27f + 7e30703 commit 88255d5
Showing 16 changed files with 2,329 additions and 435 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/docs.yml
@@ -91,7 +91,9 @@ jobs:
user_name: 'github-actions[bot]'
user_email: 'github-actions[bot]@users.noreply.github.com'
- name: Comment about previewing documentation
if: ${{ github.event_name == 'pull_request' }}
if: |
github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name == github.repository
uses: actions/github-script@v6
with:
script: |
4 changes: 2 additions & 2 deletions .github/workflows/testing.yml
@@ -89,7 +89,7 @@ jobs:
init_cime
pytest -vvv --machine docker --no-fortran-run CIME/tests/test_unit*
pytest -vvv --cov=CIME --machine docker --no-fortran-run CIME/tests/test_unit*
# Run system tests
system-testing:
@@ -145,7 +145,7 @@ jobs:
conda activate base
pytest -vvv --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
pytest -vvv --cov=CIME --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
- name: Create testing log archive
if: ${{ failure() }}
shell: bash
1 change: 1 addition & 0 deletions .gitignore
@@ -26,3 +26,4 @@ scripts/Tools/JENKINS*
components
libraries
share
test_coverage/**
290 changes: 104 additions & 186 deletions CIME/SystemTests/system_tests_common.py
@@ -26,6 +26,14 @@
from CIME.config import Config
from CIME.provenance import save_test_time, get_test_success
from CIME.locked_files import LOCKED_DIR, lock_file, is_locked
from CIME.baselines.performance import (
get_latest_cpl_logs,
_perf_get_memory,
perf_compare_memory_baseline,
perf_compare_throughput_baseline,
perf_write_baseline,
load_coupler_customization,
)
import CIME.build as build

import glob, gzip, time, traceback, os
@@ -493,7 +501,7 @@ def run_indv(
self._case.case_st_archive(resubmit=True)

def _coupler_log_indicates_run_complete(self):
newestcpllogfiles = self._get_latest_cpl_logs()
newestcpllogfiles = get_latest_cpl_logs(self._case)
logger.debug("Latest Coupler log file(s) {}".format(newestcpllogfiles))
# Exception is raised if the file is not compressed
allgood = len(newestcpllogfiles)
@@ -598,43 +606,6 @@ def _st_archive_case_test(self):
else:
self._test_status.set_status(STARCHIVE_PHASE, TEST_FAIL_STATUS)

def _get_mem_usage(self, cpllog):
"""
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
memlist = []
meminfo = re.compile(
r".*model date =\s+(\w+).*memory =\s+(\d+\.?\d+).*highwater"
)
if cpllog is not None and os.path.isfile(cpllog):
if ".gz" == cpllog[-3:]:
fopen = gzip.open
else:
fopen = open
with fopen(cpllog, "rb") as f:
for line in f:
m = meminfo.match(line.decode("utf-8"))
if m:
memlist.append((float(m.group(1)), float(m.group(2))))
# Remove the last mem record, it's sometimes artificially high
if len(memlist) > 0:
memlist.pop()
return memlist

def _get_throughput(self, cpllog):
"""
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
if cpllog is not None and os.path.isfile(cpllog):
with gzip.open(cpllog, "rb") as f:
cpltext = f.read().decode("utf-8")
m = re.search(r"# simulated years / cmp-day =\s+(\d+\.\d+)\s", cpltext)
if m:
return float(m.group(1))
return None

def _phase_modifying_call(self, phase, function):
"""
Ensures that unexpected exceptions from phases will result in a FAIL result
@@ -661,47 +632,29 @@ def _check_for_memleak(self):
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
config = load_coupler_customization(self._case)

# default to 0.1
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE") or 0.1

expect(tolerance > 0.0, "Bad value for memleak tolerance in test")

with self._test_status:
latestcpllogs = self._get_latest_cpl_logs()
for cpllog in latestcpllogs:
memlist = self._get_mem_usage(cpllog)
try:
memleak, comment = config.perf_check_for_memory_leak(
self._case, tolerance
)
except AttributeError:
memleak, comment = perf_check_for_memory_leak(self._case, tolerance)

if len(memlist) < 3:
self._test_status.set_status(
MEMLEAK_PHASE,
TEST_PASS_STATUS,
comments="insuffiencient data for memleak test",
)
else:
finaldate = int(memlist[-1][0])
originaldate = int(
memlist[1][0]
) # skip first day mem record, it can be too low while initializing
finalmem = float(memlist[-1][1])
originalmem = float(memlist[1][1])
memdiff = -1
if originalmem > 0:
memdiff = (finalmem - originalmem) / originalmem
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE")
if tolerance is None:
tolerance = 0.1
expect(tolerance > 0.0, "Bad value for memleak tolerance in test")
if memdiff < 0:
self._test_status.set_status(
MEMLEAK_PHASE,
TEST_PASS_STATUS,
comments="data for memleak test is insuffiencient",
)
elif memdiff < tolerance:
self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS)
else:
comment = "memleak detected, memory went from {:f} to {:f} in {:d} days".format(
originalmem, finalmem, finaldate - originaldate
)
append_testlog(comment, self._orig_caseroot)
self._test_status.set_status(
MEMLEAK_PHASE, TEST_FAIL_STATUS, comments=comment
)
if memleak:
append_testlog(comment, self._orig_caseroot)

status = TEST_FAIL_STATUS
else:
status = TEST_PASS_STATUS

self._test_status.set_status(MEMLEAK_PHASE, status, comments=comment)

def compare_env_run(self, expected=None):
"""
@@ -728,121 +681,45 @@ def compare_env_run(self, expected=None):
return False
return True

def _get_latest_cpl_logs(self):
def _compare_memory(self):
"""
find and return the latest cpl log file in the run directory
Compares current test memory usage to baseline.
"""
coupler_log_path = self._case.get_value("RUNDIR")
cpllogs = glob.glob(
os.path.join(coupler_log_path, "{}*.log.*".format(self._cpllog))
)
lastcpllogs = []
if cpllogs:
lastcpllogs.append(max(cpllogs, key=os.path.getctime))
basename = os.path.basename(lastcpllogs[0])
suffix = basename.split(".", 1)[1]
for log in cpllogs:
if log in lastcpllogs:
continue

if log.endswith(suffix):
lastcpllogs.append(log)
with self._test_status:
below_tolerance, comment = perf_compare_memory_baseline(self._case)

return lastcpllogs
if below_tolerance is not None:
append_testlog(comment, self._orig_caseroot)

def _compare_memory(self):
with self._test_status:
# compare memory usage to baseline
baseline_name = self._case.get_value("BASECMP_CASE")
basecmp_dir = os.path.join(
self._case.get_value("BASELINE_ROOT"), baseline_name
)
newestcpllogfiles = self._get_latest_cpl_logs()
if len(newestcpllogfiles) > 0:
memlist = self._get_mem_usage(newestcpllogfiles[0])
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)
if m is not None:
baselog = os.path.join(basecmp_dir, m.group(1)) + ".gz"
if baselog is None or not os.path.isfile(baselog):
# for backward compatibility
baselog = os.path.join(basecmp_dir, self._cpllog + ".log")
if os.path.isfile(baselog) and len(memlist) > 3:
blmem = self._get_mem_usage(baselog)
blmem = 0 if blmem == [] else blmem[-1][1]
curmem = memlist[-1][1]
diff = 0.0 if blmem == 0 else (curmem - blmem) / blmem
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE")
if tolerance is None:
tolerance = 0.1
if (
diff < tolerance
and self._test_status.get_status(MEMCOMP_PHASE) is None
):
self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS)
elif (
self._test_status.get_status(MEMCOMP_PHASE) != TEST_FAIL_STATUS
):
comment = "Error: Memory usage increase >{:d}% from baseline's {:f} to {:f}".format(
int(tolerance * 100), blmem, curmem
)
self._test_status.set_status(
MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment
)
append_testlog(comment, self._orig_caseroot)
if (
below_tolerance
and self._test_status.get_status(MEMCOMP_PHASE) is None
):
self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS)
elif self._test_status.get_status(MEMCOMP_PHASE) != TEST_FAIL_STATUS:
self._test_status.set_status(
MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment
)

def _compare_throughput(self):
"""
Compares current test throughput to baseline.
"""
with self._test_status:
# compare memory usage to baseline
baseline_name = self._case.get_value("BASECMP_CASE")
basecmp_dir = os.path.join(
self._case.get_value("BASELINE_ROOT"), baseline_name
)
newestcpllogfiles = self._get_latest_cpl_logs()
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)
if m is not None:
baselog = os.path.join(basecmp_dir, m.group(1)) + ".gz"
if baselog is None or not os.path.isfile(baselog):
# for backward compatibility
baselog = os.path.join(basecmp_dir, self._cpllog)

if os.path.isfile(baselog):
# compare throughput to baseline
current = self._get_throughput(cpllog)
baseline = self._get_throughput(baselog)
# comparing ypd so bigger is better
if baseline is not None and current is not None:
diff = (baseline - current) / baseline
tolerance = self._case.get_value("TEST_TPUT_TOLERANCE")
if tolerance is None:
tolerance = 0.1
expect(
tolerance > 0.0,
"Bad value for throughput tolerance in test",
)
comment = "TPUTCOMP: Computation time changed by {:.2f}% relative to baseline".format(
diff * 100
)
append_testlog(comment, self._orig_caseroot)
if (
diff < tolerance
and self._test_status.get_status(THROUGHPUT_PHASE) is None
):
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_PASS_STATUS
)
elif (
self._test_status.get_status(THROUGHPUT_PHASE)
!= TEST_FAIL_STATUS
):
comment = "Error: TPUTCOMP: Computation time increase > {:d}% from baseline".format(
int(tolerance * 100)
)
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment
)
append_testlog(comment, self._orig_caseroot)
below_tolerance, comment = perf_compare_throughput_baseline(self._case)

if below_tolerance is not None:
append_testlog(comment, self._orig_caseroot)

if (
below_tolerance
and self._test_status.get_status(THROUGHPUT_PHASE) is None
):
self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS)
elif self._test_status.get_status(THROUGHPUT_PHASE) != TEST_FAIL_STATUS:
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment
)

def _compare_baseline(self):
"""
Expand Down Expand Up @@ -884,18 +761,59 @@ def _generate_baseline(self):
)
# copy latest cpl log to baseline
# drop the date so that the name is generic
newestcpllogfiles = self._get_latest_cpl_logs()
newestcpllogfiles = get_latest_cpl_logs(self._case)
with SharedArea():
# TODO ever actually more than one cpl log?
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)

if m is not None:
baselog = os.path.join(basegen_dir, m.group(1)) + ".gz"

safe_copy(
cpllog,
os.path.join(basegen_dir, baselog),
preserve_meta=False,
)

perf_write_baseline(self._case, basegen_dir, cpllog)


def perf_check_for_memory_leak(case, tolerance):
leak = False
comment = ""

latestcpllogs = get_latest_cpl_logs(case)

for cpllog in latestcpllogs:
try:
memlist = _perf_get_memory(case, cpllog)
except RuntimeError:
return False, "insufficient data for memleak test"

# last day - second day, skip first day, can be too low while initializing
elapsed_days = int(memlist[-1][0]) - int(memlist[1][0])

finalmem, originalmem = float(memlist[-1][1]), float(memlist[1][1])

memdiff = -1 if originalmem <= 0 else (finalmem - originalmem) / originalmem

if memdiff < 0:
leak = False
comment = "data for memleak test is insufficient"
elif memdiff < tolerance:
leak = False
comment = ""
else:
leak = True
comment = (
"memleak detected, memory went from {:f} to {:f} in {:d} days".format(
originalmem, finalmem, elapsed_days
)
)

return leak, comment


class FakeTest(SystemTestsCommon):
"""
