Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(benchmarks): run only the last release #203

Merged
merged 1 commit into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ jobs:
source .venv/bin/activate
pip install --upgrade pip
pip install -r scripts/requirements-bm.txt
deactivate

- name: Run benchmarks
env:
PYTHONPATH: "."
run: |
ulimit -c unlimited

echo "core.%p" | sudo tee /proc/sys/kernel/core_pattern
source .venv/bin/activate
python scripts/benchmark.py --format markdown | tee comment.txt
deactivate
python scripts/benchmark.py --format markdown --last | tee comment.txt

- name: Post results on PR
uses: marocchino/sticky-pull-request-comment@v2
Expand Down
32 changes: 24 additions & 8 deletions scripts/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,15 @@
import abc
import re
import sys
from textwrap import wrap
import typing as t
from argparse import ArgumentParser
from math import floor, log
from pathlib import Path

from scipy.stats import ttest_ind

from common import download_release

from test.utils import metadata, target
from textwrap import wrap

from common import download_release
from scipy.stats import ttest_ind

VERSIONS = ("3.4.1", "3.5.0", "dev")
SCENARIOS = [
Expand Down Expand Up @@ -96,6 +93,8 @@ def get_stats(output: str) -> t.Optional[dict]:


class Outcome:
__critical_p__ = 0.025

def __init__(self, data: list[float]) -> None:
self.data = data
self.mean = sum(data) / len(data)
Expand All @@ -120,7 +119,7 @@ def __len__(self):

def __eq__(self, other: "Outcome") -> bool:
t, p = ttest_ind(self.data, other.data, equal_var=False)
return p < 0.05
return p < self.__critical_p__


Results = t.Tuple[str, t.Dict[str, Outcome]]
Expand Down Expand Up @@ -312,8 +311,25 @@ def main():
help="The output format",
)

argp.add_argument(
"-l",
"--last",
action="store_true",
help="Run only with the last release of Austin",
)

argp.add_argument(
"-p",
"--pvalue",
type=float,
default=0.025,
help="The p-value to use when testing for statistical significance",
)

opts = argp.parse_args()

Outcome.__critical_p__ = opts.pvalue

renderer = {"terminal": TerminalRenderer, "markdown": MarkdownRenderer}[
opts.format
]()
Expand All @@ -330,7 +346,7 @@ def main():
continue

table: t.List[Results] = []
for version in VERSIONS:
for version in VERSIONS[-2:] if opts.last else VERSIONS:
print(f"> Running with Austin {version} ... ", end="\r", file=sys.stderr)
try:
austin = download_release(version, Path("/tmp"), variant_name=variant)
Expand Down
2 changes: 1 addition & 1 deletion scripts/requirements-bm.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
austin-python~=1.6
scipy~=1.10.1
scipy~=1.10
Loading