Merge branch 'main' into contextvar-stimulus-id
sjperkins committed Apr 1, 2022
2 parents 3156657 + e601e79 commit a07bbec
Showing 6 changed files with 208 additions and 96 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/test-report.yaml
@@ -2,7 +2,8 @@ name: Test Report

on:
schedule:
- cron: "47 6 * * *"
# Run 2h after the daily tests.yaml
- cron: "0 8,20 * * *"
workflow_dispatch:

jobs:
@@ -38,9 +39,10 @@ jobs:
- name: Generate report
shell: bash -l {0}
run: |
python continuous_integration/scripts/test_report.py
python continuous_integration/scripts/test_report.py --days 90 --nfails 1 -o test_report.html
python continuous_integration/scripts/test_report.py --days 7 --nfails 2 -o test_short_report.html
mkdir deploy
mv test_report.html deploy/
mv test_report.html test_short_report.html deploy/
- name: Deploy 🚀
uses: JamesIves/github-pages-deploy-action@4.1.7
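The report step above now runs twice ("0 8,20 * * *" fires at 08:00 and 20:00 UTC, two hours after the daily tests.yaml runs) and builds two reports: a full one over 90 days of runs with at least one failure, and a short one over 7 days with at least two failures, both of which get deployed. A minimal sketch of how those flags map onto the new parse_args()/main() entry points added to test_report.py below; the import path is hypothetical and a GitHub token is assumed to be available to the script.

# Illustrative only: mirrors the two workflow invocations above, not part of this commit.
from test_report import main, parse_args  # hypothetical import of the script module

args = parse_args(["--days", "7", "--nfails", "2", "-o", "test_short_report.html"])
assert args.repo == "dask/distributed"      # default
assert args.events == ["push", "schedule"]  # default
assert args.max_workflows == 50             # default

main(["--days", "90", "--nfails", "1", "-o", "test_report.html"])       # full report
main(["--days", "7", "--nfails", "2", "-o", "test_short_report.html"])  # short report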
2 changes: 2 additions & 0 deletions .gitignore
@@ -33,6 +33,8 @@ tags
.mypy_cache/

reports/
test_report.*
test_short_report.html

# Test failures will dump the cluster state in here
test_cluster_dump/
17 changes: 10 additions & 7 deletions README.rst
@@ -1,22 +1,25 @@
Distributed
===========

|Test Status| |Longitudinal Report| |Coverage| |Doc Status| |Discourse| |Version Status| |NumFOCUS|
|Test Status| |Longitudinal Report (full)| |Longitudinal Report (short)| |Coverage| |Doc Status| |Discourse| |Version Status| |NumFOCUS|

A library for distributed computation. See documentation_ for more details.

.. _documentation: https://distributed.dask.org
.. |Test Status| image:: https://github.com/dask/distributed/workflows/Tests/badge.svg?branch=main
:target: https://github.com/dask/distributed/actions?query=workflow%3A%22Tests%22
.. |Longitudinal Report| image:: https://github.com/dask/distributed/workflows/Test%20Report/badge.svg?branch=main
:target: https://dask.github.io/distributed/test_report.html
:alt: Longitudinal test report
.. |Doc Status| image:: https://readthedocs.org/projects/distributed/badge/?version=latest
:target: https://distributed.dask.org
:alt: Documentation Status
.. |Longitudinal Report (full)| image:: https://github.com/dask/distributed/workflows/Test%20Report/badge.svg?branch=main
:target: https://dask.org/distributed/test_report.html
:alt: Longitudinal test report (full version)
.. |Longitudinal Report (short)| image:: https://github.com/dask/distributed/workflows/Test%20Report/badge.svg?branch=main
:target: https://dask.org/distributed/test_short_report.html
:alt: Longitudinal test report (short version)
.. |Coverage| image:: https://codecov.io/gh/dask/distributed/branch/main/graph/badge.svg
:target: https://codecov.io/gh/dask/distributed/branch/main
:alt: Coverage status
.. |Doc Status| image:: https://readthedocs.org/projects/distributed/badge/?version=latest
:target: https://distributed.dask.org
:alt: Documentation Status
.. |Discourse| image:: https://img.shields.io/discourse/users?logo=discourse&server=https%3A%2F%2Fdask.discourse.group
:alt: Discuss Dask-related things and ask for help
:target: https://dask.discourse.group
194 changes: 144 additions & 50 deletions continuous_integration/scripts/test_report.py
@@ -1,10 +1,15 @@
from __future__ import annotations

import argparse
import html
import io
import os
import re
import shelve
import sys
import zipfile
from collections.abc import Iterator
from typing import Any

import altair
import altair_saver
@@ -22,7 +27,56 @@
}


def get_from_github(url, params={}):
def parse_args(argv: list[str] | None) -> argparse.Namespace:
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--repo",
default="dask/distributed",
help="github repository",
)
parser.add_argument(
"--branch",
default="main",
help="git branch",
)
parser.add_argument(
"--events",
nargs="+",
default=["push", "schedule"],
help="github events",
)
parser.add_argument(
"--days",
"-d",
type=int,
default=90,
help="Number of days to look back from now",
)
parser.add_argument(
"--max-workflows",
type=int,
default=50,
help="Maximum number of workflows to fetch regardless of days",
)
parser.add_argument(
"--nfails",
"-n",
type=int,
default=1,
help="Show test if it failed more than this many times",
)
parser.add_argument(
"--output",
"-o",
default="test_report.html",
help="Output file name",
)
return parser.parse_args(argv)


def get_from_github(url: str, params: dict[str, Any]) -> requests.Response:
"""
Make an authenticated request to the GitHub REST API.
"""
@@ -31,7 +85,7 @@ def get_from_github(url, params={}):
return r


def maybe_get_next_page_path(response):
def maybe_get_next_page_path(response: requests.Response) -> str | None:
"""
If a response is paginated, get the url for the next page.
"""
@@ -48,26 +102,26 @@ def maybe_get_next_page_path(response):
return next_page_path


def get_workflow_listing(repo="dask/distributed", branch="main", event="push"):
def get_workflow_listing(repo: str, branch: str, event: str, days: int):
"""
Get a list of workflow runs from GitHub actions.
"""
since = str((pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=90)).date())
since = (pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=days)).date()
params = {"per_page": 100, "branch": branch, "event": event, "created": f">{since}"}
r = get_from_github(
f"https://api.github.com/repos/{repo}/actions/runs", params=params
)
workflows = r.json()["workflow_runs"]
next_page = maybe_get_next_page_path(r)
while next_page:
r = get_from_github(next_page)
workflows = workflows + r.json()["workflow_runs"]
r = get_from_github(next_page, params)
workflows += r.json()["workflow_runs"]
next_page = maybe_get_next_page_path(r)

return workflows
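
The listing helpers above and below share one pagination pattern: call get_from_github(), extend the accumulated list, and keep following the "next" page URL until maybe_get_next_page_path() returns None. Both helper bodies are collapsed in this view, so the following is only a rough sketch of the idea, assuming the token goes into an Authorization header and the next URL is read from the response's Link header via requests:

# Rough sketch under stated assumptions; not the collapsed bodies from this file.
from __future__ import annotations

import requests

def get_from_github_sketch(url: str, params: dict, token: str) -> requests.Response:
    # Authenticated GET against the GitHub REST API.
    r = requests.get(url, params=params, headers={"Authorization": f"token {token}"})
    r.raise_for_status()
    return r

def maybe_get_next_page_sketch(response: requests.Response) -> str | None:
    # requests parses the Link header into response.links; no "next" on the last page.
    next_link = response.links.get("next")
    return next_link["url"] if next_link else None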


def get_artifacts_for_workflow(run_id, repo="dask/distributed"):
def get_artifacts_for_workflow(run_id: str, repo: str) -> list:
"""
Get a list of artifacts from GitHub actions
"""
@@ -79,14 +133,14 @@ def get_artifacts_for_workflow(run_id, repo="dask/distributed"):
artifacts = r.json()["artifacts"]
next_page = maybe_get_next_page_path(r)
while next_page:
r = get_from_github(next_page)
artifacts = workflows + r.json()["workflow_runs"]
r = get_from_github(next_page, params=params)
artifacts += r.json()["artifacts"]
next_page = maybe_get_next_page_path(r)

return artifacts


def suite_from_name(name: str):
def suite_from_name(name: str) -> str:
"""
Get a test suite name from an artifact name. The artifact
can have matrix partitions, pytest marks, etc. Basically,
@@ -95,12 +149,12 @@ def suite_from_name(name: str):
return "-".join(name.split("-")[:3])
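
suite_from_name() keeps only the first three dash-separated pieces of an artifact name, so artifacts from the same job that differ only by matrix partition or pytest marks collapse into a single suite. A tiny illustration with made-up artifact names:

# Made-up names, purely illustrative of the string handling above.
assert suite_from_name("ubuntu-latest-3.9-notci1") == "ubuntu-latest-3.9"
assert suite_from_name("windows-latest-3.8") == "windows-latest-3.8"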


def download_and_parse_artifact(url):
def download_and_parse_artifact(url: str):
"""
Download the artifact at the url and parse it.
"""
try:
r = get_from_github(url)
r = get_from_github(url, params={})
f = zipfile.ZipFile(io.BytesIO(r.content))
run = junitparser.JUnitXml.fromstring(f.read(f.filelist[0].filename))
return run
@@ -109,7 +163,7 @@ def download_and_parse_artifact(url):
return None


def dataframe_from_jxml(run):
def dataframe_from_jxml(run: list) -> pandas.DataFrame:
"""
Turn a parsed JXML into a pandas dataframe
"""
@@ -161,13 +215,16 @@ def dedup(group):
return df.groupby(["file", "test"]).agg(dedup)


if __name__ == "__main__":
if not TOKEN:
raise RuntimeError("Failed to find a GitHub Token")
print("Getting all recent workflows...")
workflows = get_workflow_listing(event="push") + get_workflow_listing(
event="schedule"
)
def download_and_parse_artifacts(
repo: str, branch: str, events: list[str], days: int, max_workflows: int
) -> Iterator[pandas.DataFrame]:

print("Getting workflows list...")
workflows = []
for event in events:
workflows += get_workflow_listing(
repo=repo, branch=branch, event=event, days=days
)

# Filter the workflows listing to be in the retention period,
# and only be test runs (i.e., no linting) that completed.
@@ -176,58 +233,89 @@ def dedup(group):
for w in workflows
if (
pandas.to_datetime(w["created_at"])
> pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=90)
> pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=days)
and w["conclusion"] != "cancelled"
and w["name"].lower() == "tests"
)
]
print(f"Found {len(workflows)} workflows")
# Each workflow processed takes ~10-15 API requests. To avoid being
# rate limited by GitHub (1000 requests per hour) we choose just the
# most recent N runs. This also keeps the viz size from blowing up.
workflows = sorted(workflows, key=lambda w: w["created_at"])[-50:]
workflows = sorted(workflows, key=lambda w: w["created_at"])[-max_workflows:]
print(f"Fetching artifact listing for the {len(workflows)} most recent workflows")

print("Getting the artifact listing for each workflow...")
for w in workflows:
artifacts = get_artifacts_for_workflow(w["id"])
artifacts = get_artifacts_for_workflow(w["id"], repo=repo)
# We also upload timeout reports as artifacts, but we don't want them here.
w["artifacts"] = [
a
for a in artifacts
if "timeouts" not in a["name"] and "cluster_dumps" not in a["name"]
]

print("Downloading and parsing artifacts...")
for w in workflows:
w["dfs"] = []
for a in w["artifacts"]:
xml = download_and_parse_artifact(a["archive_download_url"])
df = dataframe_from_jxml(xml) if xml else None
# Note: we assign a column with the workflow timestamp rather than the
# artifact timestamp so that artifacts triggered under the same workflow
# can be aligned according to the same trigger time.
if df is not None:
df = df.assign(
name=a["name"],
suite=suite_from_name(a["name"]),
date=w["created_at"],
url=w["html_url"],
)
w["dfs"].append(df)

# Make a top-level dict of dataframes, mapping test name to a dataframe
# of all check suites that ran that test.
# Note: we drop **all** tests which did not have at least one failure.
nartifacts = sum(len(w["artifacts"]) for w in workflows)
ndownloaded = 0
print(f"Downloading and parsing {nartifacts} artifacts...")

with shelve.open("test_report") as cache:
for w in workflows:
w["dfs"] = []
for a in w["artifacts"]:
url = a["archive_download_url"]
df: pandas.DataFrame | None
try:
df = cache[url]
except KeyError:
xml = download_and_parse_artifact(url)
if xml:
df = dataframe_from_jxml(xml)
# Note: we assign a column with the workflow timestamp rather
# than the artifact timestamp so that artifacts triggered under
# the same workflow can be aligned according to the same trigger
# time.
df = df.assign(
name=a["name"],
suite=suite_from_name(a["name"]),
date=w["created_at"],
url=w["html_url"],
)
else:
df = None
cache[url] = df

if df is not None:
yield df

ndownloaded += 1
if ndownloaded and not ndownloaded % 20:
print(f"{ndownloaded}... ", end="")
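
The new download_and_parse_artifacts() generator memoizes each artifact URL (including failed parses, stored as None) in a local shelve database called "test_report", which is why the .gitignore change above adds a test_report.* pattern; re-running the script only downloads artifacts it has not cached yet. With the default --max-workflows of 50 and the ~10-15 API requests per workflow noted above, a cold run stays around 500-750 requests, within the 1,000 requests/hour budget mentioned in the comment. A stripped-down sketch of the cache-or-fetch step, with fetch() as a hypothetical stand-in for download_and_parse_artifact() plus dataframe_from_jxml():

# Minimal sketch of the shelve-backed memoization; fetch() is hypothetical.
from __future__ import annotations

import shelve
from typing import Callable

import pandas

def cached_fetch(url: str, fetch: Callable[[str], pandas.DataFrame | None]):
    with shelve.open("test_report") as cache:   # persisted on disk as test_report.*
        try:
            return cache[url]                   # hit: reuse the parsed result
        except KeyError:
            result = fetch(url)                 # miss: download and parse once...
            cache[url] = result                 # ...and remember it, even when None
            return result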


def main(argv: list[str] | None = None) -> None:
args = parse_args(argv)
if not TOKEN:
raise RuntimeError("Failed to find a GitHub Token")

# Note: we drop **all** tests which did not have at least <nfails> failures.
# This is because, as nice as a block of green tests can be, there are
# far too many tests to visualize at once, so we only want to look at
# flaky tests. If the test suite has been doing well, this chart should
# dwindle to nothing!
dfs = []
for w in workflows:
dfs.extend([df for df in w["dfs"]])
dfs = list(
download_and_parse_artifacts(
repo=args.repo,
branch=args.branch,
events=args.events,
days=args.days,
max_workflows=args.max_workflows,
)
)

total = pandas.concat(dfs, axis=0)
grouped = (
total.groupby(total.index)
.filter(lambda g: (g.status == "x").any())
.filter(lambda g: (g.status == "x").sum() >= args.nfails)
.reset_index()
.assign(test=lambda df: df.file + "." + df.test)
.groupby("test")
@@ -299,11 +387,17 @@ def dedup(group):
.configure_title(anchor="start")
.resolve_scale(x="shared") # enforce aligned x axes
)
chart.title = " ".join(argv if argv is not None else sys.argv)

altair_saver.save(
chart,
"test_report.html",
args.output,
embed_options={
"renderer": "svg", # Makes the text searchable
"loader": {"target": "_blank"}, # Open hrefs in a new window
},
)


if __name__ == "__main__":
main()
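
With the refactor, main() drops every test that failed fewer than --nfails times (the old code kept anything with a single failure), which is what makes the 7-day short report stricter than the 90-day full one. A toy pandas illustration of that groupby filter, using invented rows and treating a status of "x" as a failure, as the existing code does:

# Invented toy data; mirrors the (g.status == "x").sum() >= nfails filter in main().
import pandas

nfails = 2
total = pandas.DataFrame(
    {
        "file": ["test_a.py", "test_a.py", "test_b.py", "test_b.py"],
        "test": ["test_flaky", "test_flaky", "test_solid", "test_solid"],
        "status": ["x", "x", ".", "x"],
    }
).set_index(["file", "test"])

grouped = total.groupby(total.index).filter(
    lambda g: (g.status == "x").sum() >= nfails
)
print(grouped)  # only ("test_a.py", "test_flaky") survives: two failures vs. one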