CI: add diff-shades integration (#2725)

Hopefully this makes it much easier to gauge the impacts of future
changes!
This commit is contained in:
Richard Si 2022-01-13 21:50:02 -05:00 committed by GitHub
parent 5543d1b55a
commit 565f9c92b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 507 additions and 1 deletions

134
.github/workflows/diff_shades.yml vendored Normal file
View File

@ -0,0 +1,134 @@
name: diff-shades
on:
push:
branches: [main]
paths-ignore: ["docs/**", "tests/**", "*.md"]
pull_request:
path-ignore: ["docs/**", "tests/**", "*.md"]
workflow_dispatch:
inputs:
baseline:
description: >
The baseline revision. Pro-tip, use `.pypi` to use the latest version
on PyPI or `.XXX` to use a PR.
required: true
default: main
baseline-args:
description: "Custom Black arguments (eg. -l 79)"
required: false
target:
description: >
The target revision to compare against the baseline. Same tip applies here.
required: true
target-args:
description: "Custom Black arguments (eg. -S)"
required: false
jobs:
analysis:
name: analysis / linux
runs-on: ubuntu-latest
steps:
- name: Checkout this repository (full clone)
uses: actions/checkout@v2
with:
fetch-depth: 0
- uses: actions/setup-python@v2
- name: Install diff-shades and support dependencies
run: |
python -m pip install pip --upgrade
python -m pip install https://github.com/ichard26/diff-shades/archive/stable.zip
python -m pip install click packaging urllib3
# After checking out old revisions, this might not exist so we'll use a copy.
cat scripts/diff_shades_gha_helper.py > helper.py
git config user.name "diff-shades-gha"
git config user.email "diff-shades-gha@example.com"
- name: Calculate run configuration & metadata
id: config
env:
GITHUB_TOKEN: ${{ github.token }}
run: >
python helper.py config ${{ github.event_name }}
${{ github.event.inputs.baseline }} ${{ github.event.inputs.target }}
--baseline-args "${{ github.event.inputs.baseline-args }}"
- name: Attempt to use cached baseline analysis
id: baseline-cache
uses: actions/cache@v2.1.7
with:
path: ${{ steps.config.outputs.baseline-analysis }}
key: ${{ steps.config.outputs.baseline-cache-key }}
- name: Install baseline revision
if: steps.baseline-cache.outputs.cache-hit != 'true'
env:
GITHUB_TOKEN: ${{ github.token }}
run: ${{ steps.config.outputs.baseline-setup-cmd }} && python -m pip install .
- name: Analyze baseline revision
if: steps.baseline-cache.outputs.cache-hit != 'true'
run: >
diff-shades analyze -v --work-dir projects-cache/
${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
- name: Install target revision
env:
GITHUB_TOKEN: ${{ github.token }}
run: ${{ steps.config.outputs.target-setup-cmd }} && python -m pip install .
- name: Analyze target revision
run: >
diff-shades analyze -v --work-dir projects-cache/
${{ steps.config.outputs.target-analysis }} --repeat-projects-from
${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.target-args }}
- name: Generate HTML diff report
run: >
diff-shades --dump-html diff.html compare --diff --quiet
${{ steps.config.outputs.baseline-analysis }} ${{ steps.config.outputs.target-analysis }}
- name: Upload diff report
uses: actions/upload-artifact@v2
with:
name: diff.html
path: diff.html
- name: Upload baseline analysis
uses: actions/upload-artifact@v2
with:
name: ${{ steps.config.outputs.baseline-analysis }}
path: ${{ steps.config.outputs.baseline-analysis }}
- name: Upload target analysis
uses: actions/upload-artifact@v2
with:
name: ${{ steps.config.outputs.target-analysis }}
path: ${{ steps.config.outputs.target-analysis }}
- name: Generate summary file (PR only)
if: github.event_name == 'pull_request'
run: >
python helper.py comment-body
${{ steps.config.outputs.baseline-analysis }} ${{ steps.config.outputs.target-analysis }}
${{ steps.config.outputs.baseline-sha }} ${{ steps.config.outputs.target-sha }}
- name: Upload summary file (PR only)
if: github.event_name == 'pull_request'
uses: actions/upload-artifact@v2
with:
name: .pr-comment-body.md
path: .pr-comment-body.md
# This is last so the diff-shades-comment workflow can still work even if we
# end up detecting failed files and failing the run.
- name: Check for failed files in both analyses
run: >
diff-shades show-failed --check --show-log ${{ steps.config.outputs.baseline-analysis }};
diff-shades show-failed --check --show-log ${{ steps.config.outputs.target-analysis }}

View File

@ -0,0 +1,47 @@
name: diff-shades-comment
on:
workflow_run:
workflows: [diff-shades]
types: [completed]
permissions:
pull-requests: write
jobs:
comment:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- name: Install support dependencies
run: |
python -m pip install pip --upgrade
python -m pip install click packaging urllib3
- name: Get details from initial workflow run
id: metadata
env:
GITHUB_TOKEN: ${{ github.token }}
run: >
python scripts/diff_shades_gha_helper.py comment-details
${{github.event.workflow_run.id }}
- name: Try to find pre-existing PR comment
if: steps.metadata.outputs.needs-comment == 'true'
id: find-comment
uses: peter-evans/find-comment@v1
with:
issue-number: ${{ steps.metadata.outputs.pr-number }}
comment-author: "github-actions[bot]"
body-includes: "diff-shades"
- name: Create or update PR comment
if: steps.metadata.outputs.needs-comment == 'true'
uses: peter-evans/create-or-update-comment@v1
with:
comment-id: ${{ steps.find-comment.outputs.comment-id }}
issue-number: ${{ steps.metadata.outputs.pr-number }}
body: ${{ steps.metadata.outputs.comment-body }}
edit-mode: replace

View File

@ -57,7 +57,7 @@ repos:
rev: v2.5.1
hooks:
- id: prettier
exclude: ^Pipfile\.lock
exclude: \.github/workflows/diff_shades\.yml
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1

View File

@ -40,3 +40,56 @@ If you're running locally yourself to test black on lots of code try:
```{program-output} black-primer --help
```
## diff-shades
diff-shades is a tool similar to black-primer, it also runs _Black_ across a list of Git
cloneable OSS projects recording the results. The intention is to eventually fully
replace black-primer with diff-shades as it's much more feature complete and supports
our needs better.
The main highlight feature of diff-shades is being able to compare two revisions of
_Black_. This is incredibly useful as it allows us to see what exact changes will occur,
say merging a certain PR. Black-primer's results would usually be filled with changes
caused by pre-existing code in Black drowning out the (new) changes we want to see. It
operates similarly to black-primer but crucially it saves the results as a JSON file
which allows for the rich comparison features alluded to above.
For more information, please see the [diff-shades documentation][diff-shades].
### CI integration
diff-shades is also the tool behind the "diff-shades results comparing ..." /
"diff-shades reports zero changes ..." comments on PRs. The project has a GitHub Actions
workflow which runs diff-shades twice against two revisions of _Black_ according to
these rules:
| | Baseline revision | Target revision |
| --------------------- | ----------------------- | ---------------------------- |
| On PRs | latest commit on `main` | PR commit with `main` merged |
| On pushes (main only) | latest PyPI version | the pushed commit |
Once finished, a PR comment will be posted embedding a summary of the changes and links
to further information. If there's a pre-existing diff-shades comment, it'll be updated
instead the next time the workflow is triggered on the same PR.
The workflow uploads 3-4 artifacts upon completion: the two generated analyses (they
have the .json file extension), `diff.html`, and `.pr-comment-body.md` if triggered by a
PR. The last one is downloaded by the `diff-shades-comment` workflow and shouldn't be
downloaded locally. `diff.html` comes in handy for push-based or manually triggered
runs. And the analyses exist just in case you want to do further analysis using the
collected data locally.
Note that the workflow will only fail intentionally if while analyzing a file failed to
format. Otherwise a failure indicates a bug in the workflow.
```{tip}
Maintainers with write access or higher can trigger the workflow manually from the
Actions tab using the `workflow_dispatch` event. Simply select "diff-shades"
from the workflows list on the left, press "Run workflow", and fill in which revisions
and command line arguments to use.
Once finished, check the logs or download the artifacts for local use.
```
[diff-shades]: https://github.com/ichard26/diff-shades#readme

View File

@ -0,0 +1,272 @@
"""Helper script for psf/black's diff-shades Github Actions integration.
diff-shades is a tool for analyzing what happens when you run Black on
OSS code capturing it for comparisons or other usage. It's used here to
help measure the impact of a change *before* landing it (in particular
posting a comment on completion for PRs).
This script exists as a more maintainable alternative to using inline
Javascript in the workflow YAML files. The revision configuration and
resolving, caching, and PR comment logic is contained here.
For more information, please see the developer docs:
https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
"""
import json
import os
import platform
import pprint
import subprocess
import sys
import zipfile
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
import click
import urllib3
from packaging.version import Version
if sys.version_info >= (3, 8):
from typing import Final, Literal
else:
from typing_extensions import Final, Literal
COMMENT_BODY_FILE: Final = ".pr-comment-body.md"
DIFF_STEP_NAME: Final = "Generate HTML diff report"
DOCS_URL: Final = (
"https://black.readthedocs.io/en/latest/"
"contributing/gauging_changes.html#diff-shades"
)
USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
SHA_LENGTH: Final = 10
GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
http = urllib3.PoolManager()
def set_output(name: str, value: str) -> None:
if len(value) < 200:
print(f"[INFO]: setting '{name}' to '{value}'")
else:
print(f"[INFO]: setting '{name}' to [{len(value)} chars]")
print(f"::set-output name={name}::{value}")
def http_get(
url: str,
is_json: bool = True,
headers: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> Any:
headers = headers or {}
headers["User-Agent"] = USER_AGENT
if "github" in url:
if GH_API_TOKEN:
headers["Authorization"] = f"token {GH_API_TOKEN}"
headers["Accept"] = "application/vnd.github.v3+json"
r = http.request("GET", url, headers=headers, **kwargs)
if is_json:
data = json.loads(r.data.decode("utf-8"))
else:
data = r.data
print(f"[INFO]: issued GET request for {r.geturl()}")
if not (200 <= r.status < 300):
pprint.pprint(dict(r.info()))
pprint.pprint(data)
raise RuntimeError(f"unexpected status code: {r.status}")
return data
def get_branch_or_tag_revision(sha: str = "main") -> str:
data = http_get(
f"https://api.github.com/repos/{REPO}/commits",
fields={"per_page": "1", "sha": sha},
)
assert isinstance(data[0]["sha"], str)
return data[0]["sha"]
def get_pr_revision(pr: int) -> str:
data = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
assert isinstance(data["head"]["sha"], str)
return data["head"]["sha"]
def get_pypi_version() -> Version:
data = http_get("https://pypi.org/pypi/black/json")
versions = [Version(v) for v in data["releases"]]
sorted_versions = sorted(versions, reverse=True)
return sorted_versions[0]
def resolve_custom_ref(ref: str) -> Tuple[str, str]:
if ref == ".pypi":
# Special value to get latest PyPI version.
version = str(get_pypi_version())
return version, f"git checkout {version}"
if ref.startswith(".") and ref[1:].isnumeric():
# Special format to get a PR.
number = int(ref[1:])
revision = get_pr_revision(number)
return (
f"pr-{number}-{revision[:SHA_LENGTH]}",
f"gh pr checkout {number} && git merge origin/main",
)
# Alright, it's probably a branch, tag, or a commit SHA, let's find out!
revision = get_branch_or_tag_revision(ref)
# We're cutting the revision short as we might be operating on a short commit SHA.
if revision == ref or revision[: len(ref)] == ref:
# It's *probably* a commit as the resolved SHA isn't different from the REF.
return revision[:SHA_LENGTH], f"git checkout {revision}"
# It's *probably* a pre-existing branch or tag, yay!
return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
@click.group()
def main() -> None:
pass
@main.command("config", help="Acquire run configuration and metadata.")
@click.argument(
"event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
)
@click.argument("custom_baseline", required=False)
@click.argument("custom_target", required=False)
@click.option("--baseline-args", default="")
def config(
event: Literal["push", "pull_request", "workflow_dispatch"],
custom_baseline: Optional[str],
custom_target: Optional[str],
baseline_args: str,
) -> None:
import diff_shades
if event == "push":
# Push on main, let's use PyPI Black as the baseline.
baseline_name = str(get_pypi_version())
baseline_cmd = f"git checkout {baseline_name}"
target_rev = os.getenv("GITHUB_SHA")
assert target_rev is not None
target_name = "main-" + target_rev[:SHA_LENGTH]
target_cmd = f"git checkout {target_rev}"
elif event == "pull_request":
# PR, let's use main as the baseline.
baseline_rev = get_branch_or_tag_revision()
baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
baseline_cmd = f"git checkout {baseline_rev}"
pr_ref = os.getenv("GITHUB_REF")
assert pr_ref is not None
pr_num = int(pr_ref[10:-6])
pr_rev = get_pr_revision(pr_num)
target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"
# These are only needed for the PR comment.
set_output("baseline-sha", baseline_rev)
set_output("target-sha", pr_rev)
else:
assert custom_baseline is not None and custom_target is not None
baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
target_name, target_cmd = resolve_custom_ref(custom_target)
if baseline_name == target_name:
# Alright we're using the same revisions but we're (hopefully) using
# different command line arguments, let's support that too.
baseline_name += "-1"
target_name += "-2"
set_output("baseline-analysis", baseline_name + ".json")
set_output("baseline-setup-cmd", baseline_cmd)
set_output("target-analysis", target_name + ".json")
set_output("target-setup-cmd", target_cmd)
key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
set_output("baseline-cache-key", key)
@main.command("comment-body", help="Generate the body for a summary PR comment.")
@click.argument("baseline", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.argument("baseline-sha")
@click.argument("target-sha")
def comment_body(
baseline: Path, target: Path, baseline_sha: str, target_sha: str
) -> None:
# fmt: off
cmd = [
sys.executable, "-m", "diff_shades", "--no-color",
"compare", str(baseline), str(target), "--quiet", "--check"
]
# fmt: on
proc = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8")
if not proc.returncode:
body = (
f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
f" main ({baseline_sha}).\n\n---\n\n"
)
else:
body = (
f"**diff-shades** results comparing this PR ({target_sha}) to main"
f" ({baseline_sha}). The full diff is [available in the logs]"
f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
)
body += "\n```text\n" + proc.stdout.strip() + "\n```\n"
body += (
f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
" [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
)
print(f"[INFO]: writing half-completed comment body to {COMMENT_BODY_FILE}")
with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as f:
f.write(body)
@main.command("comment-details", help="Get PR comment resources from a workflow run.")
@click.argument("run-id")
def comment_details(run_id: str) -> None:
data = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
if data["event"] != "pull_request":
set_output("needs-comment", "false")
return
set_output("needs-comment", "true")
pulls = data["pull_requests"]
assert len(pulls) == 1
pr_number = pulls[0]["number"]
set_output("pr-number", str(pr_number))
jobs_data = http_get(data["jobs_url"])
assert len(jobs_data["jobs"]) == 1, "multiple jobs not supported nor tested"
job = jobs_data["jobs"][0]
steps = {s["name"]: s["number"] for s in job["steps"]}
diff_step = steps[DIFF_STEP_NAME]
diff_url = job["html_url"] + f"#step:{diff_step}:1"
artifacts_data = http_get(data["artifacts_url"])["artifacts"]
artifacts = {a["name"]: a["archive_download_url"] for a in artifacts_data}
body_url = artifacts[COMMENT_BODY_FILE]
body_zip = BytesIO(http_get(body_url, is_json=False))
with zipfile.ZipFile(body_zip) as zfile:
with zfile.open(COMMENT_BODY_FILE) as rf:
body = rf.read().decode("utf-8")
# It's more convenient to fill in these fields after the first workflow is done
# since this command can access the workflows API (doing it in the main workflow
# while it's still in progress seems impossible).
body = body.replace("$workflow-run-url", data["html_url"])
body = body.replace("$job-diff-url", diff_url)
# # https://github.community/t/set-output-truncates-multiline-strings/16852/3
escaped = body.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D")
set_output("comment-body", escaped)
if __name__ == "__main__":
main()