Track review and author response latency #22

Draft · wants to merge 1 commit into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -1 +1,3 @@
/_site
*.env
*.tmp
2 changes: 2 additions & 0 deletions data/rust-lang/rust-clippy/pr-author-latency.csv
@@ -0,0 +1,2 @@
is:open draft:false -label:S-blocked label:S-waiting-on-author __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
2023-01-30,0,0,0,1,0,2,3,0
2 changes: 2 additions & 0 deletions data/rust-lang/rust-clippy/pr-review-latency.csv
@@ -0,0 +1,2 @@
is:open draft:false -label:S-blocked label:S-waiting-on-review __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
2023-01-30,0,5,4,0,7,7,5,7
2 changes: 2 additions & 0 deletions data/rust-lang/rust/pr-author-latency.csv
@@ -0,0 +1,2 @@
is:open draft:false -label:S-blocked label:S-waiting-on-author __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
2023-01-30,1,12,7,15,24,29,12,0
2 changes: 2 additions & 0 deletions data/rust-lang/rust/pr-review-latency.csv
@@ -0,0 +1,2 @@
is:open draft:false -label:S-blocked label:S-waiting-on-review __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
2023-01-30,1,24,18,35,46,49,27,0
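Each of these data files shares one layout: the first row holds the GitHub search-query template followed by the histogram bins, each written as `range|pretty label` (ranges are in days), and every later row is a snapshot date followed by one count per bin. A minimal sketch of reading one of them (the path is illustrative; any of the four files works):

```python
import csv

with open("data/rust-lang/rust/pr-review-latency.csv") as f:
    rows = list(csv.reader(f))

query, bin_specs = rows[0][0], rows[0][1:]  # template + bin specs
for date, *counts in rows[1:]:              # one snapshot per row
    for spec, count in zip(bin_specs, counts):
        label = spec.split("|")[1]          # e.g. "last week"
        print(f"{date}: {count} PRs waiting ({label})")
```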
2 changes: 2 additions & 0 deletions index.md
@@ -6,5 +6,7 @@ graphs:
pr-activity: Last activity on pull requests
pr-age: Pull requests creation dates
pr-merged: Pull requests merged
pr-review-latency: Duration waiting on review
pr-author-latency: Duration waiting on author
layout: graphs
---
2 changes: 2 additions & 0 deletions rust-clippy.md
@@ -5,5 +5,7 @@ graphs:
pr-status: Pull requests status
pr-activity: Last activity on pull requests
pr-age: Pull requests creation dates
pr-review-latency: Duration waiting on review
pr-author-latency: Duration waiting on author
layout: graphs
---
225 changes: 210 additions & 15 deletions updater.py
@@ -19,26 +19,212 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# pyright: strict

import csv
import datetime
import json
from datetime import datetime, timedelta, timezone
import os
import subprocess
import sys
import time
import jinja2
from typing import Callable
from urllib.parse import urlparse, parse_qs

import jinja2
import requests


API_URL = "https://api.github.com/search/issues"


# Various insignificant comments from triage, merge conflicts, etc.
# count as "updates" on GitHub, so `updated:` isn't an ideal way to
# gauge the last activity on a PR.
#
# Instead, use the Issue Events API to find when the
# status label (`S-*`) was most recently changed.
def get_pr_status_updated(http_session: requests.Session, repo: str, events_url: str, pr_number: int) -> str | None:
"""Get the timestamp of the last status label change for the given PR"""

page = 1

while True:
print(f"Fetching events for {repo}#{pr_number}")
res = http_session.get(events_url, params={"per_page": 100, "page": page})

# Properly handle rate limits
if res.status_code == 403:
wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
time.sleep(wait)
continue

# Make sure we got the last page
#
# In most cases, the `per_page` of 100 should avoid
# needing to issue another request.
last = res.links.get("last")
if last is not None and "url" in last:
parsed = urlparse(last["url"])
parsed_query = parse_qs(parsed.query)
last_page = int(parsed_query["page"][0])

if last_page > page:
page = last_page
continue

data = res.json()
if "errors" in data:
for error in data["errors"]:
print("Error while fetching events for '%s': %s" % (f"{repo}#{pr_number}", error["message"]))
exit(1)
else:
break

# Process events
data = list(data) # data is a list of events

# Find last 'labeled' event with label name 'S-*'
for i, event in enumerate(reversed(data)):
        event_index = len(data) - 1 - i  # index of this event in `data`

if event["event"] == "labeled":
label = str(event["label"]["name"])
if label.startswith("S-"):
# Continue iterating backwards to see if this label was the last one removed
found_prev_event = False
for prev_event in reversed(data[:event_index]):
if prev_event["event"] == "unlabeled":
prev_label = str(prev_event["label"]["name"])
if prev_label == label:
found_prev_event = True
break
                        elif prev_label.startswith("S-"):
                            # A different status label changed first
                            break

if found_prev_event:
# Same label was just removed and added back, so keep searching
continue
else:
return event["created_at"]

return None

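# Illustrative walk-through (event shapes assumed from the GitHub REST
# API docs): given the abbreviated stream below, the scan above returns
# the 2023-01-10 timestamp, because the later removal and re-adding of
# the same label is treated as noise and the backwards search keeps
# going:
#
#   [{"event": "labeled",   "label": {"name": "S-waiting-on-review"},
#     "created_at": "2023-01-10T00:00:00Z"},
#    {"event": "unlabeled", "label": {"name": "S-waiting-on-review"}},
#    {"event": "labeled",   "label": {"name": "S-waiting-on-review"},
#     "created_at": "2023-01-25T00:00:00Z"}]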

# Map a bin spec to a day-difference predicate:
#   ">1"   matches diff < 1    (more recent than 1 day ago)
#   "7..4" matches 4 <= diff <= 7
#   "<180" matches diff > 180  (older than 180 days)
def relative_date_to_range(param: str) -> Callable[[int], bool]:
    """Get a lambda that returns whether or not the argument
    is within the relative date range"""

if "|" in param:
value = param.split("|")[0]
else:
value = param

    # Support date ranges
    if ".." in value:
        end, start = value.split("..", 1)
        end, start = int(end), int(start)

        return lambda x: (start <= x <= end)
    else:
        # Handle comparison operators; the two-character operators must
        # be checked first, or ">=" would be misparsed as ">"
        if value.startswith(">="):
            threshold = int(value[2:])
            return lambda x: (x <= threshold)
        if value.startswith(">"):
            threshold = int(value[1:])
            return lambda x: (x < threshold)
        if value.startswith("<="):
            threshold = int(value[2:])
            return lambda x: (x >= threshold)
        if value.startswith("<"):
            threshold = int(value[1:])
            return lambda x: (x > threshold)

        threshold = int(value)
        return lambda x: (x == threshold)

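# Illustrative checks of the bin semantics (arguments are day
# differences; ">" means "more recent than", hence the inverted
# comparisons above):
#
#   relative_date_to_range(">1|today")(0)                  -> True
#   relative_date_to_range("7..4|last week")(5)            -> True
#   relative_date_to_range("7..4|last week")(8)            -> False
#   relative_date_to_range("<180|more than 6 months")(365) -> True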

def status_updated(http_session: requests.Session, now: datetime, repo: str, query: str, params: list[str]) -> list[int]:
"""Fetch the time the status was last updated for each PR,
and compile into a histogram with bins defined by params"""

# `__status_updated:` must come last in the query
query = query.split("__status_updated:{{param|relative_date}}", 1)[0]
query = f"is:pr repo:{repo} {query}".strip()

bin_ranges = [relative_date_to_range(param) for param in params]
bins = [0 for _ in params]

# Iterate through list of all PRs
page = 1
last_page = 1

while page <= last_page:
print(f"Querying {query}", flush=True)
res = http_session.get(API_URL, params={"q": query, "per_page": 100, "page": page})

# Properly handle rate limits
if res.status_code == 403:
wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
time.sleep(wait)
continue

data = res.json()
if "errors" in data:
for error in data["errors"]:
print("Error while searching for '%s': %s" % (query, error["message"]))
exit(1)

        # Calculate the last page (ceiling division: a final partial
        # page still needs its own request)
        last_page = (int(data["total_count"]) + 99) // 100

# Process each PR
for pr in data["items"]:
pr_number = int(pr["number"])
updated = get_pr_status_updated(http_session, repo, pr["events_url"], pr_number)

if updated is None:
updated = str(pr["updated_at"])
print(f"{repo}#{pr_number} status updated not found, using updated field instead: {updated}")
else:
print(f"{repo}#{pr_number} last updated at {updated}")

            # datetime.fromisoformat() can't parse a trailing "Z"
            # before Python 3.11
            updated = updated.replace("Z", "+00:00")

# Get the relative time period
diff = now.date() - datetime.fromisoformat(updated).date()

print(f"{repo}#{pr_number} not updated in {diff.days} days")

# Increment the bin that this diff fits in
for i, r in enumerate(bin_ranges):
if r(diff.days):
param = params[i]
print(f"{repo}#{pr_number} added to bin #{i} for '{param}'")
bins[i] += 1
break

page += 1

return bins

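# Hypothetical wiring, with counts mirroring the CSV rows above: given
# the eight bins from a header row,
#
#   status_updated(http_session, now, "rust-lang/rust", query, params)
#
# returns one count per entry of `params`, e.g.
# [1, 24, 18, 35, 46, 49, 27, 0], which update_csv_file() below appends
# after the date column.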

# GitHub doesn't support relative dates on `created:` and `updated:`, so this
# allows the CSV files to use `{{param|relative_date}}`
def filter_relative_date(value):
def format_relative_date(date):
return str(datetime.date.today() - datetime.timedelta(days=int(date))) + "T00:00:00+00:00"
def filter_relative_date(now: datetime, value: str):
def format_relative_date(date: str):
return str(now.date() - timedelta(days=int(date))) + "T00:00:00+00:00"

# Support date ranges
if ".." in value:
@@ -60,7 +246,7 @@ def format_relative_date(date):
return cmp+format_relative_date(value)

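# For example, with now = 2023-01-30 (UTC), format_relative_date("7")
# yields "2023-01-23T00:00:00+00:00", so a bin such as "7..4" reaches
# GitHub as an absolute ISO date range that the search qualifiers
# understand.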

def get_issues_count(http_session, repo, jinja_env, query, param):
def get_issues_count(http_session: requests.Session, repo: str, jinja_env: jinja2.Environment, query: str, param: str):
"""Get the number of issues with the provided label"""
# Strip pretty labels from the query
if "|" in param:
@@ -79,7 +265,7 @@ def get_issues_count(http_session, repo, jinja_env, query, param):
# Properly handle rate limits
if res.status_code == 403:
wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
print("Rate limit reached, waiting %s seconds..." % int(wait))
print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
time.sleep(wait)
continue

@@ -93,26 +279,33 @@ def get_issues_count(http_session, repo, jinja_env, query, param):
return data["total_count"]


def update_csv_file(http_session, repo, path):
def update_csv_file(http_session: requests.Session, repo: str, path: str):
"""Add today's records to the provided csv file"""
today = str(datetime.date.today())
now = datetime.now(tz=timezone.utc)
today = str(now.date())

# Load the CSV file in memory
with open(path) as f:
content = list(csv.reader(f))

# If today already has its own row don't add another one
if len(content) == 1 or content[1][0] != today:
content.insert(1, None)
content.insert(1, [])
content[1] = [today]

# Setup the Jinja2 environment
jinja_env = jinja2.Environment()
jinja_env.filters["relative_date"] = filter_relative_date
jinja_env.filters["relative_date"] = lambda value : filter_relative_date(now, str(value)) # type: ignore

query = content[0][0]
for param in content[0][1:]:
content[1].append(str(get_issues_count(http_session, repo, jinja_env, query, param)))

    # Custom query requiring custom logic
if "__status_updated:{{param|relative_date}}" in query:
for bin in status_updated(http_session, now, repo, query, content[0][1:]):
content[1].append(str(bin))
else:
for param in content[0][1:]:
content[1].append(str(get_issues_count(http_session, repo, jinja_env, query, param)))

with open(path, "w") as f:
writer = csv.writer(f, lineterminator="\n")
@@ -121,6 +314,8 @@ def update_csv_file(http_session, repo, path):

if __name__ == "__main__":
http_session = requests.Session()
http_session.headers["Accept"] = "application/vnd.github+json"
http_session.headers["X-GitHub-Api-Version"] = "2022-11-28"

if "GITHUB_TOKEN" in os.environ:
http_session.auth = ('x-token', os.environ["GITHUB_TOKEN"])