Skip to content

Commit

Permalink
update scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
SkafteNicki committed Nov 25, 2024
1 parent 8499229 commit a021306
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
26 changes: 25 additions & 1 deletion tools/repo_stats/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class RepoStats(BaseModel):
average_commit_length: float | None
contributions_per_contributor: list[int] | None
total_commits: int | None
activity_matrix: list[list[int]] | None

num_docker_files: int | None
num_python_files: int | None
num_workflow_files: int | None
Expand All @@ -34,6 +36,8 @@ class RepoStats(BaseModel):
using_dvc: bool | None
repo_size: float | None
readme_length: int | None
actions_passing: bool | None

num_warnings: int | None


Expand Down Expand Up @@ -170,6 +174,26 @@ def readme_length(self) -> int:
return len(plain_text.split())
return 0

@property
def actions_passing(self) -> bool:
    """Tell whether no workflow run for the newest default-branch commit is failing.

    Two requests are made against ``self.repo_api``: one for the head commit of
    ``self.default_branch`` and one for the push-triggered workflow runs of that
    branch. Only runs whose ``head_sha`` equals the head commit are considered.

    Returns:
        ``False`` if any considered run is not ``"completed"`` or did not conclude
        with ``"success"``; ``True`` otherwise. Note that this is also ``True`` when
        no returned run matches the head commit.
    """
    # SHA of the most recent commit on the default branch.
    sha_of_head = requests.get(
        f"{self.repo_api}/commits/{self.default_branch}", headers=headers, timeout=100
    ).json()["sha"]

    # Push-triggered workflow runs on the default branch (as returned by a single request).
    runs = requests.get(
        f"{self.repo_api}/actions/runs?branch={self.default_branch}&event=push",
        headers=headers,
        timeout=100,
    ).json()["workflow_runs"]

    def _not_green(run) -> bool:
        # A run only counts as passing once it has finished AND succeeded.
        return not (run["status"] == "completed" and run["conclusion"] == "success")

    return not any(run["head_sha"] == sha_of_head and _not_green(run) for run in runs)


class GroupInfo(BaseModel):
"""Model for group information."""
Expand Down Expand Up @@ -238,7 +262,7 @@ def prs(self):
return None

@property
def commits(self):
def commits(self) -> list:
"""Returns all commits to the default branch."""
if self.repo_accessible:
commits = []
Expand Down
34 changes: 33 additions & 1 deletion tools/repo_stats/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
from pathlib import Path

import numpy as np
import requests
from dotenv import load_dotenv
from google.cloud.storage import Client
Expand Down Expand Up @@ -57,6 +58,27 @@ def load_data(file_name: str) -> list[GroupInfo]:
return content


def create_activity_matrix(commits: list, max_delta: int = 5, normalize: bool = True) -> list[list[int]]:
    """Create a (calendar day x hour of day) matrix of commit counts.

    Args:
        commits: Commit dicts carrying an ISO-8601 timestamp under
            ``commit["commit"]["committer"]["date"]``; a trailing ``"Z"`` is accepted.
        max_delta: Length, in weeks, of the window counted from the earliest commit.
            Commits after the window are ignored.
        normalize: Currently unused; kept so existing callers keep working.

    Returns:
        A list with one row per calendar day, from the day of the earliest commit to
        the day of the last commit inside the window (both included). Each row holds
        24 integers, one per hour of the day. An empty list is returned when there are
        no commits.
    """
    # Nothing to bin; without this guard commit_times[0] would raise IndexError.
    if not commits:
        return []

    # Strip only a literal trailing "Z" instead of blindly dropping the last character.
    commit_times = sorted(
        datetime.datetime.fromisoformat(commit["commit"]["committer"]["date"].removesuffix("Z"))
        for commit in commits
    )

    start_time = commit_times[0]
    end_time = min(start_time + datetime.timedelta(weeks=max_delta), commit_times[-1])

    # Rows are calendar days. Indexing by elapsed 24-hour periods since the first commit
    # would put a commit made just after midnight in the previous day's row, while its
    # column would still be the wall-clock hour.
    first_day = start_time.date()
    num_days = (end_time.date() - first_day).days + 1  # include last day

    commit_matrix = np.zeros((num_days, 24), dtype=int)

    for commit_time in commit_times:
        if commit_time > end_time:
            break  # times are sorted, so everything after this is outside the window
        day_index = (commit_time.date() - first_day).days
        commit_matrix[day_index, commit_time.hour] += 1

    return commit_matrix.tolist()


app = Typer()


Expand Down Expand Up @@ -100,6 +122,10 @@ def main():
and contributor.login == commit["author"]["login"]
):
contributor.commits_pr += 1
commits += pr_commits

activity_matrix = create_activity_matrix(commits)

average_commit_length = sum([len(c) for c in commit_messages]) / len(commit_messages)

contributions_per_contributor = [c.total_commits for c in contributors]
Expand All @@ -116,6 +142,7 @@ def main():
using_dvc = repo_content.using_dvc
repo_size = repo_content.repo_size
readme_length = repo_content.readme_length
actions_passing = repo_content.actions_passing

report = Report(
group_number=group.group_number, repo_api=group.repo_api, default_branch=group.default_branch
Expand All @@ -132,6 +159,7 @@ def main():
total_commits = None
contributions_per_contributor = None
total_commits = None
activity_matrix = None

num_docker_files = None
num_python_files = None
Expand All @@ -141,6 +169,7 @@ def main():
using_dvc = None
repo_size = None
readme_length = None
actions_passing = None

num_warnings = None

Expand All @@ -155,6 +184,7 @@ def main():
average_commit_length=average_commit_length,
contributions_per_contributor=contributions_per_contributor,
total_commits=total_commits,
activity_matrix=activity_matrix,
num_docker_files=num_docker_files,
num_python_files=num_python_files,
num_workflow_files=num_workflow_files,
Expand All @@ -163,12 +193,14 @@ def main():
using_dvc=using_dvc,
repo_size=repo_size,
readme_length=readme_length,
actions_passing=actions_passing,
num_warnings=num_warnings,
)
repo_stats.append(repo_stat)

logger.info("Writing repo stats to file")
filename = f"repo_stats_{datetime.datetime.now(tz=datetime.UTC).strftime("%Y_%m_%d_%H_%M_%S")}.json"
now = datetime.datetime.now(tz=datetime.UTC).strftime("%Y_%m_%d_%H_%M_%S")
filename = f"repo_stats_{now}.json"
with open("repo_stats.json", "w") as f:
json.dump([r.model_dump() for r in repo_stats], f)
with open(filename, "w") as f:
Expand Down

0 comments on commit a021306

Please sign in to comment.