-
Notifications
You must be signed in to change notification settings - Fork 0
/
count_by_instrumentation.py
93 lines (72 loc) · 3.28 KB
/
count_by_instrumentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from datetime import datetime
from typing import List
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import argparse
from data_filter import DataFilter
from utilities import count_by_language_and_file_extension
from github_client import GithubClient
class App:
    """Facade pairing a GitHub client with a file filter.

    The filter is configured once, at construction time, with the languages,
    path prefix and keyword supplied by the caller.
    """

    def __init__(self, languages: List[str], path_prefix: str, keyword: str):
        """Create the GitHub client and the configured data filter."""
        self.client = GithubClient()
        self.data_filter = DataFilter(
            languages=languages,
            path_prefix=path_prefix,
            keyword=keyword,
        )

    def get_commit_by_date(self, repository, date):
        """Return the client's most recent commit for *repository* and *date*.

        The lookup always passes "main" as the third argument (presumably
        the branch name -- confirm against ``GithubClient``).
        """
        most_recent = self.client.get_most_recent_commit(repository, date, "main")
        return most_recent

    def get_repository_by_commit(self, repository, commit):
        """Fetch *repository* at *commit* and run the result through the filter.

        Returns whatever ``DataFilter.get_file_counts_and_lengths`` produces
        for the data returned by the client.
        """
        snapshot = self.client.get_repository_at_commit(repository, commit)
        return self.data_filter.get_file_counts_and_lengths(snapshot)
def main(args):
    """Print a file count/size table and draw a pie chart of file counts.

    Looks up a commit of ``args.repo`` for the current date, fetches the
    repository contents at that commit filtered to ``args.language`` test
    files under ``instrumentation/``, then:

    1. prints a Markdown table of file count and total file size per key,
       sorted by size (largest first), followed by a totals row;
    2. renders a pie chart of file counts per key, saved to ``args.output``
       when given, otherwise shown interactively.

    Args:
        args: Parsed command-line namespace. Reads ``repo``, ``language``
            and ``output``.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    app = App(
        languages=[args.language],
        path_prefix="instrumentation/",
        keyword="test",
    )

    # The timestamp carries a "Z" (UTC) suffix, so the calendar date must be
    # taken in UTC as well; using the local date made the selected commit
    # depend on the machine's timezone. Formatting a ``date`` leaves the time
    # fields at 00:00:00, i.e. the value is midnight at the start of the day.
    today = datetime.now(timezone.utc).date().strftime("%Y-%m-%dT%H:%M:%SZ")

    commit = app.get_commit_by_date(date=today, repository=args.repo)
    repo_files = app.get_repository_by_commit(
        repository=args.repo,
        commit=commit,
    )

    file_counts, file_sizes = count_by_language_and_file_extension(
        files=repo_files["files"],
        languages=[args.language],
    )

    # Print the table showing file counts and sizes.
    rows = [(key, count, file_sizes[key]) for key, count in file_counts.items()]
    size_table = pd.DataFrame(
        rows, columns=['Key', 'File Count', 'Total File Size'])
    size_table = size_table.sort_values(
        by='Total File Size',
        key=lambda col: col.astype(int),
        ascending=False,
    )
    print(size_table.to_markdown(index=False))
    print(f"| Total | {size_table['File Count'].sum()} "
          f"| {size_table['Total File Size'].sum()} |")

    # Nothing matched: an empty series cannot be drawn as a pie chart, so
    # stop after the (empty) table instead of failing inside the plot call.
    if not file_counts:
        print("No matching files found; skipping pie chart.")
        return

    # Create a pie chart for file counts only.
    counts = pd.DataFrame(list(file_counts.items()), columns=['Key', 'Value'])
    counts = counts.sort_values(
        by='Value', key=lambda col: col.astype(int), ascending=False)

    sns.set_theme()
    colors = sns.color_palette('pastel')[0:len(counts)]
    explode = [0.05] * len(counts)  # offsets every slice slightly from the centre
    counts.set_index('Key')['Value'].plot.pie(
        autopct='%1.0f%%', colors=colors, explode=explode)
    plt.title(f'Remaining {args.language} files by Instrumentation')
    plt.ylabel('')  # drop the default y-axis label (the column name)

    if args.output is not None:
        plt.savefig(args.output)
    else:
        plt.show()
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser with the required ``--repo`` and
        ``--language`` options and the optional ``--output`` option
        (``None`` when omitted).
    """
    parser = argparse.ArgumentParser(
        description='Show Pie chart of file count in test folders')
    parser.add_argument("-r", "--repo",
                        help="Repository name. "
                             "ex: open-telemetry/opentelemetry-java-instrumentation",
                        required=True)
    # Adjacent string literals are concatenated verbatim, so each fragment
    # must carry its own separator before the "ex:" example.
    parser.add_argument("-l", "--language",
                        help="Language to analyze. "
                             "ex: groovy",
                        required=True)
    parser.add_argument("-o", "--output",
                        help="File name to output graph to "
                             "(leave blank and no file is generated). "
                             "ex: pie-chart-counts.png")
    return parser


if __name__ == '__main__':
    arguments = build_parser().parse_args()
    main(arguments)