Skip to content

Commit

Permalink
list files in count by script (#14)
Browse files: browse the repository at this point in the history
  • Loading branch information
jaydeluca authored Dec 24, 2024
1 parent 25bf048 commit e70ae84
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@ __pycache__/*
tests/cache
tests/cache/*

.coverage
.coverage

latest-groovy.png
latest-with-java.png
latest-groovy-detailed.png
19 changes: 13 additions & 6 deletions count_by_instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@


class App:
def __init__(self, languages: List[str], path_prefix: str, keyword: str):
self.client = GithubClient()
def __init__(self, languages: List[str], path_prefix: str, keyword: str,
             client: "GithubClient | None" = None):
    """Set up the client and the file filter used by the app.

    Args:
        languages: Forwarded to ``DataFilter`` as ``languages``.
        path_prefix: Forwarded to ``DataFilter`` as ``path_prefix``.
        keyword: Forwarded to ``DataFilter`` as ``keyword``.
        client: Instance stored as ``self.client``. When omitted (or
            ``None``), a new ``GithubClient`` is created for this App.
    """
    # A ``GithubClient()`` default in the signature is evaluated only once,
    # when the ``def`` statement runs, so it would be constructed at import
    # time and shared by every App. Build it per instance instead.
    self.client = GithubClient() if client is None else client
    self.data_filter = DataFilter(languages=languages,
                                  path_prefix=path_prefix, keyword=keyword)

Expand All @@ -35,19 +36,21 @@ def main(args):
keyword="test"
)

today = datetime.now().date().strftime("%Y-%m-%dT%H:%M:%SZ")
today = (datetime.now().date() + pd.Timedelta(days=1)).strftime(
"%Y-%m-%dT%H:%M:%SZ")

commit = app.get_commit_by_date(date=today, repository=args.repo)
repo_files = app.get_repository_by_commit(
repository=args.repo,
commit=commit
)
file_counts, file_sizes = count_by_language_and_file_extension(
file_info = count_by_language_and_file_extension(
files=repo_files["files"],
languages=[args.language])

# Print the table showing file counts and sizes
data = [(key, file_counts[key], file_sizes[key]) for key in file_counts.keys()]
data = [(key, file_info.file_counts[key], file_info.file_sizes[key]) for key in
file_info.file_counts.keys()]
df2 = pd.DataFrame(data, columns=['Key', 'File Count', 'Total File Size'])
df2 = df2.sort_values(by='Total File Size', key=lambda col: col.astype(int),
ascending=False)
Expand All @@ -56,7 +59,7 @@ def main(args):
print(f"| Total | {df2['File Count'].sum()} | {df2['Total File Size'].sum()} |")

# Create a pie chart for file counts only
df = pd.DataFrame(list(file_counts.items()), columns=['Key', 'Value'])
df = pd.DataFrame(list(file_info.file_counts.items()), columns=['Key', 'Value'])
df = df.sort_values(by='Value', key=lambda col: col.astype(int), ascending=False)

sns.set_theme()
Expand All @@ -69,6 +72,10 @@ def main(args):
plt.title(f'Remaining {args.language} files by Instrumentation')
plt.ylabel('')

print("\n")
for item in file_info.matched_files:
print(item)

if args.output is not None:
plt.savefig(args.output)
else:
Expand Down
1 change: 1 addition & 0 deletions data_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def get_file_counts_and_lengths(self, payload):
for i in tree:
if self.matches_meta(i) \
and self.matches_file_extensions(i["path"]) \
and "grails" not in i["path"] \
and self.matches_directory(i["path"]):
data_result.append(CodeFile(path=i["path"], size=i["size"]))

Expand Down
4 changes: 4 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ def main(args):

for lang, counts in language_counts.items():
df = pd.DataFrame({'Date': dates, 'Count': counts})

# Drop rows whose count is 0 (treated as junk data) before plotting
df = df[df['Count'] != 0]

df['Date'] = pd.to_datetime(df['Date'])
sns.lineplot(x='Date', y='Count', label=lang.capitalize(), data=df, marker='o')

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ pytest-cov
ruff==0.5.7
seaborn
pandas
tabulate
tabulate
six==1.17.0
19 changes: 16 additions & 3 deletions utilities.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from collections import defaultdict
from typing import List, Dict

from CodeFile import CodeFile


@dataclass
class FileCountInfo:
    """Result bundle returned by ``count_by_language_and_file_extension``.

    Groups the two per-instrumentation tallies together with the list of
    file paths that contributed to them.
    """

    # Number of matched files, keyed by instrumentation name.
    file_counts: Dict[str, int]
    # Sum of ``file.size`` over the matched files, keyed by instrumentation
    # name (units are whatever ``CodeFile.size`` holds -- TODO confirm).
    file_sizes: Dict[str, int]
    # ``file.path`` of every file that was counted.
    matched_files: List[str]


def get_dates_between(start_date_str, end_date, interval):
date_format = "%Y-%m-%d"
output_format = "%Y-%m-%dT%H:%M:%SZ"
Expand Down Expand Up @@ -38,12 +46,14 @@ def count_by_file_extension(files: List[str], languages: List[str]) -> dict:
for file in files:
for ext in languages:
extension = f".{ext.lower()}"
if file.endswith(extension):
if file.endswith(extension) and "grails" not in file:
file_counts[ext] += 1
return file_counts


def count_by_language_and_file_extension(files: List[CodeFile], languages: List[str]) -> Dict[str, Dict[str, int]]:
def count_by_language_and_file_extension(files: List[CodeFile],
languages: List[str]) -> FileCountInfo:
matched_files = []
file_counts = defaultdict(int)
file_sizes = defaultdict(int)
for file in files:
Expand All @@ -55,7 +65,10 @@ def count_by_language_and_file_extension(files: List[CodeFile], languages: List[
if extension in languages:
file_counts[instrumentation] += 1
file_sizes[instrumentation] += file.size
return file_counts, file_sizes
matched_files.append(file.path)

return FileCountInfo(file_counts=file_counts, file_sizes=file_sizes,
matched_files=matched_files)


def convert_to_plot(input_dict: dict, items):
Expand Down

0 comments on commit e70ae84

Please sign in to comment.