diff --git a/data_filter.py b/data_filter.py index 281d218..cbf3f6d 100644 --- a/data_filter.py +++ b/data_filter.py @@ -3,14 +3,14 @@ class DataFilter: - def __init__(self, file_extensions: List[str], path_prefix: str, keyword: str): - self.file_extensions = file_extensions + def __init__(self, languages: List[str], path_prefix: str, keyword: str): + self.languages = languages self.path_prefix = path_prefix self.keyword = keyword def matches_file_extensions(self, path: str) -> bool: - for ext in self.file_extensions: - if path.endswith(ext): + for ext in self.languages: + if path.endswith(f".{ext.lower()}"): return True return False diff --git a/main.py b/main.py index 8b726ba..83f8312 100644 --- a/main.py +++ b/main.py @@ -17,9 +17,9 @@ class App: - def __init__(self, file_extensions: List[str], path_prefix: str, keyword: str): + def __init__(self, languages: List[str], path_prefix: str, keyword: str): self.client = GithubClient() - self.data_filter = DataFilter(file_extensions=file_extensions, + self.data_filter = DataFilter(languages=languages, path_prefix=path_prefix, keyword=keyword) self.commit_cache = SingleFileCache(location=COMMIT_CACHE_FILE) self.repo_cache = MultiFileCache(location=REPO_CACHE_FILE) @@ -45,13 +45,12 @@ def get_repository_by_commit(self, repository, commit): def main(args): - file_extensions = [ - ".java", - ".groovy" + languages = [ + "groovy" ] app = App( - file_extensions=file_extensions, + languages=languages, path_prefix="instrumentation/", keyword="test" ) @@ -67,30 +66,29 @@ def main(args): commit=commit ) count = count_by_file_extension(files=repo_files["files"], - file_extensions=file_extensions) + languages=languages) if count: - result[snapshot] = { - "date": snapshot, - "java": count[".java"], - "groovy": count[".groovy"] - } + result[snapshot]["date"] = snapshot + for language in languages: + result[snapshot][language] = count[language] except Exception as e: print(f"Error for {snapshot}, {e}") dates = [] - java_counts = [] - groovy_counts = [] + + language_counts = {} for item in result.values(): - date = item["date"][:10] - java_count = item["java"] - groovy_count = item["groovy"] - dates.append(date) - java_counts.append(java_count) - groovy_counts.append(groovy_count) - - plt.plot(dates, java_counts, label='Java') - plt.plot(dates, groovy_counts, label='Groovy') + dates.append(item["date"][:10]) + for language in languages: + try: + language_counts[language].append(item[language]) + except KeyError: + language_counts[language] = [item[language]] + + for lang, counts in language_counts.items(): + plt.plot(dates, counts, label=lang.capitalize()) + plt.xlabel('Date') plt.ylabel('Count') plt.title('Test Classes by Lang in Instrumentation Directory') diff --git a/tests/test_data_filter.py b/tests/test_data_filter.py index 723fb29..4a7b74b 100644 --- a/tests/test_data_filter.py +++ b/tests/test_data_filter.py @@ -7,9 +7,9 @@ class ParseDataTestCase(unittest.TestCase): def __init__(self, *args, **kwargs): - self.file_extensions = [ - ".java", - ".groovy" + self.languages = [ + "java", + "groovy" ] super(ParseDataTestCase, self).__init__(*args, **kwargs) @@ -84,7 +84,7 @@ def test_filters_payload_to_expected_files_based_on_criteria(self): """ data = json.loads(test_json) - data_filter = DataFilter(file_extensions=self.file_extensions, + data_filter = DataFilter(languages=self.languages, path_prefix="instrumentation/", keyword="test") test = data_filter.parse_data(payload=data) @@ -111,7 +111,7 @@ def test_given_url_with_different_case_than_keyword_still_filters_correctly(self """ data = json.loads(payload) - data_filter = DataFilter(file_extensions=self.file_extensions, + data_filter = DataFilter(languages=self.languages, path_prefix="instrumentation/", keyword="test") test = data_filter.parse_data(payload=data) diff --git a/utilities.py b/utilities.py index 97836c4..a8ccf1f 100644 --- a/utilities.py +++ b/utilities.py @@ -27,10 +27,11 @@ def get_dates_between(start_date_str, end_date, interval): return date_list -def count_by_file_extension(files: List[str], file_extensions: List[str]) -> dict: +def count_by_file_extension(files: List[str], languages: List[str]) -> dict: file_counts = defaultdict(int) for file in files: - for ext in file_extensions: - if file.endswith(ext): + for ext in languages: + extension = f".{ext.lower()}" + if file.endswith(extension): file_counts[ext] += 1 return file_counts