format output structure

provide better visualisation of error/warning output
philcaz · Oct 19, 2024 · 9989eb7 · 9989eb7
1 parent 6c02bba
commit 9989eb7
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 24 deletions.
diff --git a/.github/workflows/quality-check.yml b/.github/workflows/quality-check.yml
@@ -15,10 +15,6 @@ jobs:
       with:
         python-version: '3.x'
 
-    - name: Install dependencies
-      run: |
-        pip install -r requirements.txt || true  # Only if you have dependencies listed here
-
     - name: Run Quality Check
       run: |
         python ./scripts/check_quality.py

diff --git a/scripts/check_quality.py b/scripts/check_quality.py
@@ -9,21 +9,35 @@
 SUMMARY_FILE_PATH = "./check_quality_summary.txt"
 errors = []
 warnings = []
+
+# Error and Warning Counts
+error_counts = {
+    'ERROR Wrong Escape': 0,
+    'ERROR Wrong Starting Letter': 0,
+    'ERROR Non-UTF8': 0
+}
+warning_counts = {
+    'WARN Duplicate FullName/Abbreviation': 0,
+    'WARN Same Abbreviation as Full Name': 0,
+    'WARN Outdated Manage Abbreviation': 0
+}
 # Error tracking
-def error(message):
-    errors.append(f"ERROR: {message}")
+def error(message, error_type):
+    errors.append((error_type, f"ERROR: {message}"))
+    error_counts[error_type] += 1
 
 # Warning tracking
-def warning(message):
-    warnings.append(f"WARN: {message}")
+def warning(message, warning_type):
+    warnings.append((warning_type, f"WARN: {message}"))
+    warning_counts[warning_type] += 1
 
 # Check if non-UTF8 characters are present in the file
 def check_non_utf8_characters(filepath):
     try:
         with open(filepath, 'r', encoding='utf-8') as f:
             f.read()
     except UnicodeDecodeError:
-        error(f"File {filepath} contains non-UTF8 characters")
+        error(f"File {filepath} contains non-UTF8 characters", 'ERROR Non-UTF8')
 
 # Check if there are wrong escape characters in abbreviation entries
 def check_wrong_escape(filepath):
@@ -32,15 +46,15 @@ def check_wrong_escape(filepath):
         for line_number, row in enumerate(reader, start=1):
             for field in row:
                 if re.search(r"[a-zA-Z]*\\[,\"]", field):
-                    error(f"Wrong escape character found in {filepath} at line {line_number}: {field}")
+                    error(f"Wrong escape character found in {filepath} at line {line_number}: {field}", 'ERROR Wrong Escape')
 
 # Check for wrong beginning letters in journal abbreviations
 def check_wrong_beginning_letters(filepath):
     with open(filepath, 'r', encoding='utf-8') as f:
         reader = csv.reader(f)
         for line_number, row in enumerate(reader, start=1):
             if row[0].startswith("\""):
-                error(f"Wrong beginning letter found in {filepath} at line {line_number}: {row[0]}")
+                error(f"Wrong beginning letter found in {filepath} at line {line_number}: {row[0]}", 'ERROR Wrong Starting Letter')
 
 # Check for duplicate entries
 def check_duplicates(filepath):
@@ -50,7 +64,7 @@ def check_duplicates(filepath):
         for line_number, row in enumerate(reader, start=1):
             line = ','.join(row)
             if line in entries:
-                warning(f"Duplicate found in {filepath} at line {line_number}: {line}, first instance seen at line {entries[line]}")
+                warning(f"Duplicate found in {filepath} at line {line_number}: {line}, first instance seen at line {entries[line]}", 'WARN Duplicate FullName/Abbreviation')
             else:
                 entries[line] = line_number
 
@@ -60,15 +74,15 @@ def check_full_form_identical_to_abbreviation(filepath):
         reader = csv.reader(f)
         for line_number, row in enumerate(reader, start=1):
             if len(row) == 2 and row[0].strip() == row[1].strip() and ' ' in row[0].strip():
-                warning(f"Abbreviation is the same as full form in {filepath} at line {line_number}: {row[0]}")
+                warning(f"Abbreviation is the same as full form in {filepath} at line {line_number}: {row[0]}", 'WARN Same Abbreviation as Full Name')
 
 # Check for outdated abbreviations
 def check_outdated_abbreviations(filepath):
     with open(filepath, 'r', encoding='utf-8') as f:
         reader = csv.reader(f)
         for line_number, row in enumerate(reader, start=1):
             if "Manage." in row and "Manag." not in row:
-                warning(f"Outdated abbreviation used in {filepath} at line {line_number}: {','.join(row)}")
+                warning(f"Outdated abbreviation used in {filepath} at line {line_number}: {','.join(row)}", 'WARN Outdated Manage Abbreviation')
 
 if __name__ == "__main__":
     if not os.path.exists(JOURNALS_FOLDER_PATH):
@@ -89,19 +103,40 @@ def check_outdated_abbreviations(filepath):
             check_outdated_abbreviations(filepath)
 
     # Write the summary to a file
+    total_issues = sum(error_counts.values()) + sum(warning_counts.values())
     with open(SUMMARY_FILE_PATH, 'w') as summary_file:
+        # Write summary table with vertical headers
+        summary_file.write(f"Total: {total_issues}\n")
+        summary_file.write(f"ERROR Wrong Escape: {error_counts['ERROR Wrong Escape']}\n")
+        summary_file.write(f"ERROR Wrong Starting Letter: {error_counts['ERROR Wrong Starting Letter']}\n")
+        summary_file.write(f"ERROR Non-UTF8: {error_counts['ERROR Non-UTF8']}\n")
+        summary_file.write(f"WARN Duplicate FullName/Abbreviation: {warning_counts['WARN Duplicate FullName/Abbreviation']}\n")
+        summary_file.write(f"WARN Same Abbreviation as Full Name: {warning_counts['WARN Same Abbreviation as Full Name']}\n")
+        summary_file.write(f"WARN Outdated Manage Abbreviation: {warning_counts['WARN Outdated Manage Abbreviation']}\n")
+
+        # Write detailed errors and warnings
         if errors or warnings:
-            summary_file.write("Quality Check Summary:\n")
-            if errors:
-                summary_file.write("\nErrors:\n")
-                for err in errors:
-                    summary_file.write(f"{err}\n")
-            if warnings:
-                summary_file.write("\nWarnings:\n")
-                for warn in warnings:
-                    summary_file.write(f"{warn}\n")
+            summary_file.write("\nQuality Check Summary:\n")
+            for subtitle in [
+                'ERROR Wrong Escape', 
+                'ERROR Wrong Starting Letter', 
+                'ERROR Non-UTF8',
+                'WARN Duplicate FullName/Abbreviation',
+                'WARN Same Abbreviation as Full Name',
+                'WARN Outdated Manage Abbreviation'
+            ]:
+                # Write subtitle and corresponding messages
+                filtered_errors = [err for err_type, err in errors if err_type == subtitle]
+                filtered_warnings = [warn for warn_type, warn in warnings if warn_type == subtitle]
+                if filtered_errors or filtered_warnings:
+                    count = len(filtered_errors) + len(filtered_warnings)
+                    summary_file.write(f"\n{subtitle}: with {count} instances\n")
+                    for err in filtered_errors:
+                        summary_file.write(f"{err}\n")
+                    for warn in filtered_warnings:
+                        summary_file.write(f"{warn}\n")
         else:
-            summary_file.write("Quality check completed with no errors or warnings.\n")
+            summary_file.write("\nQuality check completed with no errors or warnings.\n")
 
     # Print summary and set exit code
     if errors: