From 27f91d7edd1a006ae43f7b500c022d97bdb5d40b Mon Sep 17 00:00:00 2001 From: Onewbiek Date: Thu, 24 Aug 2023 13:22:38 -0400 Subject: [PATCH 01/19] Init .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ed7bdc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build/ +dist/ +*.egg-info/ \ No newline at end of file From 79e61cb4caa7b3c044a0dced5188aac748627204 Mon Sep 17 00:00:00 2001 From: Onewbiek Date: Thu, 24 Aug 2023 23:37:39 -0400 Subject: [PATCH 02/19] extend drishti to support Recorder traces --- drishti/{main.py => handle_darshan.py} | 397 +------- drishti/handle_recorder.py | 1185 ++++++++++++++++++++++++ drishti/includes.py | 192 ++++ drishti/reporter.py | 132 +++ requirements.txt | 1 + setup.py | 5 +- 6 files changed, 1563 insertions(+), 349 deletions(-) rename drishti/{main.py => handle_darshan.py} (82%) create mode 100644 drishti/handle_recorder.py create mode 100644 drishti/includes.py create mode 100644 drishti/reporter.py diff --git a/drishti/main.py b/drishti/handle_darshan.py similarity index 82% rename from drishti/main.py rename to drishti/handle_darshan.py index 3afb96c..6d4e70f 100644 --- a/drishti/main.py +++ b/drishti/handle_darshan.py @@ -9,7 +9,6 @@ import shlex import shutil import datetime -import argparse import subprocess import pandas as pd @@ -17,230 +16,17 @@ import darshan import darshan.backend.cffi_backend as darshanll -from rich import print, box, rule -from rich.console import Console, Group +from rich import print, box +from rich.console import Group from rich.padding import Padding -from rich.text import Text from rich.syntax import Syntax from rich.panel import Panel from rich.terminal_theme import TerminalTheme from rich.terminal_theme import MONOKAI -from subprocess import call from packaging import version - -RECOMMENDATIONS = 0 -HIGH = 1 -WARN = 2 -INFO = 3 -OK = 4 - -ROOT = os.path.abspath(os.path.dirname(__file__)) - -TARGET_USER = 1 -TARGET_DEVELOPER = 2 -TARGET_SYSTEM = 3 - -insights_operation = [] -insights_metadata = [] -insights_dxt = [] - -insights_total = dict() - -insights_total[HIGH] = 0 -insights_total[WARN] = 0 -insights_total[RECOMMENDATIONS] = 0 - -THRESHOLD_OPERATION_IMBALANCE = 0.1 -THRESHOLD_SMALL_REQUESTS = 0.1 -THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000 -THRESHOLD_MISALIGNED_REQUESTS = 0.1 -THRESHOLD_METADATA = 0.1 -THRESHOLD_METADATA_TIME_RANK = 30 # seconds -THRESHOLD_RANDOM_OPERATIONS = 0.2 -THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE = 1000 -THRESHOLD_STRAGGLERS = 0.15 -THRESHOLD_IMBALANCE = 0.30 -THRESHOLD_INTERFACE_STDIO = 0.1 -THRESHOLD_COLLECTIVE_OPERATIONS = 0.5 -THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE = 1000 - -INSIGHTS_STDIO_HIGH_USAGE = 'S01' -INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' -INSIGHTS_POSIX_READ_COUNT_INTENSIVE = 'P02' -INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE = 'P03' -INSIGHTS_POSIX_READ_SIZE_INTENSIVE = 'P04' -INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE = 'P05' -INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE = 'P06' -INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE = 'P07' -INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE = 'P08' -INSIGHTS_POSIX_REDUNDANT_READ_USAGE = 'P09' -INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE = 'P10' -INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE = 'P11' -INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE = 'P12' -INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE = 'P13' -INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE = 'P14' -INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE = 'P15' 
-INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE = 'P16' -INSIGHTS_POSIX_HIGH_METADATA_TIME = 'P17' -INSIGHTS_POSIX_SIZE_IMBALANCE = 'P18' -INSIGHTS_POSIX_TIME_IMBALANCE = 'P19' -INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE = 'P21' -INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE = 'P22' -INSIGHTS_MPI_IO_NO_USAGE = 'M01' -INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE = 'M02' -INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE = 'M03' -INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE = 'M04' -INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE = 'M05' -INSIGHTS_MPI_IO_BLOCKING_READ_USAGE = 'M06' -INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE = 'M07' -INSIGHTS_MPI_IO_AGGREGATORS_INTRA = 'M08' -INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' -INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' - -# TODO: need to verify the threashold to be between 0 and 1 -# TODO: read thresholds from file - -parser = argparse.ArgumentParser( - description='Drishti: ' -) - -parser.add_argument( - 'darshan', - help='Input .darshan file' -) - -parser.add_argument( - '--issues', - default=False, - action='store_true', - dest='only_issues', - help='Only displays the detected issues and hides the recommendations' -) - -parser.add_argument( - '--html', - default=False, - action='store_true', - dest='export_html', - help='Export the report as an HTML page' -) - -parser.add_argument( - '--svg', - default=False, - action='store_true', - dest='export_svg', - help='Export the report as an SVG image' -) - -parser.add_argument( - '--light', - default=False, - action='store_true', - dest='export_theme_light', - help='Use a light theme for the report when generating files' -) - -parser.add_argument( - '--size', - default=False, - dest='export_size', - help='Console width used for the report and generated files' -) - -parser.add_argument( - '--verbose', - default=False, - action='store_true', - dest='verbose', - help='Display extended details for the recommendations' -) - -parser.add_argument( - '--code', - default=False, - action='store_true', - dest='code', - help='Display insights identification code' -) - -parser.add_argument( - '--path', - default=False, - action='store_true', - dest='full_path', - help='Display the full file path for the files that triggered the issue' -) - -parser.add_argument( - '--csv', - default=False, - action='store_true', - dest='export_csv', - help='Export a CSV with the code of all issues that were triggered' -) - -parser.add_argument( - '--json', - default=False, - dest='json', - help=argparse.SUPPRESS) - -args = parser.parse_args() - -if args.export_size: - console = Console(record=True, width=int(args.export_size)) -else: - console = Console(record=True) - -csv_report = [] - - -def validate_thresholds(): - """ - Validate thresholds defined by the user. - """ - assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) - assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) - assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) - assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) - assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) - - assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) - - -def clear(): - """ - Clear the screen with the comment call based on the operating system. - """ - _ = call('clear' if os.name == 'posix' else 'cls') - - -def convert_bytes(bytes_number): - """ - Convert bytes into formatted string. 
- """ - tags = [ - 'bytes', - 'KB', - 'MB', - 'GB', - 'TB', - 'PB', - 'EB' - ] - - i = 0 - double_bytes = bytes_number - - while (i < len(tags) and bytes_number >= 1024): - double_bytes = bytes_number / 1024.0 - i = i + 1 - bytes_number = bytes_number / 1024 - - return str(round(double_bytes, 2)) + ' ' + tags[i] +from .includes import * def is_available(name): @@ -249,71 +35,6 @@ def is_available(name): return shutil.which(name) is not None -def message(code, target, level, issue, recommendations=None, details=None): - """ - Display the message on the screen with level, issue, and recommendation. - """ - icon = ':arrow_forward:' - - if level in (HIGH, WARN): - insights_total[level] += 1 - - if level == HIGH: - color = '[red]' - elif level == WARN: - color = '[orange1]' - elif level == OK: - color = '[green]' - else: - color = '' - - messages = [ - '{}{}{} {}'.format( - color, - icon, - ' [' + code + ']' if args.code else '', - issue - ) - ] - - if args.export_csv: - csv_report.append(code) - - if details: - for detail in details: - messages.append(' {}:left_arrow_curving_right: {}'.format( - color, - detail['message'] - ) - ) - - if recommendations: - if not args.only_issues: - messages.append(' [white]:left_arrow_curving_right: [b]Recommendations:[/b]') - - for recommendation in recommendations: - messages.append(' :left_arrow_curving_right: {}'.format(recommendation['message'])) - - if args.verbose and 'sample' in recommendation: - messages.append( - Padding( - Panel( - recommendation['sample'], - title='Solution Example Snippet', - title_align='left', - padding=(1, 2) - ), - (1, 0, 1, 7) - ) - ) - - insights_total[RECOMMENDATIONS] += len(recommendations) - - return Group( - *messages - ) - - def check_log_version(file, log_version, library_version): use_file = file @@ -363,18 +84,13 @@ def check_log_version(file, log_version, library_version): return use_file -def main(): - if not os.path.isfile(args.darshan): - print('Unable to open .darshan file.') - - sys.exit(os.EX_NOINPUT) - - # clear() +def handler(args): + init_console(args) validate_thresholds() insights_start_time = time.time() - log = darshanll.log_open(args.darshan) + log = darshanll.log_open(args.log_path) modules = darshanll.log_get_modules(log) @@ -384,7 +100,7 @@ def main(): library_version = darshanll.darshan.backend.cffi_backend.get_lib_version() # Make sure log format is of the same version - filename = check_log_version(args.darshan, log_version, library_version) + filename = check_log_version(args.log_path, log_version, library_version) darshanll.log_close(log) @@ -491,8 +207,6 @@ def main(): 'mpiio': uses_mpiio } - df_posix_files = df_posix - if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( total_size_stdio / total_size * 100.0, @@ -506,7 +220,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) ) if 'MPI-IO' not in modules: @@ -519,7 +233,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) ) 
######################################################################################################################################################################### @@ -527,10 +241,6 @@ def main(): if 'POSIX' in report.records: df = report.records['POSIX'].to_df() - #print(df) - #print(df['counters'].columns) - #print(df['fcounters'].columns) - ######################################################################################################################################################################### # Get number of write/read operations @@ -547,7 +257,7 @@ def main(): ) insights_metadata.append( - message(INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: @@ -556,7 +266,7 @@ def main(): ) insights_metadata.append( - message(INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) total_read_size = df['counters']['POSIX_BYTES_READ'].sum() @@ -564,22 +274,22 @@ def main(): total_size = total_written_size + total_read_size - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / (total_written_size + total_read_size) > THRESHOLD_OPERATION_IMBALANCE: + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( - total_written_size / (total_written_size + total_read_size) * 100.0, total_read_size / (total_written_size + total_read_size) * 100.0 + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) insights_metadata.append( - message(INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / (total_written_size + total_read_size) > THRESHOLD_OPERATION_IMBALANCE: + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( - total_written_size / (total_written_size + total_read_size) * 100.0, total_read_size / (total_written_size + total_read_size) * 100.0 + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) insights_metadata.append( - message(INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) ######################################################################################################################################################################### @@ -657,7 +367,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) # Get the number of small I/O operations (less than the stripe size) @@ -710,7 +420,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) ######################################################################################################################################################################### @@ -726,7 +436,7 @@ def main(): ) insights_metadata.append( - message(INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) + message(args, INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) ) if total_operations and total_file_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: @@ -760,7 +470,7 @@ def main(): ) insights_metadata.append( - message(INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + message(args, INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) ) ######################################################################################################################################################################### @@ -773,7 +483,7 @@ def main(): issue = 'Application might have redundant read traffic (more data read than the highest offset)' insights_metadata.append( - message(INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) + message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) ) max_write_offset = df['counters']['POSIX_MAX_BYTE_WRITTEN'].max() @@ -782,7 +492,7 @@ def main(): issue = 'Application might have redundant write traffic (more data written than the highest offset)' insights_metadata.append( - message(INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) + message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) ) ######################################################################################################################################################################### @@ -792,6 +502,7 @@ def main(): read_consecutive = df['counters']['POSIX_CONSEC_READS'].sum() #print('READ Consecutive: {} ({:.2f}%)'.format(read_consecutive, read_consecutive / total_reads * 100)) + read_sequential = df['counters']['POSIX_SEQ_READS'].sum() read_sequential -= read_consecutive #print('READ Sequential: {} ({:.2f}%)'.format(read_sequential, read_sequential / total_reads * 100)) @@ -812,7 +523,7 @@ def 
main(): ] insights_operation.append( - message(INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) ) else: issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( @@ -821,15 +532,13 @@ def main(): ) insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) ) write_consecutive = df['counters']['POSIX_CONSEC_WRITES'].sum() - #print('WRITE Consecutive: {} ({:.2f}%)'.format(write_consecutive, write_consecutive / total_writes * 100)) write_sequential = df['counters']['POSIX_SEQ_WRITES'].sum() write_sequential -= write_consecutive - #print('WRITE Sequential: {} ({:.2f}%)'.format(write_sequential, write_sequential / total_writes * 100)) write_random = total_writes - write_consecutive - write_sequential #print('WRITE Random: {} ({:.2f}%)'.format(write_random, write_random / total_writes * 100)) @@ -847,7 +556,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) ) else: issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( @@ -856,13 +565,12 @@ def main(): ) insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) ) ######################################################################################################################################################################### # Shared file with small operations - # print(df['counters'].loc[(df['counters']['rank'] == -1)]) shared_files = df['counters'].loc[(df['counters']['rank'] == -1)] @@ -913,7 +621,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) total_shared_writes = shared_files['POSIX_WRITES'].sum() @@ -960,7 +668,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) ######################################################################################################################################################################### @@ -991,7 +699,7 @@ def main(): ) insights_metadata.append( - message(INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) + message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) ) # We already have a single line for each shared-file access @@ -1046,7 +754,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) ) # POSIX_F_FASTEST_RANK_TIME @@ -1101,7 
+809,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) ) aggregated = df['counters'].loc[(df['counters']['rank'] != -1)][ @@ -1164,7 +872,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) imbalance_count = 0 @@ -1214,7 +922,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) ######################################################################################################################################################################### @@ -1225,8 +933,6 @@ def main(): df_mpiio['counters'] = df_mpiio['counters'].assign(id=lambda d: d['id'].astype(str)) - #print(df_mpiio) - # Get the files responsible detected_files = [] @@ -1265,7 +971,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) else: issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( @@ -1274,7 +980,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) + message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) ) df_mpiio_collective_writes = df_mpiio['counters'] #.loc[(df_mpiio['counters']['MPIIO_COLL_WRITES'] > 0)] @@ -1312,7 +1018,7 @@ def main(): ] insights_operation.append( - message(INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) else: issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( @@ -1321,7 +1027,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) + message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) ) ######################################################################################################################################################################### @@ -1358,7 +1064,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) ) if df_mpiio['counters']['MPIIO_NB_WRITES'].sum() == 0: @@ -1383,7 +1089,7 @@ def main(): ) insights_operation.append( - message(INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) ) ######################################################################################################################################################################### @@ -1448,21 +1154,21 @@ def 
main(): ] insights_operation.append( - message(INSIGHTS_MPI_IO_AGGREGATORS_INTER, TARGET_USER, HIGH, issue, recommendation) + message(args, INSIGHTS_MPI_IO_AGGREGATORS_INTER, TARGET_USER, HIGH, issue, recommendation) ) if cb_nodes < NUMBER_OF_COMPUTE_NODES: issue = 'Application is using intra-node aggregators' insights_operation.append( - message(INSIGHTS_MPI_IO_AGGREGATORS_INTRA, TARGET_USER, OK, issue) + message(args, INSIGHTS_MPI_IO_AGGREGATORS_INTRA, TARGET_USER, OK, issue) ) if cb_nodes == NUMBER_OF_COMPUTE_NODES: issue = 'Application is using one aggregator per compute node' insights_operation.append( - message(INSIGHTS_MPI_IO_AGGREGATORS_OK, TARGET_USER, OK, issue) + message(args, INSIGHTS_MPI_IO_AGGREGATORS_OK, TARGET_USER, OK, issue) ) @@ -1491,7 +1197,7 @@ def main(): recommendation.append(new_message) insights_dxt.append( - message(code, TARGET_DEVELOPER, level, issue, recommendation) + message(args, code, TARGET_DEVELOPER, level, issue, recommendation) ) ######################################################################################################################################################################### @@ -1518,7 +1224,7 @@ def main(): job['exe'].split()[0] ), ' [b]DARSHAN[/b]: [white]{}[/white]'.format( - os.path.basename(args.darshan) + os.path.basename(args.log_path) ), ' [b]EXECUTION TIME[/b]: [white]{} to {} ({:.2f} hours)[/white]'.format( job_start, @@ -1541,7 +1247,7 @@ def main(): ' '.join(hints) ) ]), - title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.3[/b]', + title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', title_align='left', subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( insights_total[HIGH], @@ -1638,14 +1344,14 @@ def main(): if args.export_html: console.save_html( - '{}.html'.format(args.darshan), + '{}.html'.format(args.log_path), theme=export_theme, clear=False ) if args.export_svg: console.save_svg( - '{}.svg'.format(args.darshan), + '{}.svg'.format(args.log_path), title='Drishti', theme=export_theme, clear=False @@ -1697,7 +1403,7 @@ def main(): detected_issues[report] = True filename = '{}-summary.csv'.format( - args.darshan.replace('.darshan', '') + args.log_path.replace('.darshan', '') ) with open(filename, 'w') as f: @@ -1705,6 +1411,3 @@ def main(): w.writerow(detected_issues.keys()) w.writerow(detected_issues.values()) - -if __name__ == '__main__': - main() diff --git a/drishti/handle_recorder.py b/drishti/handle_recorder.py new file mode 100644 index 0000000..18136f3 --- /dev/null +++ b/drishti/handle_recorder.py @@ -0,0 +1,1185 @@ +#!/usr/bin/env python3 + +import os +import csv +import time +import json + +import datetime + +import pandas as pd + +from rich import print, box +from rich.console import Group +from rich.padding import Padding +from rich.syntax import Syntax +from rich.panel import Panel +from rich.terminal_theme import TerminalTheme +from rich.terminal_theme import MONOKAI + +from recorder_utils import RecorderReader +from recorder_utils.build_offset_intervals import build_offset_intervals + +from .includes import * + + +def get_modules(reader): + func_list = reader.funcs + ranks = reader.GM.total_ranks + modules = set() + + for rank in range(ranks): + for i in range(reader.LMs[rank].total_records): + record = reader.records[rank][i] + func_name = func_list[record.func_id] + if 'MPI_File' in func_name: + modules.add('MPI-IO') + elif 'MPI' in func_name: + modules.add('MPI') + elif 'H5' in func_name: + 
modules.add('H5F') + else: modules.add('POSIX') + + return modules + + +def get_accessed_files(reader): + ranks = reader.GM.total_ranks + filemap = {} + for rank in range(ranks): + filemap.update(reader.LMs[rank].filemap) + + return filemap + + +def init_df_posix_recordes(reader): + func_list = reader.funcs + ranks = reader.GM.total_ranks + records = [] + for rank in range(ranks): + for i in range(reader.LMs[rank].total_records): + record = reader.records[rank][i] + func_name = func_list[record.func_id] + + if 'MPI' not in func_name and 'H5' not in func_name: + records.append( [rank, func_name, record.tstart, record.tend] ) + + head = ['rank', 'function', 'start', 'end'] + df_posix_records = pd.DataFrame(records, columns=head) + return df_posix_records + + +def handler(args): + init_console(args) + validate_thresholds() + + insights_start_time = time.time() + + reader = RecorderReader(args.log_path) + df_intervals = build_offset_intervals(reader) + df_posix_records = init_df_posix_recordes(reader) + + modules = get_modules(reader) + unique_files = get_accessed_files(reader) + + def add_api(row): + if 'MPI' in row['function']: + return 'MPIIO' + elif 'H5' in row['function']: + return 'H5F' + else: + return 'POSIX' + + df_intervals['api'] = df_intervals.apply(add_api, axis=1) + + def add_duration(row): + return row['end'] - row['start'] + + df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) + df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) + + ######################################################################################################################################################################### + + # Check usage of POSIX, and MPI-IO per file + total_size_stdio = 0 + total_size_posix = 0 + total_size_mpiio = 0 + total_size = 0 + + total_files = len(unique_files) + total_files_stdio = 0 + total_files_posix = 0 + total_files_mpiio = 0 + + for fid in unique_files.keys(): + df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == fid)] + df_stdio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'STDIO')] + df_posix_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'POSIX')] + df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPIIO')] + + if len(df_stdio_intervals_in_one_file): + total_files_stdio += 1 + total_size_stdio += df_stdio_intervals_in_one_file['size'].sum() + + if len(df_posix_intervals_in_one_file): + total_files_posix += 1 + total_size_posix += df_posix_intervals_in_one_file['size'].sum() + + if len(df_mpiio_intervals_in_one_file): + total_files_mpiio += 1 + total_size_mpiio += df_mpiio_intervals_in_one_file['size'].sum() + + + # Since POSIX will capture both POSIX-only accesses and those comming from MPI-IO, we can subtract those + if total_size_posix > 0 and total_size_posix >= total_size_mpiio: + total_size_posix -= total_size_mpiio + + total_size = total_size_stdio + total_size_posix + total_size_mpiio + + assert(total_size_stdio >= 0) + assert(total_size_posix >= 0) + assert(total_size_mpiio >= 0) + + if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: + issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( + total_size_stdio / total_size * 100.0, + convert_bytes(total_size_stdio) + ) + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + 
insights_operation.append( + message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + if 'MPI-IO' not in modules: + issue = 'Application is using low-performance interface' + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + ######################################################################################################################################################################### + + if df_intervals['api'].eq('POSIX').any(): + df_posix = df_intervals[(df_intervals['api'] == 'POSIX')] + + ######################################################################################################################################################################### + + # Get number of write/read operations + total_reads = len(df_posix[(df_posix['function'].str.contains('read'))]) + total_writes = len(df_posix[~(df_posix['function'].str.contains('read'))]) + + # Get total number of I/O operations + total_operations = total_writes + total_reads + + # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + total_read_size = df_posix[(df_posix['function'].str.contains('read'))]['size'].sum() + total_written_size = df_posix[~(df_posix['function'].str.contains('read'))]['size'].sum() + + total_size = total_written_size + total_read_size + + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format(
+                total_written_size / total_size * 100.0, total_read_size / total_size * 100.0
+            )
+
+            insights_metadata.append(
+                message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None)
+            )
+
+        #########################################################################################################################################################################
+
+        # Get the number of small I/O operations (less than 1 MB)
+
+        total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)])
+        total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)])
+
+        detected_files = [] # [fname, num of read, num of write]
+        for fid in unique_files.keys():
+            read_cnt = len(df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)])
+            write_cnt = len(df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)])
+            detected_files.append([unique_files[fid], read_cnt, write_cnt])
+
+        if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE:
+            issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format(
+                total_reads_small, total_reads_small / total_reads * 100.0
+            )
+
+            detail = []
+            recommendation = []
+
+            for file in detected_files:
+                if file[1] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2):
+                    detail.append(
+                        {
+                            'message': '{} ({:.2f}%) small read requests are to "{}"'.format(
+                                file[1],
+                                file[1] / total_reads * 100.0,
+                                file[0] if args.full_path else os.path.basename(file[0])
+                            )
+                        }
+                    )
+
+            recommendation.append(
+                {
+                    'message': 'Consider buffering read operations into larger more contiguous ones'
+                }
+            )
+
+            if 'MPI-IO' in modules:
+                recommendation.append(
+                    {
+                        'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones',
+                        'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default')
+                    }
+                )
+            else:
+                recommendation.append(
+                    {
+                        'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations'
+                    }
+                )
+
+            insights_operation.append(
+                message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail)
+            )
+
+        if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE:
+            issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format(
+                total_writes_small, total_writes_small / total_writes * 100.0
+            )
+
+            detail = []
+            recommendation = []
+
+            for file in detected_files:
+                if file[2] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2):
+                    detail.append(
+                        {
+                            'message': '{} ({:.2f}%) small write requests are to "{}"'.format(
+                                file[2],
+                                file[2] / total_writes * 100.0,
+                                file[0] if args.full_path else os.path.basename(file[0])
+                            )
+                        }
+                    )
+
+            recommendation.append(
+                {
+                    'message': 'Consider buffering write operations into larger more contiguous ones'
+                }
+            )
+
+            if 'MPI-IO' in modules:
+                recommendation.append(
+                    {
+                        'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones',
+                        'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default')
+                    }
+                )
+            else:
+                recommendation.append(
+                    {
+                        'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations'
+                    }
+                )
+
+            insights_operation.append(
+                message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail)
+            )
+
+        #########################################################################################################################################################################
+
+        # How many requests are misaligned?
+ # TODO: + + ######################################################################################################################################################################### + + # Redundant read-traffic (based on Phill) + # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) + max_read_offset = df_posix[(df_posix['function'].str.contains('read'))]['offset'].max() + + if max_read_offset > total_read_size: + issue = 'Application might have redundant read traffic (more data read than the highest offset)' + + insights_metadata.append( + message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + max_write_offset = df_posix[~(df_posix['function'].str.contains('read'))]['offset'].max() + + if max_write_offset > total_written_size: + issue = 'Application might have redundant write traffic (more data written than the highest offset)' + + insights_metadata.append( + message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + ######################################################################################################################################################################### + + # Check for a lot of random operations + + grp_posix_by_fid = df_posix.groupby('file_id') + + read_consecutive = 0 + read_sequential = 0 + read_random = 0 + + for fid, df_filtered in grp_posix_by_fid: + df_filtered = df_filtered[(df_filtered['function'].str.contains('read'))].sort_values('start') + + for i in range(len(df_filtered) - 1): + curr_interval = df_filtered.iloc[i] + next_interval = df_filtered.iloc[i + 1] + if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: + read_consecutive += 1 + elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: + read_sequential += 1 + else: + read_random += 1 + + if total_reads: + if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( + read_random, read_random / total_reads * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data model to have consecutive or sequential reads' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( + read_consecutive / total_reads * 100.0, + read_sequential / total_reads * 100.0 + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + write_consecutive = 0 + write_sequential = 0 + write_random = 0 + + for fid, df_filtered in grp_posix_by_fid: + df_filtered = df_filtered[~(df_filtered['function'].str.contains('read'))].sort_values('start') + + for i in range(len(df_filtered) - 1): + curr_interval = df_filtered.iloc[i] + next_interval = df_filtered.iloc[i + 1] + if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: + write_consecutive += 1 + elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: + write_sequential += 1 + else: + write_random += 1 + + if total_writes: + if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random write 
operations ({:.2f}%)'.format( + write_random, write_random / total_writes * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data model to have consecutive or sequential writes' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( + write_consecutive / total_writes * 100.0, + write_sequential / total_writes * 100.0 + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + ######################################################################################################################################################################### + + # Shared file with small operations + + # A file is shared if it's been read/written by more than 1 rank + detected_files = grp_posix_by_fid['rank'].nunique() + shared_files = set(detected_files[detected_files > 1].index) + + total_shared_reads = 0 + total_shared_reads_small = 0 + total_shared_writes = 0 + total_shared_writes_small = 0 + + detected_files = [] # [fname, num of read, num of write] + for fid in shared_files: + total_shared_reads += len(df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read'))]) + total_shared_writes += len(df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read'))]) + + read_cnt = len(df_posix[(df_posix['file_id'] == fid) + & (df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + write_cnt = len(df_posix[(df_posix['file_id'] == fid) + & ~(df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + detected_files.append([unique_files[fid], read_cnt, write_cnt]) + + total_shared_reads_small += read_cnt + total_shared_writes_small += write_cnt + + if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( + total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 + ) + + detail = [] + + for file in detected_files: + if file[1] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): + detail.append( + { + 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( + file[1], + file[1] / total_reads * 100.0, + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider coalesceing read requests into larger more contiguous ones using MPI-IO collective operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( + total_shared_writes_small, 
total_shared_writes_small / total_shared_writes * 100.0
+            )
+
+            detail = []
+
+            for file in detected_files:
+                if file[2] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2):
+                    detail.append(
+                        {
+                            'message': '{} ({:.2f}%) small write requests are to "{}"'.format(
+                                file[2],
+                                file[2] / total_writes * 100.0,
+                                file[0] if args.full_path else os.path.basename(file[0])
+                            )
+                        }
+                    )
+
+            recommendation = [
+                {
+                    'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations',
+                    'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default')
+                }
+            ]
+
+            insights_operation.append(
+                message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail)
+            )
+
+        #########################################################################################################################################################################
+
+        # TODO: Here I assume all operations other than write/read are metadata operations
+        df_posix_metadata = df_posix_records[~(df_posix_records['function'].str.contains('read|write|print', na=False))]
+        df_detected = df_posix_metadata.groupby('rank')['duration'].sum().reset_index()
+        has_long_metadata = df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)]
+
+        if not has_long_metadata.empty:
+            issue = 'There are {} ranks where metadata operations take over {} seconds'.format(
+                len(has_long_metadata), THRESHOLD_METADATA_TIME_RANK
+            )
+
+            recommendation = [
+                {
+                    'message': 'Attempt to combine files, reduce, or cache metadata operations'
+                }
+            ]
+
+            if 'H5F' in modules:
+                recommendation.append(
+                    {
+                        'message': 'Since your application uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()',
+                        'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default')
+                    },
+                    {
+                        'message': 'Since your application uses HDF5, try using metadata cache to defer metadata operations',
+                        'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default')
+                    }
+                )
+
+            insights_metadata.append(
+                message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation)
+            )
+
+        # We already have a single line for each shared-file access
+        # To check for stragglers, we can check the difference between the
+
+        # POSIX_FASTEST_RANK_BYTES
+        # POSIX_SLOWEST_RANK_BYTES
+        # POSIX_VARIANCE_RANK_BYTES
+
+        stragglers_count = 0
+
+        detected_files = []
+
+        for fid in shared_files:
+            df_posix_in_one_file = df_posix[(df_posix['file_id'] == fid)]
+            total_transfer_size = df_posix_in_one_file['size'].sum()
+
+            df_detected = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index()
+            slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size']
+            fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size']
+
+            if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS:
+                stragglers_count += 1
+
+                detected_files.append([
+                    unique_files[fid], abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100
+                ])
+
+        if stragglers_count:
+            issue = 'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format(
+                stragglers_count
+            )
+
+            detail = []
+
+            for file in
detected_files: + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + file[1], + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + ) + + # POSIX_F_FASTEST_RANK_TIME + # POSIX_F_SLOWEST_RANK_TIME + # POSIX_F_VARIANCE_RANK_TIME + + stragglers_count = 0 + + detected_files = [] + + for fid in shared_files: + df_posix_in_one_file = df_posix[(df_posix['file_id'] == fid)] + total_transfer_time = df_posix_in_one_file['duration'].sum() + + df_detected = df_posix_in_one_file.groupby('rank')['duration'].sum().reset_index() + + slowest_rank_time = df_detected['duration'].max() + fastest_rank_time = df_detected['duration'].min() + + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + stragglers_count += 1 + + detected_files.append([ + unique_files[fid], abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 + ]) + + if stragglers_count: + issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( + stragglers_count + ) + + detail = [] + + for file in detected_files: + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + file[1], + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + ) + + # Get the individual files responsible for imbalance + imbalance_count = 0 + + detected_files = [] + + for fid in unique_files.keys(): + if fid in shared_files: continue + df_detected = df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read'))] + + max_bytes_written = df_detected['size'].max() + min_bytes_written = df_detected['size'].min() + + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + imbalance_count += 1 + + detected_files.append([ + unique_files[fid], abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 + ]) + + if imbalance_count: + issue = 'Detected write imbalance when accessing {} individual files'.format( + imbalance_count + ) + + detail = [] + + for file in detected_files: + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + file[1], + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size 
and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + imbalance_count = 0 + + detected_files = [] + + for fid in shared_files: + df_detected = df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read'))] + + max_bytes_read = df_detected['size'].max() + min_bytes_read = df_detected['size'].min() + + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + imbalance_count += 1 + + detected_files.append([ + unique_files[fid], abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 + ]) + + if imbalance_count: + issue = 'Detected read imbalance when accessing {} individual files.'.format( + imbalance_count + ) + + detail = [] + + for file in detected_files: + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + file[1], + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + ######################################################################################################################################################################### + + if df_intervals['api'].eq('MPIIO').any(): + df_mpiio = df_intervals[(df_intervals['api'] == 'MPIIO')] + + df_mpiio_reads = df_mpiio[(df_mpiio['function'].str.contains('read'))] + mpiio_indp_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) + mpiio_coll_reads = len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('_all'))]) + total_mpiio_read_operations = mpiio_indp_reads + mpiio_coll_reads + + df_mpiio_writes = df_mpiio[~(df_mpiio['function'].str.contains('read'))] + mpiio_indp_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) + mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) + total_mpiio_write_operations = mpiio_indp_writes + mpiio_coll_writes + + detected_files = [] # [fname, total_read, total_write] + for fid in unique_files.keys(): + read_cnt = len(df_mpiio_reads[(df_mpiio_reads['file_id'] == fid) & (df_mpiio_reads['function'].str.contains('read'))]) + 
write_cnt = len(df_mpiio_reads[(df_mpiio_reads['file_id'] == fid) & ~(df_mpiio_reads['function'].str.contains('read'))]) + detected_files.append([unique_files[fid], read_cnt, write_cnt]) + + if mpiio_coll_reads == 0: + if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( + mpiio_indp_reads, + mpiio_indp_reads / (total_mpiio_read_operations) * 100 + ) + + detail = [] + + for file in detected_files: + total_cnt = file[1] + file[2] + if total_cnt and file[1] / total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS and total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + detail.append( + { + 'message': '{} ({}%) of independent reads to "{}"'.format( + file[1], + file[1] / total_cnt * 100, + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + else: + issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( + mpiio_coll_reads, + mpiio_coll_reads / total_mpiio_read_operations * 100 + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + if mpiio_coll_writes == 0: + if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( + mpiio_indp_writes, + mpiio_indp_writes / (total_mpiio_write_operations) * 100 + ) + + detail = [] + + for file in detected_files: + total_cnt = file[1] + file[2] + if total_cnt and file[2] / total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS and total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + detail.append( + { + 'message': '{} ({}%) of independent writes to "{}"'.format( + file[2], + file[2] / total_cnt * 100, + file[0] if args.full_path else os.path.basename(file[0]) + ) + } + ) + + recommendation = [ + { + 'message': 'Use collective write operations (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + else: + issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( + mpiio_coll_writes, + mpiio_coll_writes / total_mpiio_write_operations * 100 + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + ######################################################################################################################################################################### + + # Look for usage of non-block operations + + # Look for HDF5 file extension + + has_hdf5_extension = False + + for fid in unique_files.keys(): + fname = unique_files[fid] + if fname.endswith('.h5') or fname.endswith('.hdf5'): + has_hdf5_extension = True + + if len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) == 0: + issue = 'Application could benefit from non-blocking (asynchronous) reads' + + recommendation = [] + + if 'H5F' in modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + if len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) == 0: + issue = 'Application could benefit from non-blocking (asynchronous) writes' + + recommendation = [] + + if 'H5F' in modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + ######################################################################################################################################################################### + + # Nodes and MPI-IO aggregators + # If the application uses collective reads or collective writes, look for the number of aggregators + # TODO: + + 
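The aggregator check above is left as a TODO even though the M08-M10 insight codes already exist, so the follow-up only needs to compare the aggregator count against the compute-node count once both are known. Below is a minimal sketch of how that comparison could be wired up, assuming the aggregator count were available (e.g. from ROMIO's cb_nodes hint) and the node count were known; the helper name and the mapping of cases to the M08/M09/M10 codes are illustrative assumptions, not part of this patch.

def check_mpiio_aggregators(args, cb_nodes, compute_nodes):
    # Without both values there is nothing to classify
    if not cb_nodes or not compute_nodes:
        return None

    if cb_nodes == compute_nodes:
        # One aggregator per compute node is the commonly recommended setting
        return message(args, INSIGHTS_MPI_IO_AGGREGATORS_OK, TARGET_USER, OK,
                       'Application is using one MPI-IO aggregator per compute node')

    # Assumed mapping: more aggregators than nodes -> intra-node code, fewer -> inter-node code
    code = INSIGHTS_MPI_IO_AGGREGATORS_INTRA if cb_nodes > compute_nodes else INSIGHTS_MPI_IO_AGGREGATORS_INTER
    issue = 'Application is using {} MPI-IO aggregators across {} compute nodes, consider setting one aggregator per compute node'.format(
        cb_nodes, compute_nodes
    )
    return message(args, code, TARGET_USER, HIGH, issue)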
######################################################################################################################################################################### + + NUMBER_OF_COMPUTE_NODES = 0 + + ######################################################################################################################################################################### + + codes = [] + if args.json: + f = open(args.json) + data = json.load(f) + + for key, values in data.items(): + for value in values: + code = value['code'] + codes.append(code) + + level = value['level'] + issue = value['issue'] + recommendation = [] + for rec in value['recommendations']: + new_message = {'message': rec} + recommendation.append(new_message) + + insights_dxt.append( + message(args, code, TARGET_DEVELOPER, level, issue, recommendation) + ) + + ######################################################################################################################################################################### + + + insights_end_time = time.time() + + console.print() + + console.print( + Panel( + '\n'.join([ + ' [b]RECORDER[/b]: [white]{}[/white]'.format( + os.path.basename(args.log_path) + ), + ' [b]FILES[/b]: [white]{} files ({} use STDIO, {} use POSIX, {} use MPI-IO)[/white]'.format( + total_files, + total_files_stdio, + total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count + total_files_mpiio + ), + ' [b]COMPUTE NODES[/b] [white]{}[/white]'.format( + NUMBER_OF_COMPUTE_NODES + ), + ' [b]PROCESSES[/b] [white]{}[/white]'.format( + reader.GM.total_ranks + ), + ]), + title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', + title_align='left', + subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( + insights_total[HIGH], + insights_total[WARN], + insights_total[RECOMMENDATIONS], + ), + subtitle_align='left', + padding=1 + ) + ) + + console.print() + + if insights_metadata: + console.print( + Panel( + Padding( + Group( + *insights_metadata + ), + (1, 1) + ), + title='METADATA', + title_align='left' + ) + ) + + if insights_operation: + console.print( + Panel( + Padding( + Group( + *insights_operation + ), + (1, 1) + ), + title='OPERATIONS', + title_align='left' + ) + ) + + if insights_dxt: + console.print( + Panel( + Padding( + Group( + *insights_dxt + ), + (1, 1) + ), + title='DXT', + title_align='left' + ) + ) + + console.print( + Panel( + ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( + datetime.datetime.now().year, + datetime.datetime.now(), + insights_end_time - insights_start_time + ), + box=box.SIMPLE + ) + ) + + if args.export_theme_light: + export_theme = TerminalTheme( + (255, 255, 255), + (0, 0, 0), + [ + (26, 26, 26), + (244, 0, 95), + (152, 224, 36), + (253, 151, 31), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (120, 120, 120), + (98, 94, 76), + ], + [ + (244, 0, 95), + (152, 224, 36), + (224, 213, 97), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (246, 246, 239), + ], + ) + else: + export_theme = MONOKAI + + if args.export_html: + console.save_html( + '{}.html'.format(args.log_path), + theme=export_theme, + clear=False + ) + + if args.export_svg: + console.save_svg( + '{}.svg'.format(args.log_path), + title='Drishti', + theme=export_theme, + clear=False + ) + + if args.export_csv: + issues = [ + 'JOB', + INSIGHTS_STDIO_HIGH_USAGE, + 
INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, + INSIGHTS_POSIX_READ_COUNT_INTENSIVE, + INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, + INSIGHTS_POSIX_READ_SIZE_INTENSIVE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, + INSIGHTS_POSIX_REDUNDANT_READ_USAGE, + INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, + INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_METADATA_TIME, + INSIGHTS_POSIX_SIZE_IMBALANCE, + INSIGHTS_POSIX_TIME_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, + INSIGHTS_MPI_IO_NO_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, + INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, + INSIGHTS_MPI_IO_AGGREGATORS_INTRA, + INSIGHTS_MPI_IO_AGGREGATORS_INTER, + INSIGHTS_MPI_IO_AGGREGATORS_OK + ] + if codes: + issues.extend(codes) + + detected_issues = dict.fromkeys(issues, False) + detected_issues['JOB'] = None + + for report in csv_report: + detected_issues[report] = True + + filename = '{}-summary.csv'.format( + args.log_path + ) + + with open(filename, 'w') as f: + w = csv.writer(f) + w.writerow(detected_issues.keys()) + w.writerow(detected_issues.values()) + + diff --git a/drishti/includes.py b/drishti/includes.py new file mode 100644 index 0000000..e801cdf --- /dev/null +++ b/drishti/includes.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 + +import os + +from rich.console import Console, Group +from rich.padding import Padding +from rich.panel import Panel + + +RECOMMENDATIONS = 0 +HIGH = 1 +WARN = 2 +INFO = 3 +OK = 4 + +ROOT = os.path.abspath(os.path.dirname(__file__)) + +TARGET_USER = 1 +TARGET_DEVELOPER = 2 +TARGET_SYSTEM = 3 + +insights_operation = [] +insights_metadata = [] +insights_dxt = [] + +insights_total = dict() + +insights_total[HIGH] = 0 +insights_total[WARN] = 0 +insights_total[RECOMMENDATIONS] = 0 + +THRESHOLD_OPERATION_IMBALANCE = 0.1 +THRESHOLD_SMALL_REQUESTS = 0.1 +THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000 +THRESHOLD_MISALIGNED_REQUESTS = 0.1 +THRESHOLD_METADATA = 0.1 +THRESHOLD_METADATA_TIME_RANK = 30 # seconds +THRESHOLD_RANDOM_OPERATIONS = 0.2 +THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE = 1000 +THRESHOLD_STRAGGLERS = 0.15 +THRESHOLD_IMBALANCE = 0.30 +THRESHOLD_INTERFACE_STDIO = 0.1 +THRESHOLD_COLLECTIVE_OPERATIONS = 0.5 +THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE = 1000 +THRESHOLD_SMALL_BYTES = 1048576 # 1 MB + +INSIGHTS_STDIO_HIGH_USAGE = 'S01' +INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' +INSIGHTS_POSIX_READ_COUNT_INTENSIVE = 'P02' +INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE = 'P03' +INSIGHTS_POSIX_READ_SIZE_INTENSIVE = 'P04' +INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE = 'P05' +INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE = 'P06' +INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE = 'P07' +INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE = 'P08' +INSIGHTS_POSIX_REDUNDANT_READ_USAGE = 'P09' +INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE = 'P10' +INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE = 'P11' +INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE = 'P12' +INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE = 'P13' 
+INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE = 'P14' +INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE = 'P15' +INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE = 'P16' +INSIGHTS_POSIX_HIGH_METADATA_TIME = 'P17' +INSIGHTS_POSIX_SIZE_IMBALANCE = 'P18' +INSIGHTS_POSIX_TIME_IMBALANCE = 'P19' +INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE = 'P21' +INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE = 'P22' +INSIGHTS_MPI_IO_NO_USAGE = 'M01' +INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE = 'M02' +INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE = 'M03' +INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE = 'M04' +INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE = 'M05' +INSIGHTS_MPI_IO_BLOCKING_READ_USAGE = 'M06' +INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE = 'M07' +INSIGHTS_MPI_IO_AGGREGATORS_INTRA = 'M08' +INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' +INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' + +# TODO: need to verify the threashold to be between 0 and 1 +# TODO: read thresholds from file + + +console = Console(record=True) +csv_report = [] + + +def init_console(args): + if args.export_size: console.width = int(args.export_size) + + +def validate_thresholds(): + """ + Validate thresholds defined by the user. + """ + assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) + assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) + assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) + assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) + assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) + + assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) + + +def convert_bytes(bytes_number): + """ + Convert bytes into formatted string. + """ + tags = [ + 'bytes', + 'KB', + 'MB', + 'GB', + 'TB', + 'PB', + 'EB' + ] + + i = 0 + double_bytes = bytes_number + + while (i < len(tags) and bytes_number >= 1024): + double_bytes = bytes_number / 1024.0 + i = i + 1 + bytes_number = bytes_number / 1024 + + return str(round(double_bytes, 2)) + ' ' + tags[i] + + +def message(args, code, target, level, issue, recommendations=None, details=None): + """ + Display the message on the screen with level, issue, and recommendation. 
+ """ + icon = ':arrow_forward:' + + if level in (HIGH, WARN): + insights_total[level] += 1 + + if level == HIGH: + color = '[red]' + elif level == WARN: + color = '[orange1]' + elif level == OK: + color = '[green]' + else: + color = '' + + messages = [ + '{}{}{} {}'.format( + color, + icon, + ' [' + code + ']' if args.code else '', + issue + ) + ] + + if args.export_csv: + csv_report.append(code) + + if details: + for detail in details: + messages.append(' {}:left_arrow_curving_right: {}'.format( + color, + detail['message'] + ) + ) + + if recommendations: + if not args.only_issues: + messages.append(' [white]:left_arrow_curving_right: [b]Recommendations:[/b]') + + for recommendation in recommendations: + messages.append(' :left_arrow_curving_right: {}'.format(recommendation['message'])) + + if args.verbose and 'sample' in recommendation: + messages.append( + Padding( + Panel( + recommendation['sample'], + title='Solution Example Snippet', + title_align='left', + padding=(1, 2) + ), + (1, 0, 1, 7) + ) + ) + + insights_total[RECOMMENDATIONS] += len(recommendations) + + return Group( + *messages + ) diff --git a/drishti/reporter.py b/drishti/reporter.py new file mode 100644 index 0000000..f1ab847 --- /dev/null +++ b/drishti/reporter.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse + +from subprocess import call + + +LOG_TYPE_DARSHAN = 0 +LOG_TYPE_RECORDER = 1 + +parser = argparse.ArgumentParser( + description='Drishti: ' +) + +parser.add_argument( + 'log_path', + help='Input .darshan file or recorder folder' +) + +parser.add_argument( + '--issues', + default=False, + action='store_true', + dest='only_issues', + help='Only displays the detected issues and hides the recommendations' +) + +parser.add_argument( + '--html', + default=False, + action='store_true', + dest='export_html', + help='Export the report as an HTML page' +) + +parser.add_argument( + '--svg', + default=False, + action='store_true', + dest='export_svg', + help='Export the report as an SVG image' +) + +parser.add_argument( + '--light', + default=False, + action='store_true', + dest='export_theme_light', + help='Use a light theme for the report when generating files' +) + +parser.add_argument( + '--size', + default=False, + dest='export_size', + help='Console width used for the report and generated files' +) + +parser.add_argument( + '--verbose', + default=False, + action='store_true', + dest='verbose', + help='Display extended details for the recommendations' +) + +parser.add_argument( + '--code', + default=False, + action='store_true', + dest='code', + help='Display insights identification code' +) + +parser.add_argument( + '--path', + default=False, + action='store_true', + dest='full_path', + help='Display the full file path for the files that triggered the issue' +) + +parser.add_argument( + '--csv', + default=False, + action='store_true', + dest='export_csv', + help='Export a CSV with the code of all issues that were triggered' +) + +parser.add_argument( + '--json', + default=False, + dest='json', + help=argparse.SUPPRESS) + +args = parser.parse_args() + + +def clear(): + """ + Clear the screen with the comment call based on the operating system. 
+ """ + _ = call('clear' if os.name == 'posix' else 'cls') + + +def check_log_type(path): + if path.endswith('.darshan'): + if not os.path.isfile(path): + print('Unable to open .darshan file.') + sys.exit(os.EX_NOINPUT) + else: return LOG_TYPE_DARSHAN + else: # check whether is a valid recorder log + if not os.path.isdir(path): + print('Unable to open recorder folder.') + sys.exit(os.EX_NOINPUT) + else: return LOG_TYPE_RECORDER + + +def main(): + log_type = check_log_type(args.log_path) + + if log_type == LOG_TYPE_DARSHAN: + from . import handle_darshan + handle_darshan.handler(args) + + elif log_type == LOG_TYPE_RECORDER: + from . import handle_recorder + handle_recorder.handler(args) + diff --git a/requirements.txt b/requirements.txt index 1f1dc56..467f761 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ argparse darshan pandas rich==12.5.1 +recorder-utils diff --git a/setup.py b/setup.py index e680e7b..dd18cb6 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setuptools.setup( name="drishti-io", keywords="drishti", - version="0.4", + version="0.5", author="Jean Luca Bez, Suren Byna", author_email="jlbez@lbl.gov, sbyna@lbl.gov", description="", @@ -21,6 +21,7 @@ 'pandas', 'darshan', 'rich ==12.5.1', + 'recorder-utils', ], packages=[ 'drishti' @@ -33,7 +34,7 @@ include_package_data=True, entry_points={ "console_scripts": [ - "drishti=drishti.main:main" + "drishti=drishti.reporter:main" ] }, classifiers=[ From ae1dde1e3cf412e0a71a30dbcbf0c7f672820c53 Mon Sep 17 00:00:00 2001 From: Onewbiek Date: Fri, 3 Nov 2023 00:39:38 -0400 Subject: [PATCH 03/19] Limit details to 10 lines --- drishti/includes.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drishti/includes.py b/drishti/includes.py index e801cdf..3b921aa 100644 --- a/drishti/includes.py +++ b/drishti/includes.py @@ -77,6 +77,8 @@ INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' +DETAILS_MAX_SIZE = 10 + # TODO: need to verify the threashold to be between 0 and 1 # TODO: read thresholds from file @@ -88,6 +90,15 @@ def init_console(args): if args.export_size: console.width = int(args.export_size) + insights_operation.clear() + insights_metadata.clear() + insights_dxt.clear() + + insights_total[HIGH] = 0 + insights_total[WARN] = 0 + insights_total[RECOMMENDATIONS] = 0 + + csv_report.clear() def validate_thresholds(): """ @@ -158,7 +169,7 @@ def message(args, code, target, level, issue, recommendations=None, details=None csv_report.append(code) if details: - for detail in details: + for detail in details[:DETAILS_MAX_SIZE]: messages.append(' {}:left_arrow_curving_right: {}'.format( color, detail['message'] From a221578c35216baf154b219d458541538b4b5b64 Mon Sep 17 00:00:00 2001 From: Onewbiek Date: Fri, 3 Nov 2023 00:41:39 -0400 Subject: [PATCH 04/19] Add new function to split report for each file been tracked --- drishti/handle_recorder.py | 33 +- drishti/handle_recorder_split.py | 982 +++++++++++++++++++++++++++++++ drishti/reporter.py | 19 +- 3 files changed, 1005 insertions(+), 29 deletions(-) create mode 100644 drishti/handle_recorder_split.py diff --git a/drishti/handle_recorder.py b/drishti/handle_recorder.py index 18136f3..59462af 100644 --- a/drishti/handle_recorder.py +++ b/drishti/handle_recorder.py @@ -23,26 +23,6 @@ from .includes import * -def get_modules(reader): - func_list = reader.funcs - ranks = reader.GM.total_ranks - modules = set() - - for rank in range(ranks): - for i in range(reader.LMs[rank].total_records): - record = 
reader.records[rank][i] - func_name = func_list[record.func_id] - if 'MPI_File' in func_name: - modules.add('MPI-IO') - elif 'MPI' in func_name: - modules.add('MPI') - elif 'H5' in func_name: - modules.add('H5F') - else: modules.add('POSIX') - - return modules - - def get_accessed_files(reader): ranks = reader.GM.total_ranks filemap = {} @@ -79,7 +59,6 @@ def handler(args): df_intervals = build_offset_intervals(reader) df_posix_records = init_df_posix_recordes(reader) - modules = get_modules(reader) unique_files = get_accessed_files(reader) def add_api(row): @@ -98,6 +77,8 @@ def add_duration(row): df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) + modules = set(df_intervals['api'].unique()) + ######################################################################################################################################################################### # Check usage of POSIX, and MPI-IO per file @@ -156,7 +137,7 @@ def add_duration(row): message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) ) - if 'MPI-IO' not in modules: + if 'MPIIO' not in modules: issue = 'Application is using low-performance interface' recommendation = [ @@ -264,7 +245,7 @@ def add_duration(row): } ) - if 'MPI-IO' in modules: + if 'MPIIO' in modules: recommendation.append( { 'message': 'Since the appplication already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', @@ -308,7 +289,7 @@ def add_duration(row): } ) - if 'MPI-IO' in modules: + if 'MPIIO' in modules: recommendation.append( { 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', @@ -922,7 +903,7 @@ def add_duration(row): } ) - if 'MPI-IO' in modules: + if 'MPIIO' in modules: recommendation.append( { 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', @@ -947,7 +928,7 @@ def add_duration(row): } ) - if 'MPI-IO' in modules: + if 'MPIIO' in modules: recommendation.append( { 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', diff --git a/drishti/handle_recorder_split.py b/drishti/handle_recorder_split.py new file mode 100644 index 0000000..74bc899 --- /dev/null +++ b/drishti/handle_recorder_split.py @@ -0,0 +1,982 @@ +#!/usr/bin/env python3 + +import os +import csv +import time +import json + +import datetime + +import pandas as pd + +from rich import print, box +from rich.console import Group +from rich.padding import Padding +from rich.syntax import Syntax +from rich.panel import Panel +from rich.terminal_theme import TerminalTheme +from rich.terminal_theme import MONOKAI + +from recorder_utils import RecorderReader +from recorder_utils.build_offset_intervals import build_offset_intervals + +from .includes import * + + +def get_accessed_files(reader): + ranks = reader.GM.total_ranks + filemap = {} + for rank in range(ranks): + filemap.update(reader.LMs[rank].filemap) + + return filemap + + +def init_df_posix_recordes(reader): + func_list = reader.funcs + ranks = reader.GM.total_ranks + records = [] + for rank in range(ranks): + for i in range(reader.LMs[rank].total_records): + record = reader.records[rank][i] + func_name = func_list[record.func_id] + + if 'MPI' not in func_name and 'H5' not in func_name: + filename = None + if "open" in func_name or "close" in func_name or "creat" in func_name \ + or "seek" in func_name or "sync" in func_name: + fstr = record.args[0] + filename = fstr if type(fstr)==str else fstr.decode('utf-8') + filename = filename.replace('./', '') + + records.append( [filename, rank, func_name, record.tstart, record.tend] ) + + head = ['fname', 'rank', 'function', 'start', 'end'] + df_posix_records = pd.DataFrame(records, columns=head) + return df_posix_records + + +def handler(args): + reader = RecorderReader(args.log_path) + df_intervals = build_offset_intervals(reader) + df_posix_records = init_df_posix_recordes(reader) + + unique_files = get_accessed_files(reader) + + def add_api(row): + if 'MPI' in row['function']: + return 'MPIIO' + elif 'H5' in row['function']: + return 'H5F' + else: + return 'POSIX' + + df_intervals['api'] = df_intervals.apply(add_api, axis=1) + + def add_duration(row): + return row['end'] - row['start'] + + df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) + df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) + + ######################################################################################################################################################################### + for fid, fname in unique_files.items(): + console = Console(record=True) + init_console(args) + validate_thresholds() + insights_start_time = time.time() + + df_intervals_temp = df_intervals[(df_intervals['file_id'] == fid)] + if not len(df_intervals_temp): continue + + df_posix_records = df_posix_records[(df_posix_records['fname'] == fname)] + modules 
= set(df_intervals_temp['api'].unique()) + + # Check usage of POSIX, and MPI-IO per file + total_size_stdio = 0 + total_size_posix = 0 + total_size_mpiio = 0 + total_size = 0 + + df_stdio_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'STDIO')] + df_posix_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'POSIX')] + df_mpiio_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'MPIIO')] + + if len(df_stdio_intervals): + total_size_stdio += df_stdio_intervals['size'].sum() + + if len(df_posix_intervals): + total_size_posix += df_posix_intervals['size'].sum() + + if len(df_mpiio_intervals): + total_size_mpiio += df_mpiio_intervals['size'].sum() + + + # Since POSIX will capture both POSIX-only accesses and those coming from MPI-IO, we can subtract those + if total_size_posix > 0 and total_size_posix >= total_size_mpiio: + total_size_posix -= total_size_mpiio + + total_size = total_size_stdio + total_size_posix + total_size_mpiio + + assert(total_size_stdio >= 0) + assert(total_size_posix >= 0) + assert(total_size_mpiio >= 0) + + if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: + issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( + total_size_stdio / total_size * 100.0, + convert_bytes(total_size_stdio) + ) + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + insights_operation.append( + message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + if 'MPIIO' not in modules: + issue = 'Application is using low-performance interface' + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + ######################################################################################################################################################################### + + if df_intervals_temp['api'].eq('POSIX').any(): + df_posix = df_intervals_temp[(df_intervals_temp['api'] == 'POSIX')] + + ######################################################################################################################################################################### + + # Get number of write/read operations + total_reads = len(df_posix[(df_posix['function'].str.contains('read'))]) + total_writes = len(df_posix[~(df_posix['function'].str.contains('read'))]) + + # Get total number of I/O operations + total_operations = total_writes + total_reads + + # To check whether the application is write-intensive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write operation intensive ({:.2f}% writes vs. 
{:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + total_read_size = df_posix[(df_posix['function'].str.contains('read'))]['size'].sum() + total_written_size = df_posix[~(df_posix['function'].str.contains('read'))]['size'].sum() + + total_size = total_written_size + total_read_size + + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read size intensive ({:.2f}% write vs. {:.2f}% read)'.format( + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 + ) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + ######################################################################################################################################################################### + + # Get the number of small I/O operations (less than 1 MB) + + total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + + if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( + total_reads_small, total_reads_small / total_reads * 100.0 + ) + + recommendation = [] + + recommendation.append( + { + 'message': 'Consider buffering read operations into larger more contiguous ones' + } + ) + + if 'MPIIO' in modules: + recommendation.append( + { + 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ) + else: + recommendation.append( + { + 'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations' + } + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( + total_writes_small, total_writes_small / total_writes * 100.0 + ) + + recommendation = [] + + recommendation.append( + { + 'message': 'Consider buffering write operations into larger more contiguous ones' + } + ) + + if 'MPIIO' in modules: + recommendation.append( + { + 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ) + else: + recommendation.append( + { + 'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations' + } + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + ######################################################################################################################################################################### + + # How many requests are misaligned? 
+ # TODO: + + ######################################################################################################################################################################### + + # Redundant read-traffic (based on Phill) + # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) + max_read_offset = df_posix[(df_posix['function'].str.contains('read'))]['offset'].max() + + if max_read_offset > total_read_size: + issue = 'Application might have redundant read traffic (more data read than the highest offset)' + + insights_metadata.append( + message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + max_write_offset = df_posix[~(df_posix['function'].str.contains('read'))]['offset'].max() + + if max_write_offset > total_written_size: + issue = 'Application might have redundant write traffic (more data written than the highest offset)' + + insights_metadata.append( + message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + ######################################################################################################################################################################### + + # Check for a lot of random operations + + read_consecutive = 0 + read_sequential = 0 + read_random = 0 + + df_filtered = df_posix[(df_posix['function'].str.contains('read'))].sort_values('start') + + for i in range(len(df_filtered) - 1): + curr_interval = df_filtered.iloc[i] + next_interval = df_filtered.iloc[i + 1] + if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: + read_consecutive += 1 + elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: + read_sequential += 1 + else: + read_random += 1 + + if total_reads: + if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( + read_random, read_random / total_reads * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data model to have consecutive or sequential reads' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( + read_consecutive / total_reads * 100.0, + read_sequential / total_reads * 100.0 + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + write_consecutive = 0 + write_sequential = 0 + write_random = 0 + + + df_filtered = df_posix[~(df_posix['function'].str.contains('read'))].sort_values('start') + + for i in range(len(df_filtered) - 1): + curr_interval = df_filtered.iloc[i] + next_interval = df_filtered.iloc[i + 1] + if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: + write_consecutive += 1 + elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: + write_sequential += 1 + else: + write_random += 1 + + if total_writes: + if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( + write_random, write_random / total_writes * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data 
model to have consecutive or sequential writes' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( + write_consecutive / total_writes * 100.0, + write_sequential / total_writes * 100.0 + ) + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + ######################################################################################################################################################################### + + # Shared file with small operations + + # A file is shared if it's been read/written by more than 1 rank + detected_files = df_posix['rank'].nunique() + + total_shared_reads = 0 + total_shared_reads_small = 0 + total_shared_writes = 0 + total_shared_writes_small = 0 + + if df_posix['rank'].nunique() > 1: + total_shared_reads += len(df_posix[(df_posix['function'].str.contains('read'))]) + total_shared_writes += len(df_posix[~(df_posix['function'].str.contains('read'))]) + + total_shared_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + total_shared_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + + if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( + total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider coalescing read requests into larger more contiguous ones using MPI-IO collective operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( + total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + ######################################################################################################################################################################### + + # TODO: Here I assume all operations other than write/read are metadata operations + df_posix_metadata = 
df_posix_records[~(df_posix_records['function'].str.contains('read|write|print', na=False))] + df_detected = df_posix_metadata.groupby('rank')['duration'].sum().reset_index() + has_long_metadata = df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)] + + if not has_long_metadata.empty: + issue = 'There are {} ranks where metadata operations take over {} seconds'.format( + len(has_long_metadata), THRESHOLD_METADATA_TIME_RANK + ) + + recommendation = [ + { + 'message': 'Attempt to combine files, reduce, or cache metadata operations' + } + ] + + if 'H5F' in modules: + recommendation.extend([ + { + 'message': 'Since your application uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'Since your application uses HDF5, try using metadata cache to defer metadata operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default') + } + ]) + + insights_metadata.append( + message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + # We already have a single line for each shared-file access + # To check for stragglers, we can check the difference between the + + # POSIX_FASTEST_RANK_BYTES + # POSIX_SLOWEST_RANK_BYTES + # POSIX_VARIANCE_RANK_BYTES + + stragglers = False + + if df_posix['rank'].nunique() > 1: + total_transfer_size = df_posix['size'].sum() + + df_detected = df_posix.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() + slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] + fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] + + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: + stragglers = True + + if stragglers: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) + ) + + # POSIX_F_FASTEST_RANK_TIME + # POSIX_F_SLOWEST_RANK_TIME + # POSIX_F_VARIANCE_RANK_TIME + + stragglers = False + + if df_posix['rank'].nunique() > 1: + total_transfer_time = df_posix['duration'].sum() + + df_detected = df_posix.groupby('rank')['duration'].sum().reset_index() + + slowest_rank_time = df_detected['duration'].max() + fastest_rank_time = df_detected['duration'].min() + + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + stragglers = True + + if stragglers: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 + ) + + recommendation = [ + { + 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give + }, + { + 'message': 'Consider tuning how your data 
is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) + ) + + # Get the individual files responsible for imbalance + imbalance = False + + if df_posix['rank'].nunique() == 1: + df_detected = df_posix[~(df_posix['function'].str.contains('read'))] + + max_bytes_written = df_detected['size'].max() + min_bytes_written = df_detected['size'].min() + + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + imbalance = True + + if imbalance: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + imbalance = False + + if df_posix['rank'].nunique() == 1: + df_detected = df_posix[(df_posix['function'].str.contains('read'))] + + max_bytes_read = df_detected['size'].max() + min_bytes_read = df_detected['size'].min() + + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + imbalance = True + + if imbalance: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + ######################################################################################################################################################################### + + if df_intervals_temp['api'].eq('MPIIO').any(): + df_mpiio = df_intervals_temp[(df_intervals_temp['api'] == 'MPIIO')] + + df_mpiio_reads = df_mpiio[(df_mpiio['function'].str.contains('read'))] + mpiio_indp_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) + mpiio_coll_reads = 
len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('_all'))]) + total_mpiio_read_operations = mpiio_indp_reads + mpiio_coll_reads + + df_mpiio_writes = df_mpiio[~(df_mpiio['function'].str.contains('read'))] + mpiio_indp_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) + mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) + total_mpiio_write_operations = mpiio_indp_writes + mpiio_coll_writes + + if mpiio_coll_reads == 0: + if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( + mpiio_indp_reads, + mpiio_indp_reads / (total_mpiio_read_operations) * 100 + ) + + recommendation = [ + { + 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( + mpiio_coll_reads, + mpiio_coll_reads / total_mpiio_read_operations * 100 + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + if mpiio_coll_writes == 0: + if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( + mpiio_indp_writes, + mpiio_indp_writes / (total_mpiio_write_operations) * 100 + ) + + recommendation = [ + { + 'message': 'Use collective write operations (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + else: + issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( + mpiio_coll_writes, + mpiio_coll_writes / total_mpiio_write_operations * 100 + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + ######################################################################################################################################################################### + + # Look for usage of non-block operations + + # Look for HDF5 file extension + + has_hdf5_extension = False + + if fname.endswith('.h5') or fname.endswith('.hdf5'): + has_hdf5_extension = True + + if len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) == 0: + issue = 'Application could benefit from non-blocking (asynchronous) reads' + + recommendation = [] + + if 'H5F' in modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPIIO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + if len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) == 0: + issue = 'Application could benefit from non-blocking (asynchronous) writes' + + recommendation = [] + + if 'H5F' in modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPIIO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + ######################################################################################################################################################################### + + # Nodes and MPI-IO aggregators + # If the application uses collective reads or collective writes, look for the number of aggregators + # TODO: + + 
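The non-blocking check above hinges on pandas treating the filter pattern as a regular expression: any call name containing 'iwrite' (or 'iread' for the read case), 'begin', or 'end' counts as non-blocking, and the warning only fires when no such call is found. A small standalone illustration on a toy DataFrame (not part of the patch):

import pandas as pd

calls = pd.DataFrame({'function': [
    'MPI_File_write_at',         # blocking, independent
    'MPI_File_write_at_all',     # blocking, collective
    'MPI_File_iwrite_at',        # non-blocking
    'MPI_File_write_all_begin',  # split collective, begin
    'MPI_File_write_all_end',    # split collective, end
]})

non_blocking = calls[calls['function'].str.contains('iwrite|begin|end')]
print(len(non_blocking))  # 3, so the blocking-write warning above would not be triggered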
######################################################################################################################################################################### + + NUMBER_OF_COMPUTE_NODES = 0 + + ######################################################################################################################################################################### + + codes = [] + if args.json: + f = open(args.json) + data = json.load(f) + + for key, values in data.items(): + for value in values: + code = value['code'] + codes.append(code) + + level = value['level'] + issue = value['issue'] + recommendation = [] + for rec in value['recommendations']: + new_message = {'message': rec} + recommendation.append(new_message) + + insights_dxt.append( + message(args, code, TARGET_DEVELOPER, level, issue, recommendation) + ) + + ######################################################################################################################################################################### + + insights_end_time = time.time() + + console.print() + + console.print( + Panel( + '\n'.join([ + ' [b]RECORDER[/b]: [white]{}[/white]'.format( + os.path.basename(args.log_path) + ), + ' [b]FILE[/b]: [white]{} ({})[/white]'.format( + fname, + fid, + ), + # ' [b]COMPUTE NODES[/b] [white]{}[/white]'.format( + # NUMBER_OF_COMPUTE_NODES + # ), + ' [b]PROCESSES[/b] [white]{}[/white]'.format( + reader.GM.total_ranks + ), + ]), + title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', + title_align='left', + subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( + insights_total[HIGH], + insights_total[WARN], + insights_total[RECOMMENDATIONS], + ), + subtitle_align='left', + padding=1 + ) + ) + + console.print() + + if insights_metadata: + console.print( + Panel( + Padding( + Group( + *insights_metadata + ), + (1, 1) + ), + title='METADATA', + title_align='left' + ) + ) + + if insights_operation: + console.print( + Panel( + Padding( + Group( + *insights_operation + ), + (1, 1) + ), + title='OPERATIONS', + title_align='left' + ) + ) + + if insights_dxt: + console.print( + Panel( + Padding( + Group( + *insights_dxt + ), + (1, 1) + ), + title='DXT', + title_align='left' + ) + ) + + console.print( + Panel( + ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( + datetime.datetime.now().year, + datetime.datetime.now(), + insights_end_time - insights_start_time + ), + box=box.SIMPLE + ) + ) + + if args.export_theme_light: + export_theme = TerminalTheme( + (255, 255, 255), + (0, 0, 0), + [ + (26, 26, 26), + (244, 0, 95), + (152, 224, 36), + (253, 151, 31), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (120, 120, 120), + (98, 94, 76), + ], + [ + (244, 0, 95), + (152, 224, 36), + (224, 213, 97), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (246, 246, 239), + ], + ) + else: + export_theme = MONOKAI + + if args.export_html: + console.save_html( + '{}.{}.html'.format(args.log_path, fid), + theme=export_theme, + clear=False + ) + + if args.export_svg: + console.save_svg( + '{}.{}.svg'.format(args.log_path, fid), + title='Drishti', + theme=export_theme, + clear=False + ) + + if args.export_csv: + issues = [ + 'JOB', + INSIGHTS_STDIO_HIGH_USAGE, + INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, + INSIGHTS_POSIX_READ_COUNT_INTENSIVE, + INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, + INSIGHTS_POSIX_READ_SIZE_INTENSIVE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, + 
INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, + INSIGHTS_POSIX_REDUNDANT_READ_USAGE, + INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, + INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_METADATA_TIME, + INSIGHTS_POSIX_SIZE_IMBALANCE, + INSIGHTS_POSIX_TIME_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, + INSIGHTS_MPI_IO_NO_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, + INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, + INSIGHTS_MPI_IO_AGGREGATORS_INTRA, + INSIGHTS_MPI_IO_AGGREGATORS_INTER, + INSIGHTS_MPI_IO_AGGREGATORS_OK + ] + if codes: + issues.extend(codes) + + detected_issues = dict.fromkeys(issues, False) + detected_issues['JOB'] = None + + for report in csv_report: + detected_issues[report] = True + + filename = '{}.{}.summary.csv'.format( + args.log_path, + fid + ) + + with open(filename, 'w') as f: + w = csv.writer(f) + w.writerow(detected_issues.keys()) + w.writerow(detected_issues.values()) + + + diff --git a/drishti/reporter.py b/drishti/reporter.py index f1ab847..4a274fe 100644 --- a/drishti/reporter.py +++ b/drishti/reporter.py @@ -94,7 +94,16 @@ '--json', default=False, dest='json', - help=argparse.SUPPRESS) + help=argparse.SUPPRESS +) + +parser.add_argument( + '--split', + default=False, + action='store_true', + dest='split_files', + help='Split the files and generate report for each file' +) args = parser.parse_args() @@ -127,6 +136,10 @@ def main(): handle_darshan.handler(args) elif log_type == LOG_TYPE_RECORDER: - from . import handle_recorder - handle_recorder.handler(args) + if args.split_files: + from . import handle_recorder_split + handle_recorder_split.handler(args) + else: + from . 
import handle_recorder + handle_recorder.handler(args) From c05bf2d449878ef2dacaa7a71c1a5ee7906d555c Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 16:29:56 -0800 Subject: [PATCH 05/19] Update configuration file --- drishti/config.py | 272 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 drishti/config.py diff --git a/drishti/config.py b/drishti/config.py new file mode 100644 index 0000000..aaf25b1 --- /dev/null +++ b/drishti/config.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 + +import os +import json + +from rich.console import Console, Group +from rich.padding import Padding +from rich.panel import Panel +from rich.terminal_theme import TerminalTheme +from rich.terminal_theme import MONOKAI + +from .parser import * + + +RECOMMENDATIONS = 0 +HIGH = 1 +WARN = 2 +INFO = 3 +OK = 4 + +ROOT = os.path.abspath(os.path.dirname(__file__)) + +TARGET_USER = 1 +TARGET_DEVELOPER = 2 +TARGET_SYSTEM = 3 + +insights_operation = [] +insights_metadata = [] +insights_dxt = [] + +insights_total = dict() + +insights_total[HIGH] = 0 +insights_total[WARN] = 0 +insights_total[RECOMMENDATIONS] = 0 + +THRESHOLD_OPERATION_IMBALANCE = 0.1 +THRESHOLD_SMALL_REQUESTS = 0.1 +THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000 +THRESHOLD_MISALIGNED_REQUESTS = 0.1 +THRESHOLD_METADATA = 0.1 +THRESHOLD_METADATA_TIME_RANK = 30 # seconds +THRESHOLD_RANDOM_OPERATIONS = 0.2 +THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE = 1000 +THRESHOLD_STRAGGLERS = 0.15 +THRESHOLD_IMBALANCE = 0.30 +THRESHOLD_INTERFACE_STDIO = 0.1 +THRESHOLD_COLLECTIVE_OPERATIONS = 0.5 +THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE = 1000 +THRESHOLD_SMALL_BYTES = 1048576 # 1 MB + +INSIGHTS_STDIO_HIGH_USAGE = 'S01' +INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' +INSIGHTS_POSIX_READ_COUNT_INTENSIVE = 'P02' +INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE = 'P03' +INSIGHTS_POSIX_READ_SIZE_INTENSIVE = 'P04' +INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE = 'P05' +INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE = 'P06' +INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE = 'P07' +INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE = 'P08' +INSIGHTS_POSIX_REDUNDANT_READ_USAGE = 'P09' +INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE = 'P10' +INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE = 'P11' +INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE = 'P12' +INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE = 'P13' +INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE = 'P14' +INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE = 'P15' +INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE = 'P16' +INSIGHTS_POSIX_HIGH_METADATA_TIME = 'P17' +INSIGHTS_POSIX_SIZE_IMBALANCE = 'P18' +INSIGHTS_POSIX_TIME_IMBALANCE = 'P19' +INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE = 'P21' +INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE = 'P22' +INSIGHTS_MPI_IO_NO_USAGE = 'M01' +INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE = 'M02' +INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE = 'M03' +INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE = 'M04' +INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE = 'M05' +INSIGHTS_MPI_IO_BLOCKING_READ_USAGE = 'M06' +INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE = 'M07' +INSIGHTS_MPI_IO_AGGREGATORS_INTRA = 'M08' +INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' +INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' + +DETAILS_MAX_SIZE = 10 + +# TODO: need to verify the threashold to be between 0 and 1 +# TODO: read thresholds from file + + +console = Console(record=True) +csv_report = [] +codes = [] +export_theme = MONOKAI + + +def init_console(): + set_export_size() + set_export_theme() + + insights_operation.clear() + insights_metadata.clear() + + 
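# Zero the issue counters so each new report starts from scratch +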
insights_total[HIGH] = 0 + insights_total[WARN] = 0 + insights_total[RECOMMENDATIONS] = 0 + + +def set_export_theme(): + global export_theme + if args.export_theme_light: + export_theme = TerminalTheme( + (255, 255, 255), + (0, 0, 0), + [ + (26, 26, 26), + (244, 0, 95), + (152, 224, 36), + (253, 151, 31), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (120, 120, 120), + (98, 94, 76), + ], + [ + (244, 0, 95), + (152, 224, 36), + (224, 213, 97), + (157, 101, 255), + (244, 0, 95), + (88, 209, 235), + (246, 246, 239), + ], + ) + + +def set_export_size(): + if args.export_size: console.width = int(args.export_size) + + +def load_json(): + codes = [] + if args.json: + f = open(args.json) + data = json.load(f) + + for key, values in data.items(): + for value in values: + code = value['code'] + codes.append(code) + + level = value['level'] + issue = value['issue'] + recommendation = [] + for rec in value['recommendations']: + new_message = {'message': rec} + recommendation.append(new_message) + + insights_dxt.append( + message(code, TARGET_DEVELOPER, level, issue, recommendation) + ) + + +def validate_thresholds(): + """ + Validate thresholds defined by the user. + """ + assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) + assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) + assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) + assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) + assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) + + assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) + + +def convert_bytes(bytes_number): + """ + Convert bytes into formatted string. + """ + tags = [ + 'bytes', + 'KB', + 'MB', + 'GB', + 'TB', + 'PB', + 'EB' + ] + + i = 0 + double_bytes = bytes_number + + while (i < len(tags) and bytes_number >= 1024): + double_bytes = bytes_number / 1024.0 + i = i + 1 + bytes_number = bytes_number / 1024 + + return str(round(double_bytes, 2)) + ' ' + tags[i] + + +def message(code, target, level, issue, recommendations=None, details=None): + """ + Display the message on the screen with level, issue, and recommendation. 
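+
+    code: insight identifier (e.g. 'P01'); shown when --code is set and recorded for --csv export
+    target: intended audience (TARGET_USER, TARGET_DEVELOPER, or TARGET_SYSTEM)
+    level: HIGH, WARN, INFO, or OK; HIGH and WARN increment the totals shown in the report header
+    issue: text describing the detected behavior
+    recommendations: optional list of dicts with a 'message' and, optionally, a 'sample' snippet shown with --verbose
+    details: optional list of dicts with a 'message'; only the first DETAILS_MAX_SIZE entries are displayed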
+ """ + icon = ':arrow_forward:' + + if level in (HIGH, WARN): + insights_total[level] += 1 + + if level == HIGH: + color = '[red]' + elif level == WARN: + color = '[orange1]' + elif level == OK: + color = '[green]' + else: + color = '' + + messages = [ + '{}{}{} {}'.format( + color, + icon, + ' [' + code + ']' if args.code else '', + issue + ) + ] + + if args.export_csv: + csv_report.append(code) + + if details: + for detail in details[:DETAILS_MAX_SIZE]: + messages.append(' {}:left_arrow_curving_right: {}'.format( + color, + detail['message'] + ) + ) + + if recommendations: + if not args.only_issues: + messages.append(' [white]:left_arrow_curving_right: [b]Recommendations:[/b]') + + for recommendation in recommendations: + messages.append(' :left_arrow_curving_right: {}'.format(recommendation['message'])) + + if args.verbose and 'sample' in recommendation: + messages.append( + Padding( + Panel( + recommendation['sample'], + title='Solution Example Snippet', + title_align='left', + padding=(1, 2) + ), + (1, 0, 1, 7) + ) + ) + + insights_total[RECOMMENDATIONS] += len(recommendations) + + return Group( + *messages + ) + + +''' +Pre-load +''' +load_json() + From 9509e90fc75fbd87a7eecacc42379db1623d4aab Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 16:30:42 -0800 Subject: [PATCH 06/19] Seperate argument parser --- drishti/parser.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 drishti/parser.py diff --git a/drishti/parser.py b/drishti/parser.py new file mode 100644 index 0000000..0261312 --- /dev/null +++ b/drishti/parser.py @@ -0,0 +1,98 @@ +import argparse + +parser = argparse.ArgumentParser( + description='Drishti: ' +) + +parser.add_argument( + 'log_path', + help='Input .darshan file or recorder folder' +) + +parser.add_argument( + '--issues', + default=False, + action='store_true', + dest='only_issues', + help='Only displays the detected issues and hides the recommendations' +) + +parser.add_argument( + '--html', + default=False, + action='store_true', + dest='export_html', + help='Export the report as an HTML page' +) + +parser.add_argument( + '--svg', + default=False, + action='store_true', + dest='export_svg', + help='Export the report as an SVG image' +) + +parser.add_argument( + '--light', + default=False, + action='store_true', + dest='export_theme_light', + help='Use a light theme for the report when generating files' +) + +parser.add_argument( + '--size', + default=False, + dest='export_size', + help='Console width used for the report and generated files' +) + +parser.add_argument( + '--verbose', + default=False, + action='store_true', + dest='verbose', + help='Display extended details for the recommendations' +) + +parser.add_argument( + '--code', + default=False, + action='store_true', + dest='code', + help='Display insights identification code' +) + +parser.add_argument( + '--path', + default=False, + action='store_true', + dest='full_path', + help='Display the full file path for the files that triggered the issue' +) + +parser.add_argument( + '--csv', + default=False, + action='store_true', + dest='export_csv', + help='Export a CSV with the code of all issues that were triggered' +) + +parser.add_argument( + '--json', + default=False, + dest='json', + help=argparse.SUPPRESS +) + +parser.add_argument( + '--split', + default=False, + action='store_true', + dest='split_files', + help='Split the files and generate report for each file' +) + +args = parser.parse_args() From 
db437085bbdbbbb4e3c10614409232e331014df0 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 16:31:57 -0800 Subject: [PATCH 07/19] Main entry --- drishti/reporter.py | 111 +++----------------------------------------- 1 file changed, 6 insertions(+), 105 deletions(-) diff --git a/drishti/reporter.py b/drishti/reporter.py index 4a274fe..ef92d11 100644 --- a/drishti/reporter.py +++ b/drishti/reporter.py @@ -2,111 +2,13 @@ import os import sys -import argparse - from subprocess import call +from .parser import * LOG_TYPE_DARSHAN = 0 LOG_TYPE_RECORDER = 1 -parser = argparse.ArgumentParser( - description='Drishti: ' -) - -parser.add_argument( - 'log_path', - help='Input .darshan file or recorder folder' -) - -parser.add_argument( - '--issues', - default=False, - action='store_true', - dest='only_issues', - help='Only displays the detected issues and hides the recommendations' -) - -parser.add_argument( - '--html', - default=False, - action='store_true', - dest='export_html', - help='Export the report as an HTML page' -) - -parser.add_argument( - '--svg', - default=False, - action='store_true', - dest='export_svg', - help='Export the report as an SVG image' -) - -parser.add_argument( - '--light', - default=False, - action='store_true', - dest='export_theme_light', - help='Use a light theme for the report when generating files' -) - -parser.add_argument( - '--size', - default=False, - dest='export_size', - help='Console width used for the report and generated files' -) - -parser.add_argument( - '--verbose', - default=False, - action='store_true', - dest='verbose', - help='Display extended details for the recommendations' -) - -parser.add_argument( - '--code', - default=False, - action='store_true', - dest='code', - help='Display insights identification code' -) - -parser.add_argument( - '--path', - default=False, - action='store_true', - dest='full_path', - help='Display the full file path for the files that triggered the issue' -) - -parser.add_argument( - '--csv', - default=False, - action='store_true', - dest='export_csv', - help='Export a CSV with the code of all issues that were triggered' -) - -parser.add_argument( - '--json', - default=False, - dest='json', - help=argparse.SUPPRESS -) - -parser.add_argument( - '--split', - default=False, - action='store_true', - dest='split_files', - help='Split the files and generate report for each file' -) - -args = parser.parse_args() - def clear(): """ @@ -132,14 +34,13 @@ def main(): log_type = check_log_type(args.log_path) if log_type == LOG_TYPE_DARSHAN: - from . import handle_darshan - handle_darshan.handler(args) + from .handle_darshan import handler elif log_type == LOG_TYPE_RECORDER: if args.split_files: - from . import handle_recorder_split - handle_recorder_split.handler(args) + from .handle_recorder_split import handler else: - from . 
import handle_recorder - handle_recorder.handler(args) + from .handle_recorder import handler + + handler() From d178dfd77cbb1ade7bef59aec9b37eedb108ad3d Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 19:10:53 -0800 Subject: [PATCH 08/19] Modules to be called by handlers --- drishti/module.py | 864 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 864 insertions(+) create mode 100644 drishti/module.py diff --git a/drishti/module.py b/drishti/module.py new file mode 100644 index 0000000..fe21a18 --- /dev/null +++ b/drishti/module.py @@ -0,0 +1,864 @@ +#!/usr/bin/env python3 + +import datetime +import csv +from rich import box +from rich.syntax import Syntax +from .config import * + +''' +Before calling the functions below +Make sure the variables passed are in the given structure: +file_map: a dict of (id, path) pair +modules: a set or a dict should be ok +detected_files: A pandas dataframe +''' + +# Basic usage check + +def check_stdio(total_size, total_size_stdio): + if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: + issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( + total_size_stdio / total_size * 100.0, + convert_bytes(total_size_stdio) + ) + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + insights_operation.append( + message(INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + +def check_mpiio(modules): + if 'MPI-IO' not in modules: + issue = 'Application is using low-performance interface' + + recommendation = [ + { + 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' + } + ] + + insights_operation.append( + message(INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + + +# POSIX level check + + +def check_operation_intensive(total_operations, total_reads, total_writes): + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( + total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + +def check_size_intensive(total_size, total_read_size, total_written_size): + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is write size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + issue = 'Application is read size intensive ({:.2f}% write vs. {:.2f}% read)'.format( + total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) + ) + + +''' +detected_files required columns: +['id', 'total_reads', 'total_writes'] +detected_files.loc[:, 'id'] = detected_files.loc[:, 'id'].astype(str) +''' +def check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map): + if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( + total_reads_small, total_reads_small / total_reads * 100.0 + ) + + detail = [] + recommendation = [] + + for index, row in detected_files.iterrows(): + if row['total_reads'] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): + detail.append( + { + 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( + row['total_reads'], + row['total_reads'] / total_reads * 100.0, + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation.append( + { + 'message': 'Consider buffering read operations into larger more contiguous ones' + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since the appplication already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ) + else: + recommendation.append( + { + 'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations' + } + ) + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( + total_writes_small, total_writes_small / total_writes * 100.0 + ) + + detail = [] + recommendation = [] + + for index, row in detected_files.iterrows(): + if row['total_writes'] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2): + detail.append( + { + 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( + row['total_writes'], + row['total_writes'] / total_writes * 100.0, + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation.append( + { + 'message': 'Consider buffering write operations into larger more contiguous ones' + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ) + else: + recommendation.append( + { + 'message': 'Application does not use MPI-IO for operations, consider using this interface instead to harness collective operations' + } + ) + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + +def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_aligned, modules): + if total_operations and total_mem_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: + issue = 'Application has a high number ({:.2f}%) of misaligned memory requests'.format( + total_mem_not_aligned / total_operations * 100.0 + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) + ) + + if total_operations and total_file_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: + issue = 'Application issues a high number ({:.2f}%) of misaligned file requests'.format( + total_file_not_aligned / total_operations * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider aligning the requests to the file system block boundaries' + } + ] + + if 'H5F' in modules: + recommendation.extend([ + { + 'message': 'Since the application uses HDF5, consider using H5Pset_alignment() in a file access property list', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-alignment.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'Any file object greater than or equal in size to threshold bytes will be aligned on an address which is a multiple of alignment' + } + ]) + + if 'LUSTRE' in modules: + recommendation.append( + { + 'message': 'Consider using 
a Lustre alignment that matches the file system stripe configuration', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ) + + insights_metadata.append( + message(INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + +def check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size): + if max_read_offset > total_read_size: + issue = 'Application might have redundant read traffic (more data read than the highest offset)' + + insights_metadata.append( + message(INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + if max_write_offset > total_written_size: + issue = 'Application might have redundant write traffic (more data written than the highest offset)' + + insights_metadata.append( + message(INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) + ) + + +def check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes): + if total_reads: + if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( + read_random, read_random / total_reads * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data model to have consecutive or sequential reads' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( + read_consecutive / total_reads * 100.0, + read_sequential / total_reads * 100.0 + ) + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + if total_writes: + if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( + write_random, write_random / total_writes * 100.0 + ) + + recommendation = [ + { + 'message': 'Consider changing your data model to have consecutive or sequential writes' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + else: + issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( + write_consecutive / total_writes * 100.0, + write_sequential / total_writes * 100.0 + ) + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) + ) + + +'''' +The shared_file required columns: +['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] +''' +def check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, shared_files, file_map): + if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( + total_shared_reads_small, 
total_shared_reads_small / total_shared_reads * 100.0 + ) + + detail = [] + + for index, row in shared_files.iterrows(): + if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * THRESHOLD_SMALL_REQUESTS / 2): + detail.append( + { + 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( + row['INSIGHTS_POSIX_SMALL_READS'], + row['INSIGHTS_POSIX_SMALL_READS'] / total_shared_reads * 100.0, + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider coalescing read requests into larger more contiguous ones using MPI-IO collective operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( + total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 + ) + + detail = [] + + for index, row in shared_files.iterrows(): + if row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * THRESHOLD_SMALL_REQUESTS / 2): + detail.append( + { + 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( + row['INSIGHTS_POSIX_SMALL_WRITES'], + row['INSIGHTS_POSIX_SMALL_WRITES'] / total_shared_writes * 100.0, + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + +def check_long_metadata(count_long_metadata, modules): + if count_long_metadata > 0: + issue = 'There are {} ranks where metadata operations take over {} seconds'.format( + count_long_metadata, THRESHOLD_METADATA_TIME_RANK + ) + + recommendation = [ + { + 'message': 'Attempt to combine files, reduce, or cache metadata operations' + } + ] + + if 'H5F' in modules: + recommendation.extend([ + { + 'message': 'Since your application uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'Since your application uses HDF5, try using metadata cache to defer metadata operations', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default') + } + ]) + + insights_metadata.append( + message(INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + +''' +detected_files required columns: +['id', 'data_imbalance'] +''' +def check_shared_data_imblance(stragglers_count, detected_files, file_map): + if stragglers_count: + issue = 
'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format( + stragglers_count + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + row['data_imbalance'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + ) + + +''' +detected_files required columns: +['id', 'time_imbalance'] +''' +def check_shared_time_imbalance(stragglers_count, detected_files, file_map): + if stragglers_count: + issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( + stragglers_count + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + row['time_imbalance'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) + ) + + +''' +detected_files required columns: +['id', 'write_imbalance'] +''' +def check_individual_write_imbalance(imbalance_count, detected_files, file_map): + if imbalance_count: + issue = 'Detected write imbalance when accessing {} individual files'.format( + imbalance_count + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + row['write_imbalance'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + +''' +detected_files required columns: +['id', 'read_imbalance'] +''' +def 
check_individual_read_imbalance(imbalance_count, detected_files, file_map): + if imbalance_count: + issue = 'Detected read imbalance when accessing {} individual files.'.format( + imbalance_count + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( + row['read_imbalance'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + + +# MPIIO level check + +''' +detected_files required columns: +['id', 'absolute_indep_reads', 'percent_indep_reads'] +''' +def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map): + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( + mpiio_indep_reads, + mpiio_indep_reads / total_mpiio_read_operations * 100 + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': '{} ({}%) of independent reads to "{}"'.format( + row['absolute_indep_reads'], + row['percent_indep_reads'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Use collective read operations (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + else: + issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( + mpiio_coll_reads, + mpiio_coll_reads / total_mpiio_read_operations * 100 + ) + + insights_operation.append( + message(INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + +''' +detected_files required columns: +['id', 'absolute_indep_writes', 'percent_indep_writes'] +''' +def check_mpi_collective_write_operation(mpi_coll_writes, mpi_indep_writes, total_mpiio_write_operations, detected_files, file_map): + if mpi_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( + mpi_indep_writes, + mpi_indep_writes / total_mpiio_write_operations * 100 + ) + + detail = [] + + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': '{} ({}%) independent writes to "{}"'.format( + row['absolute_indep_writes'], + row['percent_indep_writes'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) + + recommendation = [ + { + 'message': 'Use collective write operations (e.g. MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) + else: + issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( + mpi_coll_writes, + mpi_coll_writes / total_mpiio_write_operations * 100 + ) + + insights_operation.append( + message(INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) + ) + + +def check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules): + if mpiio_nb_reads == 0: + issue = 'Application could benefit from non-blocking (asynchronous) reads' + + recommendation = [] + + if 'H5F' in modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + if mpiio_nb_writes == 0: + issue = 'Application could benefit from non-blocking (asynchronous) writes' + + recommendation = [] + + if 'H5F' in 
modules or has_hdf5_extension: + recommendation.append( + { + 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') + } + ) + + if 'MPI-IO' in modules: + recommendation.append( + { + 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') + } + ) + + insights_operation.append( + message(INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) + ) + + +def check_mpi_aggregator(cb_nodes, NUMBER_OF_COMPUTE_NODES): + if cb_nodes > NUMBER_OF_COMPUTE_NODES: + issue = 'Application is using inter-node aggregators (which require network communication)' + + recommendation = [ + { + 'message': 'Set the MPI hints for the number of aggregators as one per compute node (e.g., cb_nodes={})'.format( + NUMBER_OF_COMPUTE_NODES + ), + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-hints.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_MPI_IO_AGGREGATORS_INTER, TARGET_USER, HIGH, issue, recommendation) + ) + + if cb_nodes < NUMBER_OF_COMPUTE_NODES: + issue = 'Application is using intra-node aggregators' + + insights_operation.append( + message(INSIGHTS_MPI_IO_AGGREGATORS_INTRA, TARGET_USER, OK, issue) + ) + + if cb_nodes == NUMBER_OF_COMPUTE_NODES: + issue = 'Application is using one aggregator per compute node' + + insights_operation.append( + message(INSIGHTS_MPI_IO_AGGREGATORS_OK, TARGET_USER, OK, issue) + ) + + +# Layout and export + +def display_content(): + if insights_metadata: + console.print( + Panel( + Padding( + Group( + *insights_metadata + ), + (1, 1) + ), + title='METADATA', + title_align='left' + ) + ) + + if insights_operation: + console.print( + Panel( + Padding( + Group( + *insights_operation + ), + (1, 1) + ), + title='OPERATIONS', + title_align='left' + ) + ) + + if insights_dxt: + console.print( + Panel( + Padding( + Group( + *insights_dxt + ), + (1, 1) + ), + title='DXT', + title_align='left' + ) + ) + + +def display_footer(insights_start_time, insights_end_time): + console.print( + Panel( + ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( + datetime.datetime.now().year, + datetime.datetime.now(), + insights_end_time - insights_start_time + ), + box=box.SIMPLE + ) + ) + +def export_html(): + if args.export_html: + console.save_html( + '{}.html'.format(args.log_path), + theme=export_theme, + clear=False + ) + + +def export_svg(): + if args.export_svg: + console.save_svg( + '{}.svg'.format(args.log_path), + title='Drishti', + theme=export_theme, + clear=False + ) + + +def export_csv(filename, jobid=None): + if args.export_csv: + issues = [ + 'JOB', + INSIGHTS_STDIO_HIGH_USAGE, + INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, + INSIGHTS_POSIX_READ_COUNT_INTENSIVE, + INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, + INSIGHTS_POSIX_READ_SIZE_INTENSIVE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, + INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, + INSIGHTS_POSIX_REDUNDANT_READ_USAGE, + INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, 
+ INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, + INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, + INSIGHTS_POSIX_HIGH_METADATA_TIME, + INSIGHTS_POSIX_SIZE_IMBALANCE, + INSIGHTS_POSIX_TIME_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, + INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, + INSIGHTS_MPI_IO_NO_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, + INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, + INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, + INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, + INSIGHTS_MPI_IO_AGGREGATORS_INTRA, + INSIGHTS_MPI_IO_AGGREGATORS_INTER, + INSIGHTS_MPI_IO_AGGREGATORS_OK + ] + if codes: + issues.extend(codes) + + detected_issues = dict.fromkeys(issues, False) + detected_issues['JOB'] = jobid + + for report in csv_report: + detected_issues[report] = True + + with open(filename, 'w') as f: + w = csv.writer(f) + w.writerow(detected_issues.keys()) + w.writerow(detected_issues.values()) + From a20d74f7fc54d150d7f0c9b61345e33cb0b6f818 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 19:13:13 -0800 Subject: [PATCH 09/19] Darshan handler cleanup --- drishti/handle_darshan.py | 912 ++++---------------------------------- 1 file changed, 82 insertions(+), 830 deletions(-) diff --git a/drishti/handle_darshan.py b/drishti/handle_darshan.py index 6d4e70f..a5d8fbc 100644 --- a/drishti/handle_darshan.py +++ b/drishti/handle_darshan.py @@ -1,32 +1,18 @@ #!/usr/bin/env python3 -import os import io import sys -import csv import time -import json import shlex import shutil -import datetime import subprocess - import pandas as pd - import darshan import darshan.backend.cffi_backend as darshanll -from rich import print, box -from rich.console import Group -from rich.padding import Padding -from rich.syntax import Syntax -from rich.panel import Panel -from rich.terminal_theme import TerminalTheme -from rich.terminal_theme import MONOKAI - +from rich import print from packaging import version - -from .includes import * +from .module import * def is_available(name): @@ -84,8 +70,8 @@ def check_log_version(file, log_version, library_version): return use_file -def handler(args): - init_console(args) +def handler(): + init_console() validate_thresholds() insights_start_time = time.time() @@ -207,34 +193,8 @@ def handler(args): 'mpiio': uses_mpiio } - if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: - issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( - total_size_stdio / total_size * 100.0, - convert_bytes(total_size_stdio) - ) - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - if 'MPI-IO' not in modules: - issue = 'Application is using low-performance interface' - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) + check_stdio(total_size, total_size_stdio) + check_mpiio(modules) 
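+    # Interface-level checks (STDIO usage, missing MPI-IO) are delegated to the shared helpers in module.py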
######################################################################################################################################################################### @@ -251,46 +211,14 @@ def handler(args): total_operations = total_writes + total_reads # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance - if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) + check_operation_intensive(total_operations, total_reads, total_writes) total_read_size = df['counters']['POSIX_BYTES_READ'].sum() total_written_size = df['counters']['POSIX_BYTES_WRITTEN'].sum() total_size = total_written_size + total_read_size - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) + check_size_intensive(total_size, total_read_size, total_written_size) ######################################################################################################################################################################### @@ -303,6 +231,14 @@ def handler(args): df['counters']['POSIX_SIZE_READ_100K_1M'].sum() ) + total_writes_small = ( + df['counters']['POSIX_SIZE_WRITE_0_100'].sum() + + df['counters']['POSIX_SIZE_WRITE_100_1K'].sum() + + df['counters']['POSIX_SIZE_WRITE_1K_10K'].sum() + + df['counters']['POSIX_SIZE_WRITE_10K_100K'].sum() + + df['counters']['POSIX_SIZE_WRITE_100K_1M'].sum() + ) + # Get the files responsible for more than half of these accesses files = [] @@ -326,102 +262,7 @@ def handler(args): detected_files.columns = ['id', 'total_reads', 'total_writes'] detected_files.loc[:, 'id'] = detected_files.loc[:, 'id'].astype(str) - if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( - total_reads_small, total_reads_small / total_reads * 100.0 - ) - - detail = [] - recommendation = [] - - for index, row in detected_files.iterrows(): - if row['total_reads'] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( - row['total_reads'], - row['total_reads'] / total_reads * 100.0, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation.append( - { - 'message': 'Consider buffering read operations into larger more contiguous ones' - } - ) - - if 'MPI-IO' in modules: - recommendation.append( - { - 'message': 'Since the appplication already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - - # Get the number of small I/O operations (less than the stripe size) - total_writes_small = ( - df['counters']['POSIX_SIZE_WRITE_0_100'].sum() + - df['counters']['POSIX_SIZE_WRITE_100_1K'].sum() + - df['counters']['POSIX_SIZE_WRITE_1K_10K'].sum() + - df['counters']['POSIX_SIZE_WRITE_10K_100K'].sum() + - df['counters']['POSIX_SIZE_WRITE_100K_1M'].sum() - ) - - if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( - total_writes_small, total_writes_small / total_writes * 100.0 - ) - - detail = [] - recommendation = [] - - for index, row in detected_files.iterrows(): - if row['total_writes'] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( - row['total_writes'], - row['total_writes'] / total_writes * 100.0, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation.append( - { - 'message': 'Consider buffering write operations into larger more contiguous ones' - } - ) - - if 'MPI-IO' in modules: - recommendation.append( - { - 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map) ######################################################################################################################################################################### @@ -430,70 +271,16 @@ def handler(args): total_mem_not_aligned = df['counters']['POSIX_MEM_NOT_ALIGNED'].sum() total_file_not_aligned = df['counters']['POSIX_FILE_NOT_ALIGNED'].sum() - if total_operations and total_mem_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: - issue = 'Application has a high number ({:.2f}%) of misaligned memory requests'.format( - total_mem_not_aligned / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) - ) - - if total_operations and total_file_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: - issue = 'Application issues a high number ({:.2f}%) of misaligned file requests'.format( - total_file_not_aligned / total_operations * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider aligning the requests to the file system block boundaries' - } - ] - - if 'HF5' in modules: - recommendation.append( - { - 'message': 'Since the appplication uses HDF5, consider using H5Pset_alignment() in a file access property list', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-alignment.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'Any file object greater than or equal in size to threshold bytes will be aligned on an address which is a multiple of alignment' - } - ) - - if 'LUSTRE' in modules: - recommendation.append( - { - 'message': 'Consider using a Lustre alignment that matches the file system stripe configuration', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) + check_misaligned(total_operations, total_mem_not_aligned, total_file_not_aligned, modules) ######################################################################################################################################################################### # Redundant read-traffic (based on Phill) # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) max_read_offset = df['counters']['POSIX_MAX_BYTE_READ'].max() - - if max_read_offset > total_read_size: - issue = 'Application might have redundant read traffic (more data read than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) - max_write_offset = df['counters']['POSIX_MAX_BYTE_WRITTEN'].max() - if max_write_offset > total_written_size: - issue = 'Application might have redundant write traffic (more data 
written than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) + check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size) ######################################################################################################################################################################### @@ -502,7 +289,6 @@ def handler(args): read_consecutive = df['counters']['POSIX_CONSEC_READS'].sum() #print('READ Consecutive: {} ({:.2f}%)'.format(read_consecutive, read_consecutive / total_reads * 100)) - read_sequential = df['counters']['POSIX_SEQ_READS'].sum() read_sequential -= read_consecutive #print('READ Sequential: {} ({:.2f}%)'.format(read_sequential, read_sequential / total_reads * 100)) @@ -510,30 +296,6 @@ def handler(args): read_random = total_reads - read_consecutive - read_sequential #print('READ Random: {} ({:.2f}%)'.format(read_random, read_random / total_reads * 100)) - if total_reads: - if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( - read_random, read_random / total_reads * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data model to have consecutive or sequential reads' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( - read_consecutive / total_reads * 100.0, - read_sequential / total_reads * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) write_consecutive = df['counters']['POSIX_CONSEC_WRITES'].sum() @@ -543,30 +305,7 @@ def handler(args): write_random = total_writes - write_consecutive - write_sequential #print('WRITE Random: {} ({:.2f}%)'.format(write_random, write_random / total_writes * 100)) - if total_writes: - if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( - write_random, write_random / total_writes * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data model to have consecutive or sequential writes' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( - write_consecutive / total_writes * 100.0, - write_sequential / total_writes * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) + check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes) ######################################################################################################################################################################### @@ -594,35 +333,6 @@ def handler(args): shared_files['POSIX_SIZE_READ_100K_1M'] ) - if total_shared_reads and total_shared_reads_small / total_shared_reads > 
THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( - total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 - ) - - detail = [] - - for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( - row['INSIGHTS_POSIX_SMALL_READS'], - row['INSIGHTS_POSIX_SMALL_READS'] / total_shared_reads * 100.0, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider coalesceing read requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) total_shared_writes = shared_files['POSIX_WRITES'].sum() total_shared_writes_small = ( @@ -641,66 +351,13 @@ def handler(args): shared_files['POSIX_SIZE_WRITE_100K_1M'] ) - if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( - total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 - ) - - detail = [] - - for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small writes requests are to "{}"'.format( - row['INSIGHTS_POSIX_SMALL_WRITES'], - row['INSIGHTS_POSIX_SMALL_WRITES'] / total_shared_writes * 100.0, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, shared_files, file_map) ######################################################################################################################################################################### - has_long_metadata = df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > THRESHOLD_METADATA_TIME_RANK)] + count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > THRESHOLD_METADATA_TIME_RANK)]) - if not has_long_metadata.empty: - issue = 'There are {} ranks where metadata operations take over {} seconds'.format( - len(has_long_metadata), THRESHOLD_METADATA_TIME_RANK - ) - - recommendation = [ - { - 'message': 'Attempt to combine 
files, reduce, or cache metadata operations' - } - ] - - if 'HF5' in modules: - recommendation.append( - { - 'message': 'Since your appplication uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'Since your appplication uses HDF5, try using metadata cache to defer metadata operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default') - } - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) + check_long_metadata(count_long_metadata, modules) # We already have a single line for each shared-file access # To check for stragglers, we can check the difference between the @@ -726,36 +383,9 @@ def handler(args): row['id'], abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size * 100 ]) - if stragglers_count: - issue = 'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format( - stragglers_count - ) - - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file_map[int(file[0])] if args.full_path else os.path.basename(file_map[int(file[0])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) - ) + column_names = ['id', 'data_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + check_shared_data_imblance(stragglers_count, detected_files, file_map) # POSIX_F_FASTEST_RANK_TIME # POSIX_F_SLOWEST_RANK_TIME @@ -781,36 +411,9 @@ def handler(args): row['id'], abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time * 100 ]) - if stragglers_count: - issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( - stragglers_count - ) - - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file_map[int(file[0])] if args.full_path else os.path.basename(file_map[int(file[0])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give - }, - { - 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) - ) + column_names = ['id', 'time_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + check_shared_time_imbalance(stragglers_count, detected_files, 
file_map) aggregated = df['counters'].loc[(df['counters']['rank'] != -1)][ ['rank', 'id', 'POSIX_BYTES_WRITTEN', 'POSIX_BYTES_READ'] @@ -837,43 +440,9 @@ def handler(args): row['id'], abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] * 100 ]) - if imbalance_count: - issue = 'Detected write imbalance when accessing {} individual files'.format( - imbalance_count - ) - - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file_map[int(file[0])] if args.full_path else os.path.basename(file_map[int(file[0])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + column_names = ['id', 'write_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + check_individual_write_imbalance(imbalance_count, detected_files, file_map) imbalance_count = 0 @@ -887,43 +456,9 @@ def handler(args): row['id'], abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] * 100 ]) - if imbalance_count: - issue = 'Detected read imbalance when accessing {} individual files.'.format( - imbalance_count - ) - - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file_map[int(file[0])] if args.full_path else os.path.basename(file_map[int(file[0])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + column_names = ['id', 'read_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + check_individual_read_imbalance(imbalance_count, detected_files, file_map) ######################################################################################################################################################################### @@ -940,95 +475,50 @@ def handler(args): total_mpiio_read_operations = 
df_mpiio['counters']['MPIIO_INDEP_READS'].sum() + df_mpiio['counters']['MPIIO_COLL_READS'].sum() - if df_mpiio['counters']['MPIIO_COLL_READS'].sum() == 0: - if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( - df_mpiio['counters']['MPIIO_INDEP_READS'].sum(), - df_mpiio['counters']['MPIIO_INDEP_READS'].sum() / (total_mpiio_read_operations) * 100 - ) + mpiio_coll_reads = df_mpiio['counters']['MPIIO_COLL_READS'].sum() + mpiio_indep_reads = df_mpiio['counters']['MPIIO_INDEP_READS'].sum() - detail = [] - - files = pd.DataFrame(df_mpiio_collective_reads.groupby('id').sum()).reset_index() - - for index, row in df_mpiio_collective_reads.iterrows(): - if (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - detail.append( - { - 'message': '{} ({}%) of independent reads to "{}"'.format( - row['MPIIO_INDEP_READS'], - row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - else: - issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( - df_mpiio['counters']['MPIIO_COLL_READS'].sum(), - df_mpiio['counters']['MPIIO_COLL_READS'].sum() / (df_mpiio['counters']['MPIIO_INDEP_READS'].sum() + df_mpiio['counters']['MPIIO_COLL_READS'].sum()) * 100 - ) + detected_files = [] + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + files = pd.DataFrame(df_mpiio_collective_reads.groupby('id').sum()).reset_index() + for index, row in df_mpiio_collective_reads.iterrows(): + if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and + row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE): + + detected_files.append([ + row['id'], row['MPIIO_INDEP_READS'], row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 + ]) + + column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) - ) + check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map) df_mpiio_collective_writes = df_mpiio['counters'] #.loc[(df_mpiio['counters']['MPIIO_COLL_WRITES'] > 0)] total_mpiio_write_operations = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() + 
df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() - if df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() == 0: - if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( - df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum(), - df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() / (total_mpiio_write_operations) * 100 - ) + mpi_coll_writes = df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() + mpi_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() - detail = [] - - files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() - - for index, row in df_mpiio_collective_writes.iterrows(): - if (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - detail.append( - { - 'message': '{} ({}%) independent writes to "{}"'.format( - row['MPIIO_INDEP_WRITES'], - row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100, - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) - - recommendation = [ - { - 'message': 'Use collective write operations (e.g. MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - else: - issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( - df_mpiio['counters']['MPIIO_COLL_WRITES'].sum(), - df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() / (df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() + df_mpiio['counters']['MPIIO_COLL_WRITES'].sum()) * 100 - ) + detected_files = [] + if mpi_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) - ) + for index, row in df_mpiio_collective_writes.iterrows(): + if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and + row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE): + + detected_files.append([ + row['id'], row['MPIIO_INDEP_WRITES'], row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 + ]) + + column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + + check_mpi_collective_write_operation(mpi_coll_writes, mpi_indep_writes, total_mpiio_write_operations, detected_files, file_map) ######################################################################################################################################################################### @@ -1042,55 +532,10 @@ def handler(args): if 
file_map[int(row['id'])].endswith('.h5') or file_map[int(row['id'])].endswith('.hdf5'): has_hdf5_extension = True - if df_mpiio['counters']['MPIIO_NB_READS'].sum() == 0: - issue = 'Application could benefit from non-blocking (asynchronous) reads' - - recommendation = [] - - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPI-IO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) - - if df_mpiio['counters']['MPIIO_NB_WRITES'].sum() == 0: - issue = 'Application could benefit from non-blocking (asynchronous) writes' - - recommendation = [] + mpiio_nb_reads = df_mpiio['counters']['MPIIO_NB_READS'].sum() + mpiio_nb_writes = df_mpiio['counters']['MPIIO_NB_WRITES'].sum() - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPI-IO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) + check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules) ######################################################################################################################################################################### @@ -1106,8 +551,6 @@ def handler(args): # print('Hints: ', hints) - ######################################################################################################################################################################### - NUMBER_OF_COMPUTE_NODES = 0 if 'MPI-IO' in modules: @@ -1141,66 +584,13 @@ def handler(args): NUMBER_OF_COMPUTE_NODES = first['NNodes'] # Do we have one MPI-IO aggregator per node? 
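                    # Illustrative sketch (assumption, not part of this patch): the cb_nodes
                    # value compared against NUMBER_OF_COMPUTE_NODES here is the same MPI-IO
                    # hint an application could set itself to request one collective-buffering
                    # aggregator per compute node, e.g. with mpi4py. The file name and
                    # n_compute_nodes below are placeholders for this example only.
                    #
                    #     from mpi4py import MPI
                    #
                    #     n_compute_nodes = 4  # placeholder: nodes allocated to the job
                    #     info = MPI.Info.Create()
                    #     info.Set('cb_nodes', str(n_compute_nodes))  # one aggregator per node
                    #     fh = MPI.File.Open(MPI.COMM_WORLD, 'output.dat',
                    #                        MPI.MODE_CREATE | MPI.MODE_WRONLY, info)
                    #     fh.Close()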
- if cb_nodes > NUMBER_OF_COMPUTE_NODES: - issue = 'Application is using inter-node aggregators (which require network communication)' - - recommendation = [ - { - 'message': 'Set the MPI hints for the number of aggregators as one per compute node (e.g., cb_nodes={})'.format( - NUMBER_OF_COMPUTE_NODES - ), - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-hints.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_AGGREGATORS_INTER, TARGET_USER, HIGH, issue, recommendation) - ) - - if cb_nodes < NUMBER_OF_COMPUTE_NODES: - issue = 'Application is using intra-node aggregators' - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_AGGREGATORS_INTRA, TARGET_USER, OK, issue) - ) - - if cb_nodes == NUMBER_OF_COMPUTE_NODES: - issue = 'Application is using one aggregator per compute node' - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_AGGREGATORS_OK, TARGET_USER, OK, issue) - ) - - + check_mpi_aggregator(cb_nodes, NUMBER_OF_COMPUTE_NODES) except StopIteration: pass except FileNotFoundError: pass ######################################################################################################################################################################### - - codes = [] - if args.json: - f = open(args.json) - data = json.load(f) - - for key, values in data.items(): - for value in values: - code = value['code'] - codes.append(code) - - level = value['level'] - issue = value['issue'] - recommendation = [] - for rec in value['recommendations']: - new_message = {'message': rec} - recommendation.append(new_message) - - insights_dxt.append( - message(args, code, TARGET_DEVELOPER, level, issue, recommendation) - ) - - ######################################################################################################################################################################### insights_end_time = time.time() @@ -1261,153 +651,15 @@ def handler(args): console.print() - if insights_metadata: - console.print( - Panel( - Padding( - Group( - *insights_metadata - ), - (1, 1) - ), - title='METADATA', - title_align='left' - ) - ) + display_content() + display_footer(insights_start_time, insights_end_time) - if insights_operation: - console.print( - Panel( - Padding( - Group( - *insights_operation - ), - (1, 1) - ), - title='OPERATIONS', - title_align='left' - ) - ) + export_html() + export_svg() - if insights_dxt: - console.print( - Panel( - Padding( - Group( - *insights_dxt - ), - (1, 1) - ), - title='DXT', - title_align='left' - ) - ) - - console.print( - Panel( - ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( - datetime.datetime.now().year, - datetime.datetime.now(), - insights_end_time - insights_start_time - ), - box=box.SIMPLE - ) + filename = '{}-summary.csv'.format( + args.log_path.replace('.darshan', '') ) - - if args.export_theme_light: - export_theme = TerminalTheme( - (255, 255, 255), - (0, 0, 0), - [ - (26, 26, 26), - (244, 0, 95), - (152, 224, 36), - (253, 151, 31), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (120, 120, 120), - (98, 94, 76), - ], - [ - (244, 0, 95), - (152, 224, 36), - (224, 213, 97), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (246, 246, 239), - ], - ) - else: - export_theme = MONOKAI - - if args.export_html: - console.save_html( - '{}.html'.format(args.log_path), - theme=export_theme, - clear=False - ) - - if args.export_svg: - console.save_svg( - '{}.svg'.format(args.log_path), 
- title='Drishti', - theme=export_theme, - clear=False - ) - - if args.export_csv: - issues = [ - 'JOB', - INSIGHTS_STDIO_HIGH_USAGE, - INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, - INSIGHTS_POSIX_READ_COUNT_INTENSIVE, - INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, - INSIGHTS_POSIX_READ_SIZE_INTENSIVE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, - INSIGHTS_POSIX_REDUNDANT_READ_USAGE, - INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_METADATA_TIME, - INSIGHTS_POSIX_SIZE_IMBALANCE, - INSIGHTS_POSIX_TIME_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, - INSIGHTS_MPI_IO_NO_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, - INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, - INSIGHTS_MPI_IO_AGGREGATORS_INTRA, - INSIGHTS_MPI_IO_AGGREGATORS_INTER, - INSIGHTS_MPI_IO_AGGREGATORS_OK - ] - if codes: - issues.extend(codes) - - detected_issues = dict.fromkeys(issues, False) - detected_issues['JOB'] = job['job']['jobid'] - - for report in csv_report: - detected_issues[report] = True - - filename = '{}-summary.csv'.format( - args.log_path.replace('.darshan', '') - ) - - with open(filename, 'w') as f: - w = csv.writer(f) - w.writerow(detected_issues.keys()) - w.writerow(detected_issues.values()) + + export_csv(filename, job['job']['jobid']) From cee980c006f118e2d6d469c1a92522aee58a6ab2 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 21:27:42 -0800 Subject: [PATCH 10/19] Fix bug --- drishti/handle_darshan.py | 8 +-- drishti/module.py | 108 +++++++++++++++++++------------------- 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/drishti/handle_darshan.py b/drishti/handle_darshan.py index a5d8fbc..6daa7b5 100644 --- a/drishti/handle_darshan.py +++ b/drishti/handle_darshan.py @@ -499,11 +499,11 @@ def handler(): total_mpiio_write_operations = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() + df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() - mpi_coll_writes = df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() - mpi_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() + mpiio_coll_writes = df_mpiio['counters']['MPIIO_COLL_WRITES'].sum() + mpiio_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() detected_files = [] - if mpi_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() for index, row in df_mpiio_collective_writes.iterrows(): @@ -518,7 +518,7 @@ def handler(): column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] detected_files = pd.DataFrame(detected_files, columns=column_names) - check_mpi_collective_write_operation(mpi_coll_writes, mpi_indep_writes, total_mpiio_write_operations, detected_files, 
file_map) + check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map) ######################################################################################################################################################################### diff --git a/drishti/module.py b/drishti/module.py index fe21a18..3a58a06 100644 --- a/drishti/module.py +++ b/drishti/module.py @@ -565,35 +565,36 @@ def check_individual_read_imbalance(imbalance_count, detected_files, file_map): ['id', 'absolute_indep_reads', 'percent_indep_reads'] ''' def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map): - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( - mpiio_indep_reads, - mpiio_indep_reads / total_mpiio_read_operations * 100 - ) + if mpiio_coll_reads == 0: + if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( + mpiio_indep_reads, + mpiio_indep_reads / total_mpiio_read_operations * 100 + ) - detail = [] + detail = [] - for index, row in detected_files.iterrows(): - detail.append( - { - 'message': '{} ({}%) of independent reads to "{}"'.format( - row['absolute_indep_reads'], - row['percent_indep_reads'], - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': '{} ({}%) of independent reads to "{}"'.format( + row['absolute_indep_reads'], + row['percent_indep_reads'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) - recommendation = [ - { - 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] + recommendation = [ + { + 'message': 'Use collective read operations (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') + } + ] - insights_operation.append( - message(INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + insights_operation.append( + message(INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) else: issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( mpiio_coll_reads, @@ -609,40 +610,41 @@ def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, tot detected_files required columns: ['id', 'absolute_indep_writes', 'percent_indep_writes'] ''' -def check_mpi_collective_write_operation(mpi_coll_writes, mpi_indep_writes, total_mpiio_write_operations, detected_files, file_map): - if mpi_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( - mpi_indep_writes, - mpi_indep_writes / total_mpiio_write_operations * 100 - ) +def check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map): + if mpiio_coll_writes == 0: + if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( + mpiio_indep_writes, + mpiio_indep_writes / total_mpiio_write_operations * 100 + ) - detail = [] + detail = [] - for index, row in detected_files.iterrows(): - detail.append( - { - 'message': '{} ({}%) independent writes to "{}"'.format( - row['absolute_indep_writes'], - row['percent_indep_writes'], - file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) - ) - } - ) + for index, row in detected_files.iterrows(): + detail.append( + { + 'message': '{} ({}%) independent writes to "{}"'.format( + row['absolute_indep_writes'], + row['percent_indep_writes'], + file_map[int(row['id'])] if args.full_path else os.path.basename(file_map[int(row['id'])]) + ) + } + ) - recommendation = [ - { - 'message': 'Use collective write operations (e.g. MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] + recommendation = [ + { + 'message': 'Use collective write operations (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') + } + ] - insights_operation.append( - message(INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + insights_operation.append( + message(INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) + ) else: issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( - mpi_coll_writes, - mpi_coll_writes / total_mpiio_write_operations * 100 + mpiio_coll_writes, + mpiio_coll_writes / total_mpiio_write_operations * 100 ) insights_operation.append( From d30a3247e58922e969c902a77a5573ad1f503782 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Wed, 29 Nov 2023 21:28:20 -0800 Subject: [PATCH 11/19] Recorder cleanup --- drishti/handle_recorder.py | 944 +++++-------------------------------- 1 file changed, 125 insertions(+), 819 deletions(-) diff --git a/drishti/handle_recorder.py b/drishti/handle_recorder.py index 59462af..b864e0a 100644 --- a/drishti/handle_recorder.py +++ b/drishti/handle_recorder.py @@ -1,35 +1,19 @@ #!/usr/bin/env python3 import os -import csv import time -import json - -import datetime - import pandas as pd - -from rich import print, box -from rich.console import Group -from rich.padding import Padding -from rich.syntax import Syntax -from rich.panel import Panel -from rich.terminal_theme import TerminalTheme -from rich.terminal_theme import MONOKAI - from recorder_utils import RecorderReader from recorder_utils.build_offset_intervals import build_offset_intervals - -from .includes import * +from .module import * def get_accessed_files(reader): ranks = reader.GM.total_ranks - filemap = {} + file_map = {} for rank in range(ranks): - filemap.update(reader.LMs[rank].filemap) - - return filemap + file_map.update(reader.LMs[rank].filemap) + return file_map def init_df_posix_recordes(reader): @@ -49,8 +33,8 @@ def init_df_posix_recordes(reader): return df_posix_records -def handler(args): - init_console(args) +def handler(): + init_console() validate_thresholds() insights_start_time = time.time() @@ -59,21 +43,20 @@ def handler(args): df_intervals = build_offset_intervals(reader) df_posix_records = init_df_posix_recordes(reader) - unique_files = get_accessed_files(reader) + file_map = get_accessed_files(reader) def add_api(row): if 'MPI' in row['function']: - return 'MPIIO' + return 'MPI-IO' elif 'H5' in row['function']: return 'H5F' else: return 'POSIX' - df_intervals['api'] = df_intervals.apply(add_api, axis=1) - def add_duration(row): return row['end'] - row['start'] + df_intervals['api'] = df_intervals.apply(add_api, axis=1) df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) @@ -87,16 +70,16 @@ def add_duration(row): total_size_mpiio = 0 total_size = 0 - total_files = len(unique_files) + total_files = len(file_map) total_files_stdio = 0 total_files_posix = 0 total_files_mpiio = 0 - for fid in unique_files.keys(): - df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == fid)] + for id in file_map.keys(): + df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == id)] df_stdio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'STDIO')] df_posix_intervals_in_one_file = 
df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'POSIX')] - df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPIIO')] + df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPI-IO')] if len(df_stdio_intervals_in_one_file): total_files_stdio += 1 @@ -121,34 +104,8 @@ def add_duration(row): assert(total_size_posix >= 0) assert(total_size_mpiio >= 0) - if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: - issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( - total_size_stdio / total_size * 100.0, - convert_bytes(total_size_stdio) - ) - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - if 'MPIIO' not in modules: - issue = 'Application is using low-performance interface' - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) + check_stdio(total_size, total_size_stdio) + check_mpiio(modules) ######################################################################################################################################################################### @@ -165,46 +122,14 @@ def add_duration(row): total_operations = total_writes + total_reads # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance - if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) + check_operation_intensive(total_operations, total_reads, total_writes) total_read_size = df_posix[(df_posix['function'].str.contains('read'))]['size'].sum() total_written_size = df_posix[~(df_posix['function'].str.contains('read'))]['size'].sum() total_size = total_written_size + total_read_size - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read size intensive ({:.2f}% write vs. {:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) + check_size_intensive(total_size, total_read_size, total_written_size) ######################################################################################################################################################################### @@ -213,99 +138,16 @@ def add_duration(row): total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files = [] # [fname, num of read, num of write] - for fid in unique_files.keys(): - read_cnt = len(df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - write_cnt = len(df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files.append([unique_files[fid], read_cnt, write_cnt]) + detected_files = [] + for id in file_map.keys(): + read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + detected_files.append([id, read_cnt, write_cnt]) - if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( - total_reads_small, total_reads_small / total_reads * 100.0 - ) - - detail = [] - recommendation = [] - - for file in detected_files: - if file[1] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( - file[1], - file[1] / total_reads * 100.0, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation.append( - { - 'message': 'Consider buffering read operations into larger more contiguous ones' - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since the appplication already uses MPI-IO, consider using collective I/O calls (e.g. 
MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - - if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( - total_writes_small, total_writes_small / total_writes * 100.0 - ) - - detail = [] - recommendation = [] - - for file in detected_files: - if file[2] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( - file[2], - file[2] / total_writes * 100.0, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation.append( - { - 'message': 'Consider buffering write operations into larger more contiguous ones' - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + column_names = ['id', 'total_reads', 'total_writes'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + + check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map) ######################################################################################################################################################################### @@ -317,34 +159,21 @@ def add_duration(row): # Redundant read-traffic (based on Phill) # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) max_read_offset = df_posix[(df_posix['function'].str.contains('read'))]['offset'].max() - - if max_read_offset > total_read_size: - issue = 'Application might have redundant read traffic (more data read than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) - max_write_offset = df_posix[~(df_posix['function'].str.contains('read'))]['offset'].max() - - if max_write_offset > total_written_size: - issue = 'Application might have redundant write traffic (more data written than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) + + check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size) 
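    # Illustrative sketch (assumption, not part of this patch): check_traffic() is
    # expected to apply the same heuristic as the inlined Darshan logic removed
    # earlier in this series, flagging possible redundant traffic when the highest
    # byte offset touched exceeds the total volume transferred in that direction.
    # The helper name below is hypothetical, for illustration only.
    #
    #     def _flags_redundant_traffic(max_offset, total_size):
    #         # mirror of the inlined condition this refactor replaced
    #         return max_offset > total_size
    #
    #     _flags_redundant_traffic(max_read_offset, total_read_size)      # read side
    #     _flags_redundant_traffic(max_write_offset, total_written_size)  # write side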
######################################################################################################################################################################### # Check for a lot of random operations - grp_posix_by_fid = df_posix.groupby('file_id') + grp_posix_by_id = df_posix.groupby('file_id') read_consecutive = 0 read_sequential = 0 read_random = 0 - for fid, df_filtered in grp_posix_by_fid: + for id, df_filtered in grp_posix_by_id: df_filtered = df_filtered[(df_filtered['function'].str.contains('read'))].sort_values('start') for i in range(len(df_filtered) - 1): @@ -357,36 +186,11 @@ def add_duration(row): else: read_random += 1 - if total_reads: - if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( - read_random, read_random / total_reads * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data model to have consecutive or sequential reads' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( - read_consecutive / total_reads * 100.0, - read_sequential / total_reads * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) - write_consecutive = 0 write_sequential = 0 write_random = 0 - for fid, df_filtered in grp_posix_by_fid: + for id, df_filtered in grp_posix_by_id: df_filtered = df_filtered[~(df_filtered['function'].str.contains('read'))].sort_values('start') for i in range(len(df_filtered) - 1): @@ -399,37 +203,14 @@ def add_duration(row): else: write_random += 1 - if total_writes: - if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( - write_random, write_random / total_writes * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data model to have consecutive or sequential writes' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( - write_consecutive / total_writes * 100.0, - write_sequential / total_writes * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) + check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes) ######################################################################################################################################################################### # Shared file with small operations # A file is shared if it's been read/written by more than 1 rank - detected_files = grp_posix_by_fid['rank'].nunique() + detected_files = grp_posix_by_id['rank'].nunique() shared_files = set(detected_files[detected_files > 1].index) total_shared_reads = 0 @@ -437,116 +218,36 @@ def add_duration(row): total_shared_writes = 0 total_shared_writes_small = 0 - detected_files = [] # [fname, num of 
read, num of write] - for fid in shared_files: - total_shared_reads += len(df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read'))]) - total_shared_writes += len(df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read'))]) + detected_files = [] + for id in shared_files: + total_shared_reads += len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))]) + total_shared_writes += len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))]) - read_cnt = len(df_posix[(df_posix['file_id'] == fid) + read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - write_cnt = len(df_posix[(df_posix['file_id'] == fid) + write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files.append([unique_files[fid], read_cnt, write_cnt]) + detected_files.append([id, read_cnt, write_cnt]) total_shared_reads_small += read_cnt total_shared_writes_small += write_cnt - - if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( - total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 - ) - - detail = [] - - for file in detected_files: - if file[1] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( - file[1], - file[1] / total_reads * 100.0, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider coalesceing read requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - - if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( - total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 - ) - - detail = [] - - for file in detected_files: - if file[2] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2): - detail.append( - { - 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( - file[2], - file[2] / total_writes * 100.0, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + 
column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + + check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, detected_files, file_map) ######################################################################################################################################################################### # TODO: Here I assume all operations other than write/read are metadata operations df_posix_metadata = df_posix_records[~(df_posix_records['function'].str.contains('read|write|print', na=False))] df_detected = df_posix_metadata.groupby('rank')['duration'].sum().reset_index() - has_long_metadata = df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)] - - if not has_long_metadata.empty: - issue = 'There are {} ranks where metadata operations take over {} seconds'.format( - len(has_long_metadata), THRESHOLD_METADATA_TIME_RANK - ) - - recommendation = [ - { - 'message': 'Attempt to combine files, reduce, or cache metadata operations' - } - ] - - if 'H5F' in modules: - recommendation.append( - { - 'message': 'Since your appplication uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'Since your appplication uses HDF5, try using metadata cache to defer metadata operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default') - } - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) + count_long_metadata = len(df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)]) + check_long_metadata(count_long_metadata, modules) + # We already have a single line for each shared-file access # To check for stragglers, we can check the difference between the @@ -557,9 +258,8 @@ def add_duration(row): stragglers_count = 0 detected_files = [] - - for fid in shared_files: - df_posix_in_one_file = df_posix[(df_posix['file_id'] == fid)] + for id in shared_files: + df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] total_transfer_size = df_posix_in_one_file['size'].sum() df_detected = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() @@ -570,39 +270,13 @@ def add_duration(row): stragglers_count += 1 detected_files.append([ - unique_files[fid], abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 + id, abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 ]) + + column_names = ['id', 'data_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - if stragglers_count: - issue = 'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format( - stragglers_count - ) - - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning how your data is distributed in the file system by changing the 
stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) - ) + check_shared_data_imblance(stragglers_count, detected_files, file_map) # POSIX_F_FASTEST_RANK_TIME # POSIX_F_SLOWEST_RANK_TIME @@ -611,9 +285,8 @@ def add_duration(row): stragglers_count = 0 detected_files = [] - - for fid in shared_files: - df_posix_in_one_file = df_posix[(df_posix['file_id'] == fid)] + for id in shared_files: + df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] total_transfer_time = df_posix_in_one_file['duration'].sum() df_detected = df_posix_in_one_file.groupby('rank')['duration'].sum().reset_index() @@ -625,48 +298,21 @@ def add_duration(row): stragglers_count += 1 detected_files.append([ - unique_files[fid], abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 + id, abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 ]) - if stragglers_count: - issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( - stragglers_count - ) + column_names = ['id', 'time_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give - }, - { - 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation, detail) - ) + check_shared_time_imbalance(stragglers_count, detected_files, file_map) # Get the individual files responsible for imbalance imbalance_count = 0 detected_files = [] - - for fid in unique_files.keys(): - if fid in shared_files: continue - df_detected = df_posix[(df_posix['file_id'] == fid) & ~(df_posix['function'].str.contains('read'))] + for id in file_map.keys(): + if id in shared_files: continue + df_detected = df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))] max_bytes_written = df_detected['size'].max() min_bytes_written = df_detected['size'].min() @@ -675,53 +321,19 @@ def add_duration(row): imbalance_count += 1 detected_files.append([ - unique_files[fid], abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 + id, abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 ]) - if imbalance_count: - issue = 'Detected write imbalance when accessing {} individual files'.format( - imbalance_count - ) + column_names = ['id', 'write_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application 
ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + check_individual_write_imbalance(imbalance_count, detected_files, file_map) imbalance_count = 0 detected_files = [] - - for fid in shared_files: - df_detected = df_posix[(df_posix['file_id'] == fid) & (df_posix['function'].str.contains('read'))] + for id in shared_files: + df_detected = df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))] max_bytes_read = df_detected['size'].max() min_bytes_read = df_detected['size'].min() @@ -730,152 +342,62 @@ def add_duration(row): imbalance_count += 1 detected_files.append([ - unique_files[fid], abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 - ]) + id, abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 + ]) - if imbalance_count: - issue = 'Detected read imbalance when accessing {} individual files.'.format( - imbalance_count - ) + column_names = ['id', 'read_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - detail = [] - - for file in detected_files: - detail.append( - { - 'message': 'Load imbalance of {:.2f}% detected while accessing "{}"'.format( - file[1], - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + check_individual_read_imbalance(imbalance_count, detected_files, file_map) ######################################################################################################################################################################### - if df_intervals['api'].eq('MPIIO').any(): - df_mpiio = df_intervals[(df_intervals['api'] == 'MPIIO')] + if df_intervals['api'].eq('MPI-IO').any(): + df_mpiio = df_intervals[(df_intervals['api'] == 'MPI-IO')] df_mpiio_reads = df_mpiio[(df_mpiio['function'].str.contains('read'))] - mpiio_indp_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) + mpiio_indep_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) mpiio_coll_reads = 
len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('_all'))]) - total_mpiio_read_operations = mpiio_indp_reads + mpiio_coll_reads + total_mpiio_read_operations = mpiio_indep_reads + mpiio_coll_reads df_mpiio_writes = df_mpiio[~(df_mpiio['function'].str.contains('read'))] - mpiio_indp_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) + mpiio_indep_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) - total_mpiio_write_operations = mpiio_indp_writes + mpiio_coll_writes - - detected_files = [] # [fname, total_read, total_write] - for fid in unique_files.keys(): - read_cnt = len(df_mpiio_reads[(df_mpiio_reads['file_id'] == fid) & (df_mpiio_reads['function'].str.contains('read'))]) - write_cnt = len(df_mpiio_reads[(df_mpiio_reads['file_id'] == fid) & ~(df_mpiio_reads['function'].str.contains('read'))]) - detected_files.append([unique_files[fid], read_cnt, write_cnt]) - - if mpiio_coll_reads == 0: - if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( - mpiio_indp_reads, - mpiio_indp_reads / (total_mpiio_read_operations) * 100 - ) - - detail = [] - - for file in detected_files: - total_cnt = file[1] + file[2] - if total_cnt and file[1] / total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS and total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - detail.append( - { - 'message': '{} ({}%) of independent reads to "{}"'.format( - file[1], - file[1] / total_cnt * 100, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) - else: - issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( - mpiio_coll_reads, - mpiio_coll_reads / total_mpiio_read_operations * 100 - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) - ) - - if mpiio_coll_writes == 0: - if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( - mpiio_indp_writes, - mpiio_indp_writes / (total_mpiio_write_operations) * 100 - ) - - detail = [] - - for file in detected_files: - total_cnt = file[1] + file[2] - if total_cnt and file[2] / total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS and total_cnt > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - detail.append( - { - 'message': '{} ({}%) of independent writes to "{}"'.format( - file[2], - file[2] / total_cnt * 100, - file[0] if args.full_path else os.path.basename(file[0]) - ) - } - ) - - recommendation = [ - { - 'message': 'Use collective write operations (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) - ) + total_mpiio_write_operations = mpiio_indep_writes + mpiio_coll_writes - else: - issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( - mpiio_coll_writes, - mpiio_coll_writes / total_mpiio_write_operations * 100 - ) + detected_files = [] + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + for id in file_map.keys(): + indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] + indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] + indep_total_count = indep_read_count + indep_write_count; + + if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_read_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): + detected_files.append([ + id, indep_read_count, indep_read_count / indep_total_count * 100 + ]) + + column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + + check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map) + + detected_files = [] + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + for id in file_map.keys(): + indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] + indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] + indep_total_count = indep_read_count + indep_write_count; + + if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_write_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): + detected_files.append([ + id, indep_write_count, indep_write_count / indep_total_count * 100 + ]) - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) - ) + column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] + detected_files = pd.DataFrame(detected_files, columns=column_names) + + check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map) ######################################################################################################################################################################### @@ -885,60 +407,15 @@ def add_duration(row): has_hdf5_extension = False - for fid in unique_files.keys(): - fname = unique_files[fid] + for id in file_map.keys(): + fname = file_map[id] if fname.endswith('.h5') or fname.endswith('.hdf5'): has_hdf5_extension = True - if len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) == 0: - issue = 'Application could benefit from non-blocking (asynchronous) reads' - - recommendation = [] - - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, 
consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) - - if len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) == 0: - issue = 'Application could benefit from non-blocking (asynchronous) writes' - - recommendation = [] - - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) + mpiio_nb_reads = len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) + mpiio_nb_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) + + check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules) ######################################################################################################################################################################### @@ -947,34 +424,6 @@ def add_duration(row): # TODO: ######################################################################################################################################################################### - - NUMBER_OF_COMPUTE_NODES = 0 - - ######################################################################################################################################################################### - - codes = [] - if args.json: - f = open(args.json) - data = json.load(f) - - for key, values in data.items(): - for value in values: - code = value['code'] - codes.append(code) - - level = value['level'] - issue = value['issue'] - recommendation = [] - for rec in value['recommendations']: - new_message = {'message': rec} - recommendation.append(new_message) - - insights_dxt.append( - message(args, code, TARGET_DEVELOPER, level, issue, recommendation) - ) - - ######################################################################################################################################################################### - insights_end_time = time.time() @@ -992,9 +441,6 @@ def add_duration(row): total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count total_files_mpiio ), - ' [b]COMPUTE NODES[/b] 
[white]{}[/white]'.format( - NUMBER_OF_COMPUTE_NODES - ), ' [b]PROCESSES[/b] [white]{}[/white]'.format( reader.GM.total_ranks ), @@ -1013,154 +459,14 @@ def add_duration(row): console.print() - if insights_metadata: - console.print( - Panel( - Padding( - Group( - *insights_metadata - ), - (1, 1) - ), - title='METADATA', - title_align='left' - ) - ) + display_content() + display_footer(insights_start_time, insights_end_time) - if insights_operation: - console.print( - Panel( - Padding( - Group( - *insights_operation - ), - (1, 1) - ), - title='OPERATIONS', - title_align='left' - ) - ) + export_html() + export_svg() - if insights_dxt: - console.print( - Panel( - Padding( - Group( - *insights_dxt - ), - (1, 1) - ), - title='DXT', - title_align='left' - ) - ) - - console.print( - Panel( - ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( - datetime.datetime.now().year, - datetime.datetime.now(), - insights_end_time - insights_start_time - ), - box=box.SIMPLE - ) + filename = '{}-summary.csv'.format( + args.log_path ) - - if args.export_theme_light: - export_theme = TerminalTheme( - (255, 255, 255), - (0, 0, 0), - [ - (26, 26, 26), - (244, 0, 95), - (152, 224, 36), - (253, 151, 31), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (120, 120, 120), - (98, 94, 76), - ], - [ - (244, 0, 95), - (152, 224, 36), - (224, 213, 97), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (246, 246, 239), - ], - ) - else: - export_theme = MONOKAI - - if args.export_html: - console.save_html( - '{}.html'.format(args.log_path), - theme=export_theme, - clear=False - ) - - if args.export_svg: - console.save_svg( - '{}.svg'.format(args.log_path), - title='Drishti', - theme=export_theme, - clear=False - ) - - if args.export_csv: - issues = [ - 'JOB', - INSIGHTS_STDIO_HIGH_USAGE, - INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, - INSIGHTS_POSIX_READ_COUNT_INTENSIVE, - INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, - INSIGHTS_POSIX_READ_SIZE_INTENSIVE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, - INSIGHTS_POSIX_REDUNDANT_READ_USAGE, - INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_METADATA_TIME, - INSIGHTS_POSIX_SIZE_IMBALANCE, - INSIGHTS_POSIX_TIME_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, - INSIGHTS_MPI_IO_NO_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, - INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, - INSIGHTS_MPI_IO_AGGREGATORS_INTRA, - INSIGHTS_MPI_IO_AGGREGATORS_INTER, - INSIGHTS_MPI_IO_AGGREGATORS_OK - ] - if codes: - issues.extend(codes) - - detected_issues = dict.fromkeys(issues, False) - detected_issues['JOB'] = None - - for report in csv_report: - detected_issues[report] = True - - filename = '{}-summary.csv'.format( - args.log_path - ) - - with open(filename, 'w') as f: - w = csv.writer(f) - w.writerow(detected_issues.keys()) - w.writerow(detected_issues.values()) - + 
export_csv(filename) From d7fff3db6057068fb39c6f8a477d231725035f6f Mon Sep 17 00:00:00 2001 From: onewbiek Date: Thu, 30 Nov 2023 12:22:55 -0800 Subject: [PATCH 12/19] Accommodate split mode --- drishti/config.py | 24 ++++---- drishti/handle_darshan.py | 17 +++--- drishti/module.py | 114 +++++++++++++++++++++++++++++++++++--- 3 files changed, 127 insertions(+), 28 deletions(-) diff --git a/drishti/config.py b/drishti/config.py index aaf25b1..e11a824 100644 --- a/drishti/config.py +++ b/drishti/config.py @@ -84,19 +84,17 @@ DETAILS_MAX_SIZE = 10 -# TODO: need to verify the threashold to be between 0 and 1 -# TODO: read thresholds from file - -console = Console(record=True) csv_report = [] codes = [] -export_theme = MONOKAI + +# TODO: need to verify the threashold to be between 0 and 1 +# TODO: read thresholds from file def init_console(): - set_export_size() - set_export_theme() + console = Console(record=True) + if args.export_size: console.width = int(args.export_size) insights_operation.clear() insights_metadata.clear() @@ -104,10 +102,10 @@ def init_console(): insights_total[HIGH] = 0 insights_total[WARN] = 0 insights_total[RECOMMENDATIONS] = 0 + return console def set_export_theme(): - global export_theme if args.export_theme_light: export_theme = TerminalTheme( (255, 255, 255), @@ -133,10 +131,9 @@ def set_export_theme(): (246, 246, 239), ], ) - - -def set_export_size(): - if args.export_size: console.width = int(args.export_size) + else: + export_theme = MONOKAI + return export_theme def load_json(): @@ -268,5 +265,6 @@ def message(code, target, level, issue, recommendations=None, details=None): ''' Pre-load ''' -load_json() +if not args.split_files: + load_json() diff --git a/drishti/handle_darshan.py b/drishti/handle_darshan.py index 6daa7b5..98cc63b 100644 --- a/drishti/handle_darshan.py +++ b/drishti/handle_darshan.py @@ -21,7 +21,7 @@ def is_available(name): return shutil.which(name) is not None -def check_log_version(file, log_version, library_version): +def check_log_version(console, file, log_version, library_version): use_file = file if version.parse(log_version) < version.parse('3.4.0'): @@ -71,7 +71,7 @@ def check_log_version(file, log_version, library_version): def handler(): - init_console() + console = init_console() validate_thresholds() insights_start_time = time.time() @@ -86,7 +86,7 @@ def handler(): library_version = darshanll.darshan.backend.cffi_backend.get_lib_version() # Make sure log format is of the same version - filename = check_log_version(args.log_path, log_version, library_version) + filename = check_log_version(console, args.log_path, log_version, library_version) darshanll.log_close(log) @@ -651,11 +651,14 @@ def handler(): console.print() - display_content() - display_footer(insights_start_time, insights_end_time) + display_content(console) + display_footer(console, insights_start_time, insights_end_time) - export_html() - export_svg() + filename = '{}.html'.format(args.log_path) + export_html(console, filename) + + filename = '{}.svg'.format(args.log_path) + export_svg(console, filename) filename = '{}-summary.csv'.format( args.log_path.replace('.darshan', '') diff --git a/drishti/module.py b/drishti/module.py index 3a58a06..a75d574 100644 --- a/drishti/module.py +++ b/drishti/module.py @@ -433,6 +433,27 @@ def check_shared_data_imblance(stragglers_count, detected_files, file_map): ) +def check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, total_transfer_size): + if total_transfer_size and abs(slowest_rank_bytes - 
fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) + ) + + ''' detected_files required columns: ['id', 'time_imbalance'] @@ -470,6 +491,27 @@ def check_shared_time_imbalance(stragglers_count, detected_files, file_map): ) +def check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, total_transfer_time): + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 + ) + + recommendation = [ + { + 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give + }, + { + 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) + ) + + ''' detected_files required columns: ['id', 'write_imbalance'] @@ -514,6 +556,34 @@ def check_individual_write_imbalance(imbalance_count, detected_files, file_map): ) +def check_individual_write_imbalance_split(max_bytes_written, min_bytes_written): + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 + ) + + recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + ''' detected_files required columns: ['id', 'read_imbalance'] @@ -558,6 +628,34 @@ def check_individual_read_imbalance(imbalance_count, detected_files, file_map): ) +def check_individual_read_imbalance_split(max_bytes_read, min_bytes_read): + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + issue = 'Load imbalance of {:.2f}% detected'.format( + abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 + ) + + 
recommendation = [ + { + 'message': 'Consider better balancing the data transfer between the application ranks' + }, + { + 'message': 'Consider tuning the stripe size and count to better distribute the data', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') + }, + { + 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', + 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') + }, + { + 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' + } + ] + + insights_operation.append( + message(INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) + ) + + # MPIIO level check ''' @@ -738,7 +836,7 @@ def check_mpi_aggregator(cb_nodes, NUMBER_OF_COMPUTE_NODES): # Layout and export -def display_content(): +def display_content(console): if insights_metadata: console.print( Panel( @@ -782,7 +880,7 @@ def display_content(): ) -def display_footer(insights_start_time, insights_end_time): +def display_footer(console, insights_start_time, insights_end_time): console.print( Panel( ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( @@ -794,21 +892,21 @@ def display_footer(insights_start_time, insights_end_time): ) ) -def export_html(): +def export_html(console, filename): if args.export_html: console.save_html( - '{}.html'.format(args.log_path), - theme=export_theme, + filename, + theme=set_export_theme(), clear=False ) -def export_svg(): +def export_svg(console, filename): if args.export_svg: console.save_svg( - '{}.svg'.format(args.log_path), + filename, title='Drishti', - theme=export_theme, + theme=set_export_theme(), clear=False ) From 5aac80c0d326aeb4ebcad6fa4f714abbe86f905e Mon Sep 17 00:00:00 2001 From: onewbiek Date: Thu, 30 Nov 2023 12:23:43 -0800 Subject: [PATCH 13/19] Embedded split mode --- drishti/handle_recorder.py | 449 ++++++++------ drishti/handle_recorder_split.py | 982 ------------------------------- drishti/includes.py | 203 ------- drishti/reporter.py | 17 +- 4 files changed, 290 insertions(+), 1361 deletions(-) delete mode 100644 drishti/handle_recorder_split.py delete mode 100644 drishti/includes.py diff --git a/drishti/handle_recorder.py b/drishti/handle_recorder.py index b864e0a..a9af622 100644 --- a/drishti/handle_recorder.py +++ b/drishti/handle_recorder.py @@ -26,19 +26,21 @@ def init_df_posix_recordes(reader): func_name = func_list[record.func_id] if 'MPI' not in func_name and 'H5' not in func_name: - records.append( [rank, func_name, record.tstart, record.tend] ) + filename = None + if "open" in func_name or "close" in func_name or "creat" in func_name \ + or "seek" in func_name or "sync" in func_name: + fstr = record.args[0] + filename = fstr if type(fstr)==str else fstr.decode('utf-8') + filename = filename.replace('./', '') - head = ['rank', 'function', 'start', 'end'] + records.append( [filename, rank, func_name, record.tstart, record.tend] ) + + head = ['fname', 'rank', 'function', 'start', 'end'] df_posix_records = pd.DataFrame(records, columns=head) return df_posix_records def handler(): - init_console() - validate_thresholds() - - insights_start_time = time.time() - reader = RecorderReader(args.log_path) df_intervals = build_offset_intervals(reader) df_posix_records = init_df_posix_recordes(reader) @@ -60,10 +62,23 @@ def add_duration(row): 
df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) - modules = set(df_intervals['api'].unique()) + if args.split_files: + for fid in file_map: + process_helper(file_map, df_intervals[(df_intervals['file_id'] == fid)], + df_posix_records[(df_posix_records['fname'] == file_map[fid])], fid) + else: + process_helper(file_map, df_intervals, df_posix_records) - ######################################################################################################################################################################### +def process_helper(file_map, df_intervals, df_posix_records, fid=None): + if not len(df_intervals): return + + insights_start_time = time.time() + + console = init_console() + validate_thresholds() + + modules = set(df_intervals['api'].unique()) # Check usage of POSIX, and MPI-IO per file total_size_stdio = 0 total_size_posix = 0 @@ -75,23 +90,28 @@ def add_duration(row): total_files_posix = 0 total_files_mpiio = 0 - for id in file_map.keys(): - df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == id)] - df_stdio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'STDIO')] - df_posix_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'POSIX')] - df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPI-IO')] + if args.split_files: + total_size_stdio = df_intervals[(df_intervals['api'] == 'STDIO')]['size'].sum() + total_size_posix = df_intervals[(df_intervals['api'] == 'POSIX')]['size'].sum() + total_size_mpiio = df_intervals[(df_intervals['api'] == 'MPI-IO')]['size'].sum() + else: + for id in file_map.keys(): + df_intervals_in_one_file = df_intervals[(df_intervals['file_id'] == id)] + df_stdio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'STDIO')] + df_posix_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'POSIX')] + df_mpiio_intervals_in_one_file = df_intervals_in_one_file[(df_intervals_in_one_file['api'] == 'MPI-IO')] - if len(df_stdio_intervals_in_one_file): - total_files_stdio += 1 - total_size_stdio += df_stdio_intervals_in_one_file['size'].sum() + if len(df_stdio_intervals_in_one_file): + total_files_stdio += 1 + total_size_stdio += df_stdio_intervals_in_one_file['size'].sum() - if len(df_posix_intervals_in_one_file): - total_files_posix += 1 - total_size_posix += df_posix_intervals_in_one_file['size'].sum() + if len(df_posix_intervals_in_one_file): + total_files_posix += 1 + total_size_posix += df_posix_intervals_in_one_file['size'].sum() - if len(df_mpiio_intervals_in_one_file): - total_files_mpiio += 1 - total_size_mpiio += df_mpiio_intervals_in_one_file['size'].sum() + if len(df_mpiio_intervals_in_one_file): + total_files_mpiio += 1 + total_size_mpiio += df_mpiio_intervals_in_one_file['size'].sum() # Since POSIX will capture both POSIX-only accesses and those comming from MPI-IO, we can subtract those @@ -138,14 +158,17 @@ def add_duration(row): total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files = [] - for id in file_map.keys(): - read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < 
THRESHOLD_SMALL_BYTES)]) - write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files.append([id, read_cnt, write_cnt]) + if args.split_files: + detected_files = pd.DataFrame() + else: + detected_files = [] + for id in file_map.keys(): + read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + detected_files.append([id, read_cnt, write_cnt]) - column_names = ['id', 'total_reads', 'total_writes'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'total_reads', 'total_writes'] + detected_files = pd.DataFrame(detected_files, columns=column_names) check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map) @@ -213,37 +236,38 @@ def add_duration(row): detected_files = grp_posix_by_id['rank'].nunique() shared_files = set(detected_files[detected_files > 1].index) - total_shared_reads = 0 - total_shared_reads_small = 0 - total_shared_writes = 0 - total_shared_writes_small = 0 - - detected_files = [] - for id in shared_files: - total_shared_reads += len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))]) - total_shared_writes += len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))]) - - read_cnt = len(df_posix[(df_posix['file_id'] == id) + total_shared_reads = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read'))]) + total_shared_reads_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - write_cnt = len(df_posix[(df_posix['file_id'] == id) + + total_shared_writes = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read'))]) + total_shared_writes_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - detected_files.append([id, read_cnt, write_cnt]) - total_shared_reads_small += read_cnt - total_shared_writes_small += write_cnt - - column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + if args.split_files: + detected_files = pd.DataFrame() + else: + detected_files = [] + for id in shared_files: + read_cnt = len(df_posix[(df_posix['file_id'] == id) + & (df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + write_cnt = len(df_posix[(df_posix['file_id'] == id) + & ~(df_posix['function'].str.contains('read')) + & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + detected_files.append([id, read_cnt, write_cnt]) + + column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] + detected_files = pd.DataFrame(detected_files, columns=column_names) check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, detected_files, file_map) 
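# The refactor funnels the per-file counters into a small DataFrame plus a
# file_map (file id -> path) before handing them to the check helper defined
# in module.py. A minimal sketch of that data shape, with made-up ids and
# paths purely for illustration:
import pandas as pd

file_map = {1: '/scratch/run/output.h5', 2: '/scratch/run/checkpoint.dat'}

detected_files = pd.DataFrame(
    [[1, 1200, 300], [2, 10, 0]],
    columns=['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES']
)

# Inside the helpers, ids are expected to be resolved back to paths via the
# map, e.g. when building per-file detail messages:
for _, row in detected_files.iterrows():
    print('{} small reads / {} small writes to "{}"'.format(
        row['INSIGHTS_POSIX_SMALL_READS'],
        row['INSIGHTS_POSIX_SMALL_WRITES'],
        file_map[row['id']]
    ))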
######################################################################################################################################################################### - # TODO: Here I assume all operations other than write/read are metadata operations - df_posix_metadata = df_posix_records[~(df_posix_records['function'].str.contains('read|write|print', na=False))] - df_detected = df_posix_metadata.groupby('rank')['duration'].sum().reset_index() + # TODO: Assumed metadata operations: open, close, sync, create, seek + df_detected = df_posix_records.groupby('rank')['duration'].sum().reset_index() count_long_metadata = len(df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)]) check_long_metadata(count_long_metadata, modules) @@ -254,101 +278,137 @@ def add_duration(row): # POSIX_FASTEST_RANK_BYTES # POSIX_SLOWEST_RANK_BYTES # POSIX_VARIANCE_RANK_BYTES + if args.split_files: + if df_posix['rank'].nunique() > 1: + total_transfer_size = df_posix['size'].sum() - stragglers_count = 0 - - detected_files = [] - for id in shared_files: - df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] - total_transfer_size = df_posix_in_one_file['size'].sum() + df_detected = df_posix.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() + slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] + fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] + + check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, total_transfer_size) + else: + stragglers_count = 0 + + detected_files = [] + for id in shared_files: + df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] + total_transfer_size = df_posix_in_one_file['size'].sum() - df_detected = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() - slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] - fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] + df_detected = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() + slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] + fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] - if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: - stragglers_count += 1 + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: + stragglers_count += 1 - detected_files.append([ - id, abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 - ]) - - column_names = ['id', 'data_imbalance'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + detected_files.append([ + id, abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 + ]) + + column_names = ['id', 'data_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - check_shared_data_imblance(stragglers_count, detected_files, file_map) + check_shared_data_imblance(stragglers_count, detected_files, file_map) # POSIX_F_FASTEST_RANK_TIME # POSIX_F_SLOWEST_RANK_TIME # POSIX_F_VARIANCE_RANK_TIME + if args.split_files: + if df_posix['rank'].nunique() > 1: + total_transfer_time = df_posix['duration'].sum() - stragglers_count = 0 - - detected_files = [] - for id in shared_files: - df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] - total_transfer_time = df_posix_in_one_file['duration'].sum() 
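# Both the split-file and per-file branches above apply the same straggler
# test: aggregate bytes per rank, take the bytes moved by the slowest- and
# fastest-duration ranks, and compare their gap against the total transfer.
# A compact, self-contained sketch of that check, with THRESHOLD_STRAGGLERS
# assumed at its default of 0.15:
import pandas as pd

THRESHOLD_STRAGGLERS = 0.15  # fraction of the total transferred bytes

def detect_straggler(df_posix_in_one_file):
    total = df_posix_in_one_file['size'].sum()
    per_rank = df_posix_in_one_file.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index()
    slowest = per_rank.loc[per_rank['duration'].idxmax(), 'size']
    fastest = per_rank.loc[per_rank['duration'].idxmin(), 'size']
    return bool(total and abs(slowest - fastest) / total > THRESHOLD_STRAGGLERS)

# Example: rank 0 moves 1 MB slowly, rank 1 moves 9 MB quickly -> ~80% imbalance
df = pd.DataFrame({'rank': [0, 1], 'size': [1_000_000, 9_000_000], 'duration': [5.0, 1.0]})
print(detect_straggler(df))  # True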
+ df_detected = df_posix.groupby('rank')['duration'].sum().reset_index() + + slowest_rank_time = df_detected['duration'].max() + fastest_rank_time = df_detected['duration'].min() + + check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, total_transfer_time) + else: + stragglers_count = 0 + + detected_files = [] + for id in shared_files: + df_posix_in_one_file = df_posix[(df_posix['file_id'] == id)] + total_transfer_time = df_posix_in_one_file['duration'].sum() - df_detected = df_posix_in_one_file.groupby('rank')['duration'].sum().reset_index() + df_detected = df_posix_in_one_file.groupby('rank')['duration'].sum().reset_index() - slowest_rank_time = df_detected['duration'].max() - fastest_rank_time = df_detected['duration'].min() + slowest_rank_time = df_detected['duration'].max() + fastest_rank_time = df_detected['duration'].min() - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: - stragglers_count += 1 + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + stragglers_count += 1 - detected_files.append([ - id, abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 - ]) + detected_files.append([ + id, abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 + ]) - column_names = ['id', 'time_imbalance'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'time_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - check_shared_time_imbalance(stragglers_count, detected_files, file_map) + check_shared_time_imbalance(stragglers_count, detected_files, file_map) # Get the individual files responsible for imbalance - imbalance_count = 0 + if args.split_files: + if df_posix['rank'].nunique() == 1: + df_detected = df_posix[~(df_posix['function'].str.contains('read'))] + + max_bytes_written = df_detected['size'].max() + min_bytes_written = df_detected['size'].min() + + check_individual_write_imbalance_split(max_bytes_written, min_bytes_written) + + if df_posix['rank'].nunique() == 1: + df_detected = df_posix[(df_posix['function'].str.contains('read'))] + + max_bytes_read = df_detected['size'].max() + min_bytes_read = df_detected['size'].min() + + check_individual_read_imbalance_split(max_bytes_read, min_bytes_read) + else: + imbalance_count = 0 - detected_files = [] - for id in file_map.keys(): - if id in shared_files: continue - df_detected = df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))] - - max_bytes_written = df_detected['size'].max() - min_bytes_written = df_detected['size'].min() + detected_files = [] + for id in file_map.keys(): + if id in shared_files: continue + df_detected = df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read'))] + + max_bytes_written = df_detected['size'].max() + min_bytes_written = df_detected['size'].min() - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: - imbalance_count += 1 + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + imbalance_count += 1 - detected_files.append([ - id, abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 - ]) + detected_files.append([ + id, abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 + ]) - column_names = ['id', 'write_imbalance'] - detected_files = 
pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'write_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - check_individual_write_imbalance(imbalance_count, detected_files, file_map) + check_individual_write_imbalance(imbalance_count, detected_files, file_map) - imbalance_count = 0 + imbalance_count = 0 - detected_files = [] - for id in shared_files: - df_detected = df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))] - - max_bytes_read = df_detected['size'].max() - min_bytes_read = df_detected['size'].min() + detected_files = [] + for id in shared_files: + df_detected = df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read'))] + + max_bytes_read = df_detected['size'].max() + min_bytes_read = df_detected['size'].min() - if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: - imbalance_count += 1 + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + imbalance_count += 1 - detected_files.append([ - id, abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 - ]) + detected_files.append([ + id, abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 + ]) - column_names = ['id', 'read_imbalance'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'read_imbalance'] + detected_files = pd.DataFrame(detected_files, columns=column_names) - check_individual_read_imbalance(imbalance_count, detected_files, file_map) + check_individual_read_imbalance(imbalance_count, detected_files, file_map) ######################################################################################################################################################################### @@ -365,37 +425,43 @@ def add_duration(row): mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) total_mpiio_write_operations = mpiio_indep_writes + mpiio_coll_writes - detected_files = [] - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - for id in file_map.keys(): - indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] - indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] - indep_total_count = indep_read_count + indep_write_count; + if args.split_files: + detected_files = pd.DataFrame() + else: + detected_files = [] + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + for id in file_map.keys(): + indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] + indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] + indep_total_count = indep_read_count + indep_write_count; - if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_read_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): - detected_files.append([ - id, indep_read_count, indep_read_count / indep_total_count * 100 - ]) + if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_read_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): + detected_files.append([ + id, indep_read_count, 
indep_read_count / indep_total_count * 100 + ]) - column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'absolute_indep_reads', 'percent_indep_reads'] + detected_files = pd.DataFrame(detected_files, columns=column_names) check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map) - detected_files = [] - if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - for id in file_map.keys(): - indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] - indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] - indep_total_count = indep_read_count + indep_write_count; + if args.split_files: + detected_files = pd.DataFrame() + else: + detected_files = [] + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + for id in file_map.keys(): + indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] + indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] + indep_total_count = indep_read_count + indep_write_count; - if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_write_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): - detected_files.append([ - id, indep_write_count, indep_write_count / indep_total_count * 100 - ]) + if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_write_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): + detected_files.append([ + id, indep_write_count, indep_write_count / indep_total_count * 100 + ]) - column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] - detected_files = pd.DataFrame(detected_files, columns=column_names) + column_names = ['id', 'absolute_indep_writes', 'percent_indep_writes'] + detected_files = pd.DataFrame(detected_files, columns=column_names) check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map) @@ -429,44 +495,83 @@ def add_duration(row): console.print() - console.print( - Panel( - '\n'.join([ - ' [b]RECORDER[/b]: [white]{}[/white]'.format( - os.path.basename(args.log_path) + if args.split_files: + console.print( + Panel( + '\n'.join([ + ' [b]RECORDER[/b]: [white]{}[/white]'.format( + os.path.basename(args.log_path) + ), + ' [b]FILE[/b]: [white]{} ({})[/white]'.format( + file_map[fid], + fid, + ), + ' [b]PROCESSES[/b] [white]{}[/white]'.format( + df_intervals['rank'].nunique() + ), + ]), + title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', + title_align='left', + subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( + insights_total[HIGH], + insights_total[WARN], + insights_total[RECOMMENDATIONS], ), - ' [b]FILES[/b]: [white]{} files ({} use STDIO, {} use POSIX, {} use MPI-IO)[/white]'.format( - total_files, - total_files_stdio, - total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count - total_files_mpiio - ), - ' [b]PROCESSES[/b] 
[white]{}[/white]'.format( - reader.GM.total_ranks + subtitle_align='left', + padding=1 + ) + ) + else: + console.print( + Panel( + '\n'.join([ + ' [b]RECORDER[/b]: [white]{}[/white]'.format( + os.path.basename(args.log_path) + ), + ' [b]FILES[/b]: [white]{} files ({} use STDIO, {} use POSIX, {} use MPI-IO)[/white]'.format( + total_files, + total_files_stdio, + total_files_posix - total_files_mpiio, # Since MPI-IO files will always use POSIX, we can decrement to get a unique count + total_files_mpiio + ), + ' [b]PROCESSES[/b] [white]{}[/white]'.format( + df_intervals['rank'].nunique() + ), + ]), + title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', + title_align='left', + subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( + insights_total[HIGH], + insights_total[WARN], + insights_total[RECOMMENDATIONS], ), - ]), - title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', - title_align='left', - subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( - insights_total[HIGH], - insights_total[WARN], - insights_total[RECOMMENDATIONS], - ), - subtitle_align='left', - padding=1 + subtitle_align='left', + padding=1 + ) ) - ) console.print() - display_content() - display_footer(insights_start_time, insights_end_time) + display_content(console) + display_footer(console, insights_start_time, insights_end_time) + + if args.split_files: + filename = '{}.{}.html'.format(args.log_path, fid) + else: + filename = '{}.html'.format(args.log_path) + + export_html(console, filename) + + if args.split_files: + filename = '{}.{}.svg'.format(args.log_path, fid) + else: + filename = '{}.svg'.format(args.log_path) - export_html() - export_svg() + export_svg(console, filename) - filename = '{}-summary.csv'.format( - args.log_path - ) + if args.split_files: + filename = '{}.{}.summary.csv'.format(args.log_path, fid) + else: + filename = '{}-summary.csv'.format(args.log_path) export_csv(filename) diff --git a/drishti/handle_recorder_split.py b/drishti/handle_recorder_split.py deleted file mode 100644 index 74bc899..0000000 --- a/drishti/handle_recorder_split.py +++ /dev/null @@ -1,982 +0,0 @@ -#!/usr/bin/env python3 - -import os -import csv -import time -import json - -import datetime - -import pandas as pd - -from rich import print, box -from rich.console import Group -from rich.padding import Padding -from rich.syntax import Syntax -from rich.panel import Panel -from rich.terminal_theme import TerminalTheme -from rich.terminal_theme import MONOKAI - -from recorder_utils import RecorderReader -from recorder_utils.build_offset_intervals import build_offset_intervals - -from .includes import * - - -def get_accessed_files(reader): - ranks = reader.GM.total_ranks - filemap = {} - for rank in range(ranks): - filemap.update(reader.LMs[rank].filemap) - - return filemap - - -def init_df_posix_recordes(reader): - func_list = reader.funcs - ranks = reader.GM.total_ranks - records = [] - for rank in range(ranks): - for i in range(reader.LMs[rank].total_records): - record = reader.records[rank][i] - func_name = func_list[record.func_id] - - if 'MPI' not in func_name and 'H5' not in func_name: - filename = None - if "open" in func_name or "close" in func_name or "creat" in func_name \ - or "seek" in func_name or "sync" in func_name: - fstr = record.args[0] - filename = fstr if type(fstr)==str else fstr.decode('utf-8') - filename = filename.replace('./', 
'') - - records.append( [filename, rank, func_name, record.tstart, record.tend] ) - - head = ['fname', 'rank', 'function', 'start', 'end'] - df_posix_records = pd.DataFrame(records, columns=head) - return df_posix_records - - -def handler(args): - reader = RecorderReader(args.log_path) - df_intervals = build_offset_intervals(reader) - df_posix_records = init_df_posix_recordes(reader) - - unique_files = get_accessed_files(reader) - - def add_api(row): - if 'MPI' in row['function']: - return 'MPIIO' - elif 'H5' in row['function']: - return 'H5F' - else: - return 'POSIX' - - df_intervals['api'] = df_intervals.apply(add_api, axis=1) - - def add_duration(row): - return row['end'] - row['start'] - - df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) - df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) - - ######################################################################################################################################################################### - for fid, fname in unique_files.items(): - console = Console(record=True) - init_console(args) - validate_thresholds() - insights_start_time = time.time() - - df_intervals_temp = df_intervals[(df_intervals['file_id'] == fid)] - if not len(df_intervals_temp): continue - - df_posix_records = df_posix_records[(df_posix_records['fname'] == fname)] - modules = set(df_intervals_temp['api'].unique()) - - # Check usage of POSIX, and MPI-IO per file - total_size_stdio = 0 - total_size_posix = 0 - total_size_mpiio = 0 - total_size = 0 - - df_stdio_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'STDIO')] - df_posix_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'POSIX')] - df_mpiio_intervals = df_intervals_temp[(df_intervals_temp['api'] == 'MPIIO')] - - if len(df_stdio_intervals): - total_size_stdio += df_stdio_intervals['size'].sum() - - if len(df_posix_intervals): - total_size_posix += df_posix_intervals['size'].sum() - - if len(df_mpiio_intervals): - total_size_mpiio += df_mpiio_intervals['size'].sum() - - - # Since POSIX will capture both POSIX-only accesses and those comming from MPI-IO, we can subtract those - if total_size_posix > 0 and total_size_posix >= total_size_mpiio: - total_size_posix -= total_size_mpiio - - total_size = total_size_stdio + total_size_posix + total_size_mpiio - - assert(total_size_stdio >= 0) - assert(total_size_posix >= 0) - assert(total_size_mpiio >= 0) - - if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: - issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( - total_size_stdio / total_size * 100.0, - convert_bytes(total_size_stdio) - ) - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_STDIO_HIGH_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - if 'MPIIO' not in modules: - issue = 'Application is using low-performance interface' - - recommendation = [ - { - 'message': 'Consider switching to a high-performance I/O interface such as MPI-IO' - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) - - ######################################################################################################################################################################### - - if df_intervals_temp['api'].eq('POSIX').any(): - df_posix = 
df_intervals_temp[(df_intervals_temp['api'] == 'POSIX')] - - ######################################################################################################################################################################### - - # Get number of write/read operations - total_reads = len(df_posix[(df_posix['function'].str.contains('read'))]) - total_writes = len(df_posix[~(df_posix['function'].str.contains('read'))]) - - # Get total number of I/O operations - total_operations = total_writes + total_reads - - # To check whether the application is write-intersive or read-intensive we only look at the POSIX level and check if the difference between reads and writes is larger than 10% (for more or less), otherwise we assume a balance - if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( - total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - total_read_size = df_posix[(df_posix['function'].str.contains('read'))]['size'].sum() - total_written_size = df_posix[~(df_posix['function'].str.contains('read'))]['size'].sum() - - total_size = total_written_size + total_read_size - - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: - issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( - total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_READ_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) - ) - - ######################################################################################################################################################################### - - # Get the number of small I/O operations (less than 1 MB) - - total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - - if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( - total_reads_small, total_reads_small / total_reads * 100.0 - ) - - recommendation = [] - - recommendation.append( - { - 'message': 'Consider buffering read operations into larger more contiguous ones' - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_read_all() or MPI_File_read_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( - total_writes_small, total_writes_small / total_writes * 100.0 - ) - - recommendation = [] - - recommendation.append( - { - 'message': 'Consider buffering write operations into larger more contiguous ones' - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since the application already uses MPI-IO, consider using collective I/O calls (e.g. MPI_File_write_all() or MPI_File_write_at_all()) to aggregate requests into larger ones', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ) - else: - recommendation.append( - { - 'message': 'Application does not use MPI-IO for operations, consider use this interface instead to harness collective operations' - } - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - ######################################################################################################################################################################### - - # How many requests are misaligned? 
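# A minimal standalone sketch of the small-request ratio test above, assuming the
# 'function' and 'size' columns of the interval dataframe built by
# build_offset_intervals() and the thresholds defined in includes.py
# (1 MB request size, 10% of all requests, 1000 requests in absolute terms).
# The sample rows below are hypothetical.
import pandas as pd

df_sample = pd.DataFrame({
    'function': ['read', 'read', 'write', 'pwrite', 'read'],  # hypothetical trace
    'size':     [512, 2048, 4096, 1048576, 100],              # bytes per request
})

THRESHOLD_SMALL_BYTES = 1048576           # 1 MB
THRESHOLD_SMALL_REQUESTS = 0.1            # small requests relative to all requests
THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000  # minimum absolute count

is_read = df_sample['function'].str.contains('read')
total_reads = int(is_read.sum())
total_reads_small = int((is_read & (df_sample['size'] < THRESHOLD_SMALL_BYTES)).sum())

# Both the relative and the absolute condition must hold before the insight is raised.
triggers_small_read_insight = (
    total_reads > 0
    and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS
    and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE
)
print(total_reads, total_reads_small, triggers_small_read_insight)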
- # TODO: - - ######################################################################################################################################################################### - - # Redundant read-traffic (based on Phill) - # POSIX_MAX_BYTE_READ (Highest offset in the file that was read) - max_read_offset = df_posix[(df_posix['function'].str.contains('read'))]['offset'].max() - - if max_read_offset > total_read_size: - issue = 'Application might have redundant read traffic (more data read than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_READ_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) - - max_write_offset = df_posix[~(df_posix['function'].str.contains('read'))]['offset'].max() - - if max_write_offset > total_written_size: - issue = 'Application might have redundant write traffic (more data written than the highest offset)' - - insights_metadata.append( - message(args, INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, None) - ) - - ######################################################################################################################################################################### - - # Check for a lot of random operations - - read_consecutive = 0 - read_sequential = 0 - read_random = 0 - - df_filtered = df_posix[(df_posix['function'].str.contains('read'))].sort_values('start') - - for i in range(len(df_filtered) - 1): - curr_interval = df_filtered.iloc[i] - next_interval = df_filtered.iloc[i + 1] - if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: - read_consecutive += 1 - elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: - read_sequential += 1 - else: - read_random += 1 - - if total_reads: - if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( - read_random, read_random / total_reads * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data model to have consecutive or sequential reads' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) read requests'.format( - read_consecutive / total_reads * 100.0, - read_sequential / total_reads * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) - - write_consecutive = 0 - write_sequential = 0 - write_random = 0 - - - df_filtered = df_posix[~(df_posix['function'].str.contains('read'))].sort_values('start') - - for i in range(len(df_filtered) - 1): - curr_interval = df_filtered.iloc[i] - next_interval = df_filtered.iloc[i + 1] - if curr_interval['offset'] + curr_interval['size'] == next_interval['offset']: - write_consecutive += 1 - elif curr_interval['offset'] + curr_interval['size'] < next_interval['offset']: - write_sequential += 1 - else: - write_random += 1 - - if total_writes: - if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: - issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( - write_random, write_random / total_writes * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider changing your data 
model to have consecutive or sequential writes' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application mostly uses consecutive ({:.2f}%) and sequential ({:.2f}%) write requests'.format( - write_consecutive / total_writes * 100.0, - write_sequential / total_writes * 100.0 - ) - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, TARGET_DEVELOPER, OK, issue, None) - ) - - ######################################################################################################################################################################### - - # Shared file with small operations - - # A file is shared if it's been read/written by more than 1 rank - detected_files = df_posix['rank'].nunique() - - total_shared_reads = 0 - total_shared_reads_small = 0 - total_shared_writes = 0 - total_shared_writes_small = 0 - - if df_posix['rank'].nunique() > 1: - total_shared_reads += len(df_posix[(df_posix['function'].str.contains('read'))]) - total_shared_writes += len(df_posix[~(df_posix['function'].str.contains('read'))]) - - total_shared_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - total_shared_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - - if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( - total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider coalesceing read requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: - issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( - total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 - ) - - recommendation = [ - { - 'message': 'Consider coalescing write requests into larger more contiguous ones using MPI-IO collective operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - ######################################################################################################################################################################### - - # TODO: Here I assume all operations other than write/read are metadata operations - df_posix_metadata = 
df_posix_records[~(df_posix_records['function'].str.contains('read|write|print', na=False))] - df_detected = df_posix_metadata.groupby('rank')['duration'].sum().reset_index() - has_long_metadata = df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)] - - if not has_long_metadata.empty: - issue = 'There are {} ranks where metadata operations take over {} seconds'.format( - len(has_long_metadata), THRESHOLD_METADATA_TIME_RANK - ) - - recommendation = [ - { - 'message': 'Attempt to combine files, reduce, or cache metadata operations' - } - ] - - if 'H5F' in modules: - recommendation.append( - { - 'message': 'Since your appplication uses HDF5, try enabling collective metadata calls with H5Pset_coll_metadata_write() and H5Pset_all_coll_metadata_ops()', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-collective-metadata.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'Since your appplication uses HDF5, try using metadata cache to defer metadata operations', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-cache.c'), line_numbers=True, background_color='default') - } - ) - - insights_metadata.append( - message(args, INSIGHTS_POSIX_HIGH_METADATA_TIME, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - # We already have a single line for each shared-file access - # To check for stragglers, we can check the difference between the - - # POSIX_FASTEST_RANK_BYTES - # POSIX_SLOWEST_RANK_BYTES - # POSIX_VARIANCE_RANK_BYTES - - stragglers = False - - if df_posix['rank'].nunique() > 1: - total_transfer_size = df_posix['size'].sum() - - df_detected = df_posix.groupby('rank').agg({'size': 'sum', 'duration': 'sum'}).reset_index() - slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] - fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] - - if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: - stragglers = True - - if stragglers: - issue = 'Load imbalance of {:.2f}% detected'.format( - abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning how your data is distributed in the file system by changing the stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_SIZE_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) - ) - - # POSIX_F_FASTEST_RANK_TIME - # POSIX_F_SLOWEST_RANK_TIME - # POSIX_F_VARIANCE_RANK_TIME - - stragglers = False - - if df_posix['rank'].nunique() > 1: - total_transfer_time = df_posix['duration'].sum() - - df_detected = df_posix.groupby('rank')['duration'].sum().reset_index() - - slowest_rank_time = df_detected['duration'].max() - fastest_rank_time = df_detected['duration'].min() - - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: - stragglers = True - - if stragglers: - issue = 'Load imbalance of {:.2f}% detected'.format( - abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 - ) - - recommendation = [ - { - 'message': 'Consider better distributing the data in the parallel file system' # needs to review what suggestion to give - }, - { - 'message': 'Consider tuning how your data 
is distributed in the file system by changing the stripe size and count', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_TIME_IMBALANCE, TARGET_USER, HIGH, issue, recommendation) - ) - - # Get the individual files responsible for imbalance - imbalance = False - - if df_posix['rank'].nunique() == 1: - df_detected = df_posix[~(df_posix['function'].str.contains('read'))] - - max_bytes_written = df_detected['size'].max() - min_bytes_written = df_detected['size'].min() - - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: - imbalance = True - - if imbalance: - issue = 'Load imbalance of {:.2f}% detected'.format( - abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - imbalance = False - - if df_posix['rank'].nunique() == 1: - df_detected = df_posix[(df_posix['function'].str.contains('read'))] - - max_bytes_read = df_detected['size'].max() - min_bytes_read = df_detected['size'].min() - - if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: - imbalance = True - - if imbalance: - issue = 'Load imbalance of {:.2f}% detected'.format( - abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 - ) - - recommendation = [ - { - 'message': 'Consider better balancing the data transfer between the application ranks' - }, - { - 'message': 'Consider tuning the stripe size and count to better distribute the data', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/lustre-striping.bash'), line_numbers=True, background_color='default') - }, - { - 'message': 'If the application uses netCDF and HDF5 double-check the need to set NO_FILL values', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/pnetcdf-hdf5-no-fill.c'), line_numbers=True, background_color='default') - }, - { - 'message': 'If rank 0 is the only one opening the file, consider using MPI-IO collectives' - } - ] - - insights_operation.append( - message(args, INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - ######################################################################################################################################################################### - - if df_intervals_temp['api'].eq('MPIIO').any(): - df_mpiio = df_intervals_temp[(df_intervals_temp['api'] == 'MPIIO')] - - df_mpiio_reads = df_mpiio[(df_mpiio['function'].str.contains('read'))] - mpiio_indp_reads = len(df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all'))]) - mpiio_coll_reads = 
len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('_all'))]) - total_mpiio_read_operations = mpiio_indp_reads + mpiio_coll_reads - - df_mpiio_writes = df_mpiio[~(df_mpiio['function'].str.contains('read'))] - mpiio_indp_writes = len(df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all'))]) - mpiio_coll_writes = len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('_all'))]) - total_mpiio_write_operations = mpiio_indp_writes + mpiio_coll_writes - - if mpiio_coll_reads == 0: - if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( - mpiio_indp_reads, - mpiio_indp_reads / (total_mpiio_read_operations) * 100 - ) - - recommendation = [ - { - 'message': 'Use collective read operations (e.g. MPI_File_read_all() or MPI_File_read_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-read.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - else: - issue = 'Application uses MPI-IO and read data using {} ({:.2f}%) collective operations'.format( - mpiio_coll_reads, - mpiio_coll_reads / total_mpiio_read_operations * 100 - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, TARGET_DEVELOPER, OK, issue) - ) - - if mpiio_coll_writes == 0: - if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: - issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( - mpiio_indp_writes, - mpiio_indp_writes / (total_mpiio_write_operations) * 100 - ) - - recommendation = [ - { - 'message': 'Use collective write operations (e.g. 
MPI_File_write_all() or MPI_File_write_at_all()) and set one aggregator per compute node', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-collective-write.c'), line_numbers=True, background_color='default') - } - ] - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation) - ) - - else: - issue = 'Application uses MPI-IO and write data using {} ({:.2f}%) collective operations'.format( - mpiio_coll_writes, - mpiio_coll_writes / total_mpiio_write_operations * 100 - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, TARGET_DEVELOPER, OK, issue) - ) - - ######################################################################################################################################################################### - - # Look for usage of non-block operations - - # Look for HDF5 file extension - - has_hdf5_extension = False - - if fname.endswith('.h5') or fname.endswith('.hdf5'): - has_hdf5_extension = True - - if len(df_mpiio_reads[(df_mpiio_reads['function'].str.contains('iread|begin|end'))]) == 0: - issue = 'Application could benefit from non-blocking (asynchronous) reads' - - recommendation = [] - - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-read.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iread(), MPI_File_read_all_begin/end(), or MPI_File_read_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iread.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) - - if len(df_mpiio_writes[(df_mpiio_writes['function'].str.contains('iwrite|begin|end'))]) == 0: - issue = 'Application could benefit from non-blocking (asynchronous) writes' - - recommendation = [] - - if 'H5F' in modules or has_hdf5_extension: - recommendation.append( - { - 'message': 'Since you use HDF5, consider using the ASYNC I/O VOL connector (https://github.com/hpc-io/vol-async)', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/hdf5-vol-async-write.c'), line_numbers=True, background_color='default') - } - ) - - if 'MPIIO' in modules: - recommendation.append( - { - 'message': 'Since you use MPI-IO, consider non-blocking/asynchronous I/O operations', # (e.g., MPI_File_iwrite(), MPI_File_write_all_begin/end(), or MPI_File_write_at_all_begin/end())', - 'sample': Syntax.from_path(os.path.join(ROOT, 'snippets/mpi-io-iwrite.c'), line_numbers=True, background_color='default') - } - ) - - insights_operation.append( - message(args, INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, TARGET_DEVELOPER, WARN, issue, recommendation) - ) - - ######################################################################################################################################################################### - - # Nodes and MPI-IO aggregators - # If the application uses collective reads or collective writes, look for the number of aggregators - # TODO: - - 
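# A minimal sketch of the non-blocking detection used above, assuming MPI-IO call
# names as recorded in the trace: a call is treated as non-blocking when its name
# matches 'iread'/'iwrite' or a split-collective begin/end pair. The call list below
# is hypothetical.
import pandas as pd

calls = pd.Series([
    'MPI_File_read_at_all',      # blocking collective read
    'MPI_File_iread',            # non-blocking read
    'MPI_File_write_at',         # blocking independent write
    'MPI_File_write_all_begin',  # split-collective write (begin)
    'MPI_File_write_all_end',    # split-collective write (end)
])

is_read = calls.str.contains('read')
nb_reads = int(calls[is_read].str.contains('iread|begin|end').sum())
nb_writes = int(calls[~is_read].str.contains('iwrite|begin|end').sum())

print(nb_reads, nb_writes)  # -> 1 2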
######################################################################################################################################################################### - - NUMBER_OF_COMPUTE_NODES = 0 - - ######################################################################################################################################################################### - - codes = [] - if args.json: - f = open(args.json) - data = json.load(f) - - for key, values in data.items(): - for value in values: - code = value['code'] - codes.append(code) - - level = value['level'] - issue = value['issue'] - recommendation = [] - for rec in value['recommendations']: - new_message = {'message': rec} - recommendation.append(new_message) - - insights_dxt.append( - message(args, code, TARGET_DEVELOPER, level, issue, recommendation) - ) - - ######################################################################################################################################################################### - - insights_end_time = time.time() - - console.print() - - console.print( - Panel( - '\n'.join([ - ' [b]RECORDER[/b]: [white]{}[/white]'.format( - os.path.basename(args.log_path) - ), - ' [b]FILE[/b]: [white]{} ({})[/white]'.format( - fname, - fid, - ), - # ' [b]COMPUTE NODES[/b] [white]{}[/white]'.format( - # NUMBER_OF_COMPUTE_NODES - # ), - ' [b]PROCESSES[/b] [white]{}[/white]'.format( - reader.GM.total_ranks - ), - ]), - title='[b][slate_blue3]DRISHTI[/slate_blue3] v.0.5[/b]', - title_align='left', - subtitle='[red][b]{} critical issues[/b][/red], [orange1][b]{} warnings[/b][/orange1], and [white][b]{} recommendations[/b][/white]'.format( - insights_total[HIGH], - insights_total[WARN], - insights_total[RECOMMENDATIONS], - ), - subtitle_align='left', - padding=1 - ) - ) - - console.print() - - if insights_metadata: - console.print( - Panel( - Padding( - Group( - *insights_metadata - ), - (1, 1) - ), - title='METADATA', - title_align='left' - ) - ) - - if insights_operation: - console.print( - Panel( - Padding( - Group( - *insights_operation - ), - (1, 1) - ), - title='OPERATIONS', - title_align='left' - ) - ) - - if insights_dxt: - console.print( - Panel( - Padding( - Group( - *insights_dxt - ), - (1, 1) - ), - title='DXT', - title_align='left' - ) - ) - - console.print( - Panel( - ' {} | [white]LBNL[/white] | [white]Drishti report generated at {} in[/white] {:.3f} seconds'.format( - datetime.datetime.now().year, - datetime.datetime.now(), - insights_end_time - insights_start_time - ), - box=box.SIMPLE - ) - ) - - if args.export_theme_light: - export_theme = TerminalTheme( - (255, 255, 255), - (0, 0, 0), - [ - (26, 26, 26), - (244, 0, 95), - (152, 224, 36), - (253, 151, 31), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (120, 120, 120), - (98, 94, 76), - ], - [ - (244, 0, 95), - (152, 224, 36), - (224, 213, 97), - (157, 101, 255), - (244, 0, 95), - (88, 209, 235), - (246, 246, 239), - ], - ) - else: - export_theme = MONOKAI - - if args.export_html: - console.save_html( - '{}.{}.html'.format(args.log_path, fid), - theme=export_theme, - clear=False - ) - - if args.export_svg: - console.save_svg( - '{}.{}.svg'.format(args.log_path, fid), - title='Drishti', - theme=export_theme, - clear=False - ) - - if args.export_csv: - issues = [ - 'JOB', - INSIGHTS_STDIO_HIGH_USAGE, - INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, - INSIGHTS_POSIX_READ_COUNT_INTENSIVE, - INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, - INSIGHTS_POSIX_READ_SIZE_INTENSIVE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, - 
INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, - INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE, - INSIGHTS_POSIX_REDUNDANT_READ_USAGE, - INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE, - INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE, - INSIGHTS_POSIX_HIGH_METADATA_TIME, - INSIGHTS_POSIX_SIZE_IMBALANCE, - INSIGHTS_POSIX_TIME_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE, - INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE, - INSIGHTS_MPI_IO_NO_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE, - INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE, - INSIGHTS_MPI_IO_BLOCKING_READ_USAGE, - INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE, - INSIGHTS_MPI_IO_AGGREGATORS_INTRA, - INSIGHTS_MPI_IO_AGGREGATORS_INTER, - INSIGHTS_MPI_IO_AGGREGATORS_OK - ] - if codes: - issues.extend(codes) - - detected_issues = dict.fromkeys(issues, False) - detected_issues['JOB'] = None - - for report in csv_report: - detected_issues[report] = True - - filename = '{}.{}.summary.csv'.format( - args.log_path, - fid - ) - - with open(filename, 'w') as f: - w = csv.writer(f) - w.writerow(detected_issues.keys()) - w.writerow(detected_issues.values()) - - - diff --git a/drishti/includes.py b/drishti/includes.py deleted file mode 100644 index 3b921aa..0000000 --- a/drishti/includes.py +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env python3 - -import os - -from rich.console import Console, Group -from rich.padding import Padding -from rich.panel import Panel - - -RECOMMENDATIONS = 0 -HIGH = 1 -WARN = 2 -INFO = 3 -OK = 4 - -ROOT = os.path.abspath(os.path.dirname(__file__)) - -TARGET_USER = 1 -TARGET_DEVELOPER = 2 -TARGET_SYSTEM = 3 - -insights_operation = [] -insights_metadata = [] -insights_dxt = [] - -insights_total = dict() - -insights_total[HIGH] = 0 -insights_total[WARN] = 0 -insights_total[RECOMMENDATIONS] = 0 - -THRESHOLD_OPERATION_IMBALANCE = 0.1 -THRESHOLD_SMALL_REQUESTS = 0.1 -THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000 -THRESHOLD_MISALIGNED_REQUESTS = 0.1 -THRESHOLD_METADATA = 0.1 -THRESHOLD_METADATA_TIME_RANK = 30 # seconds -THRESHOLD_RANDOM_OPERATIONS = 0.2 -THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE = 1000 -THRESHOLD_STRAGGLERS = 0.15 -THRESHOLD_IMBALANCE = 0.30 -THRESHOLD_INTERFACE_STDIO = 0.1 -THRESHOLD_COLLECTIVE_OPERATIONS = 0.5 -THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE = 1000 -THRESHOLD_SMALL_BYTES = 1048576 # 1 MB - -INSIGHTS_STDIO_HIGH_USAGE = 'S01' -INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' -INSIGHTS_POSIX_READ_COUNT_INTENSIVE = 'P02' -INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE = 'P03' -INSIGHTS_POSIX_READ_SIZE_INTENSIVE = 'P04' -INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE = 'P05' -INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_USAGE = 'P06' -INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE = 'P07' -INSIGHTS_POSIX_HIGH_MISALIGNED_FILE_USAGE = 'P08' -INSIGHTS_POSIX_REDUNDANT_READ_USAGE = 'P09' -INSIGHTS_POSIX_REDUNDANT_WRITE_USAGE = 'P10' -INSIGHTS_POSIX_HIGH_RANDOM_READ_USAGE = 'P11' -INSIGHTS_POSIX_HIGH_SEQUENTIAL_READ_USAGE = 'P12' -INSIGHTS_POSIX_HIGH_RANDOM_WRITE_USAGE = 'P13' -INSIGHTS_POSIX_HIGH_SEQUENTIAL_WRITE_USAGE = 'P14' -INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE = 'P15' -INSIGHTS_POSIX_HIGH_SMALL_WRITE_REQUESTS_SHARED_FILE_USAGE = 'P16' 
-INSIGHTS_POSIX_HIGH_METADATA_TIME = 'P17' -INSIGHTS_POSIX_SIZE_IMBALANCE = 'P18' -INSIGHTS_POSIX_TIME_IMBALANCE = 'P19' -INSIGHTS_POSIX_INDIVIDUAL_WRITE_SIZE_IMBALANCE = 'P21' -INSIGHTS_POSIX_INDIVIDUAL_READ_SIZE_IMBALANCE = 'P22' -INSIGHTS_MPI_IO_NO_USAGE = 'M01' -INSIGHTS_MPI_IO_NO_COLLECTIVE_READ_USAGE = 'M02' -INSIGHTS_MPI_IO_NO_COLLECTIVE_WRITE_USAGE = 'M03' -INSIGHTS_MPI_IO_COLLECTIVE_READ_USAGE = 'M04' -INSIGHTS_MPI_IO_COLLECTIVE_WRITE_USAGE = 'M05' -INSIGHTS_MPI_IO_BLOCKING_READ_USAGE = 'M06' -INSIGHTS_MPI_IO_BLOCKING_WRITE_USAGE = 'M07' -INSIGHTS_MPI_IO_AGGREGATORS_INTRA = 'M08' -INSIGHTS_MPI_IO_AGGREGATORS_INTER = 'M09' -INSIGHTS_MPI_IO_AGGREGATORS_OK = 'M10' - -DETAILS_MAX_SIZE = 10 - -# TODO: need to verify the threashold to be between 0 and 1 -# TODO: read thresholds from file - - -console = Console(record=True) -csv_report = [] - - -def init_console(args): - if args.export_size: console.width = int(args.export_size) - - insights_operation.clear() - insights_metadata.clear() - insights_dxt.clear() - - insights_total[HIGH] = 0 - insights_total[WARN] = 0 - insights_total[RECOMMENDATIONS] = 0 - - csv_report.clear() - -def validate_thresholds(): - """ - Validate thresholds defined by the user. - """ - assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) - assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) - assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) - assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) - assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) - - assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) - - -def convert_bytes(bytes_number): - """ - Convert bytes into formatted string. - """ - tags = [ - 'bytes', - 'KB', - 'MB', - 'GB', - 'TB', - 'PB', - 'EB' - ] - - i = 0 - double_bytes = bytes_number - - while (i < len(tags) and bytes_number >= 1024): - double_bytes = bytes_number / 1024.0 - i = i + 1 - bytes_number = bytes_number / 1024 - - return str(round(double_bytes, 2)) + ' ' + tags[i] - - -def message(args, code, target, level, issue, recommendations=None, details=None): - """ - Display the message on the screen with level, issue, and recommendation. 
- """ - icon = ':arrow_forward:' - - if level in (HIGH, WARN): - insights_total[level] += 1 - - if level == HIGH: - color = '[red]' - elif level == WARN: - color = '[orange1]' - elif level == OK: - color = '[green]' - else: - color = '' - - messages = [ - '{}{}{} {}'.format( - color, - icon, - ' [' + code + ']' if args.code else '', - issue - ) - ] - - if args.export_csv: - csv_report.append(code) - - if details: - for detail in details[:DETAILS_MAX_SIZE]: - messages.append(' {}:left_arrow_curving_right: {}'.format( - color, - detail['message'] - ) - ) - - if recommendations: - if not args.only_issues: - messages.append(' [white]:left_arrow_curving_right: [b]Recommendations:[/b]') - - for recommendation in recommendations: - messages.append(' :left_arrow_curving_right: {}'.format(recommendation['message'])) - - if args.verbose and 'sample' in recommendation: - messages.append( - Padding( - Panel( - recommendation['sample'], - title='Solution Example Snippet', - title_align='left', - padding=(1, 2) - ), - (1, 0, 1, 7) - ) - ) - - insights_total[RECOMMENDATIONS] += len(recommendations) - - return Group( - *messages - ) diff --git a/drishti/reporter.py b/drishti/reporter.py index ef92d11..54c7b17 100644 --- a/drishti/reporter.py +++ b/drishti/reporter.py @@ -6,6 +6,18 @@ from .parser import * +''' + |- handler_darshan -| + | | +reporter -> /handlers -> |- handler_recorder -| -| + | | | + |- handler_xxx ... -| | + ________________________________________________| + | + |-----> /includes -> module -> config -> parser +''' + + LOG_TYPE_DARSHAN = 0 LOG_TYPE_RECORDER = 1 @@ -37,10 +49,7 @@ def main(): from .handle_darshan import handler elif log_type == LOG_TYPE_RECORDER: - if args.split_files: - from .handle_recorder_split import handler - else: - from .handle_recorder import handler + from .handle_recorder import handler handler() From 440cd5c57b92c707633a8725f419f0ab8856e110 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Sat, 16 Dec 2023 08:26:55 -0800 Subject: [PATCH 14/19] Reorg the file structure --- MANIFEST.in | 2 +- drishti/{snippets => handlers}/__init__.py | 0 drishti/{ => handlers}/handle_darshan.py | 2 +- drishti/{ => handlers}/handle_recorder.py | 2 +- drishti/includes/__init__.py | 0 drishti/{ => includes}/config.py | 2 +- drishti/{ => includes}/module.py | 2 +- drishti/{ => includes}/parser.py | 0 drishti/includes/snippets/__init__.py | 0 drishti/{ => includes}/snippets/hdf5-alignment.c | 0 drishti/{ => includes}/snippets/hdf5-cache.c | 0 .../snippets/hdf5-collective-metadata.c | 0 .../{ => includes}/snippets/hdf5-vol-async-read.c | 0 .../{ => includes}/snippets/hdf5-vol-async-write.c | 0 drishti/{ => includes}/snippets/lustre-striping.bash | 0 .../{ => includes}/snippets/mpi-io-collective-read.c | 0 .../snippets/mpi-io-collective-write.c | 0 drishti/{ => includes}/snippets/mpi-io-hints.bash | 0 drishti/{ => includes}/snippets/mpi-io-iread.c | 0 drishti/{ => includes}/snippets/mpi-io-iwrite.c | 0 .../{ => includes}/snippets/pnetcdf-hdf5-no-fill.c | 0 drishti/reporter.py | 6 +++--- setup.py | 12 +++++------- 23 files changed, 13 insertions(+), 15 deletions(-) rename drishti/{snippets => handlers}/__init__.py (100%) rename drishti/{ => handlers}/handle_darshan.py (99%) rename drishti/{ => handlers}/handle_recorder.py (99%) create mode 100644 drishti/includes/__init__.py rename drishti/{ => includes}/config.py (99%) rename drishti/{ => includes}/module.py (99%) rename drishti/{ => includes}/parser.py (100%) create mode 100644 drishti/includes/snippets/__init__.py rename drishti/{ => 
includes}/snippets/hdf5-alignment.c (100%) rename drishti/{ => includes}/snippets/hdf5-cache.c (100%) rename drishti/{ => includes}/snippets/hdf5-collective-metadata.c (100%) rename drishti/{ => includes}/snippets/hdf5-vol-async-read.c (100%) rename drishti/{ => includes}/snippets/hdf5-vol-async-write.c (100%) rename drishti/{ => includes}/snippets/lustre-striping.bash (100%) rename drishti/{ => includes}/snippets/mpi-io-collective-read.c (100%) rename drishti/{ => includes}/snippets/mpi-io-collective-write.c (100%) rename drishti/{ => includes}/snippets/mpi-io-hints.bash (100%) rename drishti/{ => includes}/snippets/mpi-io-iread.c (100%) rename drishti/{ => includes}/snippets/mpi-io-iwrite.c (100%) rename drishti/{ => includes}/snippets/pnetcdf-hdf5-no-fill.c (100%) diff --git a/MANIFEST.in b/MANIFEST.in index f354c46..5517bcc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ include requirements.txt -include drishti/snippets/* +include drishti/includes/snippets/* diff --git a/drishti/snippets/__init__.py b/drishti/handlers/__init__.py similarity index 100% rename from drishti/snippets/__init__.py rename to drishti/handlers/__init__.py diff --git a/drishti/handle_darshan.py b/drishti/handlers/handle_darshan.py similarity index 99% rename from drishti/handle_darshan.py rename to drishti/handlers/handle_darshan.py index 98cc63b..e533358 100644 --- a/drishti/handle_darshan.py +++ b/drishti/handlers/handle_darshan.py @@ -12,7 +12,7 @@ from rich import print from packaging import version -from .module import * +from drishti.includes.module import * def is_available(name): diff --git a/drishti/handle_recorder.py b/drishti/handlers/handle_recorder.py similarity index 99% rename from drishti/handle_recorder.py rename to drishti/handlers/handle_recorder.py index a9af622..83b132d 100644 --- a/drishti/handle_recorder.py +++ b/drishti/handlers/handle_recorder.py @@ -5,7 +5,7 @@ import pandas as pd from recorder_utils import RecorderReader from recorder_utils.build_offset_intervals import build_offset_intervals -from .module import * +from drishti.includes.module import * def get_accessed_files(reader): diff --git a/drishti/includes/__init__.py b/drishti/includes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/drishti/config.py b/drishti/includes/config.py similarity index 99% rename from drishti/config.py rename to drishti/includes/config.py index e11a824..0041980 100644 --- a/drishti/config.py +++ b/drishti/includes/config.py @@ -9,7 +9,7 @@ from rich.terminal_theme import TerminalTheme from rich.terminal_theme import MONOKAI -from .parser import * +from drishti.includes.parser import * RECOMMENDATIONS = 0 diff --git a/drishti/module.py b/drishti/includes/module.py similarity index 99% rename from drishti/module.py rename to drishti/includes/module.py index a75d574..ae02c2e 100644 --- a/drishti/module.py +++ b/drishti/includes/module.py @@ -4,7 +4,7 @@ import csv from rich import box from rich.syntax import Syntax -from .config import * +from drishti.includes.config import * ''' Before calling the functions below diff --git a/drishti/parser.py b/drishti/includes/parser.py similarity index 100% rename from drishti/parser.py rename to drishti/includes/parser.py diff --git a/drishti/includes/snippets/__init__.py b/drishti/includes/snippets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/drishti/snippets/hdf5-alignment.c b/drishti/includes/snippets/hdf5-alignment.c similarity index 100% rename from drishti/snippets/hdf5-alignment.c rename to 
drishti/includes/snippets/hdf5-alignment.c diff --git a/drishti/snippets/hdf5-cache.c b/drishti/includes/snippets/hdf5-cache.c similarity index 100% rename from drishti/snippets/hdf5-cache.c rename to drishti/includes/snippets/hdf5-cache.c diff --git a/drishti/snippets/hdf5-collective-metadata.c b/drishti/includes/snippets/hdf5-collective-metadata.c similarity index 100% rename from drishti/snippets/hdf5-collective-metadata.c rename to drishti/includes/snippets/hdf5-collective-metadata.c diff --git a/drishti/snippets/hdf5-vol-async-read.c b/drishti/includes/snippets/hdf5-vol-async-read.c similarity index 100% rename from drishti/snippets/hdf5-vol-async-read.c rename to drishti/includes/snippets/hdf5-vol-async-read.c diff --git a/drishti/snippets/hdf5-vol-async-write.c b/drishti/includes/snippets/hdf5-vol-async-write.c similarity index 100% rename from drishti/snippets/hdf5-vol-async-write.c rename to drishti/includes/snippets/hdf5-vol-async-write.c diff --git a/drishti/snippets/lustre-striping.bash b/drishti/includes/snippets/lustre-striping.bash similarity index 100% rename from drishti/snippets/lustre-striping.bash rename to drishti/includes/snippets/lustre-striping.bash diff --git a/drishti/snippets/mpi-io-collective-read.c b/drishti/includes/snippets/mpi-io-collective-read.c similarity index 100% rename from drishti/snippets/mpi-io-collective-read.c rename to drishti/includes/snippets/mpi-io-collective-read.c diff --git a/drishti/snippets/mpi-io-collective-write.c b/drishti/includes/snippets/mpi-io-collective-write.c similarity index 100% rename from drishti/snippets/mpi-io-collective-write.c rename to drishti/includes/snippets/mpi-io-collective-write.c diff --git a/drishti/snippets/mpi-io-hints.bash b/drishti/includes/snippets/mpi-io-hints.bash similarity index 100% rename from drishti/snippets/mpi-io-hints.bash rename to drishti/includes/snippets/mpi-io-hints.bash diff --git a/drishti/snippets/mpi-io-iread.c b/drishti/includes/snippets/mpi-io-iread.c similarity index 100% rename from drishti/snippets/mpi-io-iread.c rename to drishti/includes/snippets/mpi-io-iread.c diff --git a/drishti/snippets/mpi-io-iwrite.c b/drishti/includes/snippets/mpi-io-iwrite.c similarity index 100% rename from drishti/snippets/mpi-io-iwrite.c rename to drishti/includes/snippets/mpi-io-iwrite.c diff --git a/drishti/snippets/pnetcdf-hdf5-no-fill.c b/drishti/includes/snippets/pnetcdf-hdf5-no-fill.c similarity index 100% rename from drishti/snippets/pnetcdf-hdf5-no-fill.c rename to drishti/includes/snippets/pnetcdf-hdf5-no-fill.c diff --git a/drishti/reporter.py b/drishti/reporter.py index 54c7b17..8455040 100644 --- a/drishti/reporter.py +++ b/drishti/reporter.py @@ -3,7 +3,7 @@ import os import sys from subprocess import call -from .parser import * +from drishti.includes.parser import * ''' @@ -46,10 +46,10 @@ def main(): log_type = check_log_type(args.log_path) if log_type == LOG_TYPE_DARSHAN: - from .handle_darshan import handler + from drishti.handlers.handle_darshan import handler elif log_type == LOG_TYPE_RECORDER: - from .handle_recorder import handler + from drishti.handlers.handle_recorder import handler handler() diff --git a/setup.py b/setup.py index dd18cb6..e8c33d6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -import setuptools +from setuptools import setup, find_packages with open("README.md", "r") as f: long_description = f.read() @@ -6,7 +6,7 @@ with open("requirements.txt") as f: requirements = f.readlines() -setuptools.setup( +setup( name="drishti-io", keywords="drishti", 
version="0.5", @@ -23,12 +23,10 @@ 'rich ==12.5.1', 'recorder-utils', ], - packages=[ - 'drishti' - ], + packages=find_packages(), package_data={ - 'drishti': [ - 'drishti/snippets/*' + 'drishti.includes': [ + 'drishti/includes/snippets/*' ], }, include_package_data=True, From c304100c9a5006252cd6a15ca6d8313a590a31a3 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Thu, 18 Jan 2024 19:53:24 -0800 Subject: [PATCH 15/19] Add documentations --- drishti/includes/module.py | 263 ++++++++++++++++++++++++++++++++----- 1 file changed, 230 insertions(+), 33 deletions(-) diff --git a/drishti/includes/module.py b/drishti/includes/module.py index ae02c2e..68e68dc 100644 --- a/drishti/includes/module.py +++ b/drishti/includes/module.py @@ -17,6 +17,15 @@ # Basic usage check def check_stdio(total_size, total_size_stdio): + ''' + Check whether the application has excessively utilized standard input/output operations + + Parameters: + total_size: total I/O size + total_size_stdio: total STDIO size + + ''' + if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( total_size_stdio / total_size * 100.0, @@ -35,6 +44,13 @@ def check_stdio(total_size, total_size_stdio): def check_mpiio(modules): + ''' + Check whether the application has used MPI-IO or not + + Parameter: + modules: all different mudules been used in the application + ''' + if 'MPI-IO' not in modules: issue = 'Application is using low-performance interface' @@ -54,6 +70,15 @@ def check_mpiio(modules): def check_operation_intensive(total_operations, total_reads, total_writes): + ''' + Check whether the application is read or write intensive + + Parameters: + total_operations: number of I/O operations been executed by the application + total_reads: number of read operations been executed by the application + total_writes: number of write operations been executed by the application + ''' + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 @@ -74,6 +99,15 @@ def check_operation_intensive(total_operations, total_reads, total_writes): def check_size_intensive(total_size, total_read_size, total_written_size): + ''' + Check whether the application is read size intensive or written size intensive + + Parameters: + total_size: Total I/O size measured in byte + total_read_size: Input I/O size measured in byte + total_written_size: Output I/O size measured in byte + ''' + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: issue = 'Application is write size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 @@ -93,12 +127,22 @@ def check_size_intensive(total_size, total_read_size, total_written_size): ) -''' -detected_files required columns: -['id', 'total_reads', 'total_writes'] -detected_files.loc[:, 'id'] = detected_files.loc[:, 'id'].astype(str) -''' def check_small_operation(total_reads, total_reads_small, total_writes, total_writes_small, detected_files, modules, file_map): + ''' + Check whether application has performed an excessive number of small operations + + Parameters: + total_reads: number of read operations been executed by the application + total_reads_small: number of read operations that has small size + total_writes: number of write operations been executed by the application + total_writes_small: number of write operations that has small size + detected_files: + total_reads and total_writes in each file + required columns: ['id', 'total_reads', 'total_writes'] + modules: all different mudules been used in the application + file_map: file id and file name pairing + ''' + if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( total_reads_small, total_reads_small / total_reads * 100.0 @@ -189,6 +233,16 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_aligned, modules): + ''' + Check whether application has excessive misaligned operations + + Parameters: + total_operations: number of I/O operations been executed by the application + total_mem_not_aligned: number of memory requests not aligned + total_file_not_aligned: number of file requests not aligned + modules: all different mudules been used in the application + ''' + if total_operations and total_mem_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: issue = 'Application has a high number ({:.2f}%) of misaligned memory requests'.format( total_mem_not_aligned / total_operations * 100.0 @@ -234,6 +288,16 @@ def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_ali def check_traffic(max_read_offset, total_read_size, max_write_offset, total_written_size): + ''' + Check whether application has redundant read or write traffic + + Parameters: + max_read_offset: max offset application is reading from + total_read_size: total size application has been read + max_write_offset: max offset application is writing to + total_written_size: total size application has been written + ''' + if max_read_offset > total_read_size: issue = 'Application might have redundant read traffic (more data read than the highest offset)' @@ -250,6 +314,21 @@ def check_traffic(max_read_offset, total_read_size, max_write_offset, total_writ def check_random_operation(read_consecutive, read_sequential, read_random, total_reads, write_consecutive, write_sequential, write_random, total_writes): + ''' + Check whether application has performed excessive random operations + + Parameters: + read_consecutive: number of consecutive read operations + read_sequential: number of sequential read operations + read_random: number of random read operations + total_read: number of read operations been executed by the application + write_consecutive: number of consecutive write operations + write_sequential: 
number of sequential write operations + write_random: number of random write operations + total_write: number of write operations been executed by the application + ''' + + if total_reads: if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( @@ -301,11 +380,21 @@ def check_random_operation(read_consecutive, read_sequential, read_random, total ) -'''' -The shared_file required columns: -['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] -''' def check_shared_small_operation(total_shared_reads, total_shared_reads_small, total_shared_writes, total_shared_writes_small, shared_files, file_map): + ''' + Check whether there are excessive small requests in shared files + + Parameters: + total_shared_reads: total read operations in shared files + total_shared_reads_small: small read operations in shared files + total_shared_writes: total write operations in shared files + total_shared_writes_small: small write operations in shared files + shared_files: + small reads an small writes in each shared file + required columns: ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] + file_map: file id and file name pairing + ''' + if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 @@ -368,6 +457,14 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t def check_long_metadata(count_long_metadata, modules): + ''' + Check how many ranks have metadata operations taking too long + + Parameters: + count_long_metadata: number of ranks that have metadata operations taking too long + modules: all different mudules been used in the application + ''' + if count_long_metadata > 0: issue = 'There are {} ranks where metadata operations take over {} seconds'.format( count_long_metadata, THRESHOLD_METADATA_TIME_RANK @@ -396,11 +493,18 @@ def check_long_metadata(count_long_metadata, modules): ) -''' -detected_files required columns: -['id', 'data_imbalance'] -''' def check_shared_data_imblance(stragglers_count, detected_files, file_map): + ''' + Check how many shared files containing data transfer imbalance + + Parameters: + stragglers_count: number of shared files that contain data transfer imbalane + detected_files: + data imbalance per file + required columns: ['id', 'data_imbalance'] + file_map: file id and file name pairing + ''' + if stragglers_count: issue = 'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format( stragglers_count @@ -434,6 +538,15 @@ def check_shared_data_imblance(stragglers_count, detected_files, file_map): def check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, total_transfer_size): + ''' + Check whether the specific shared file contains data imbalance + + Parameters: + slowest_rank_bytes: the total request size of the rank that takes the longest data operation time + fastest_rank_bytes: the total request size of the rank that takes the shortest data operation time + total_transfer_size: total request size of that specific shared file + ''' + if 
total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 @@ -454,11 +567,18 @@ def check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, tot ) -''' -detected_files required columns: -['id', 'time_imbalance'] -''' def check_shared_time_imbalance(stragglers_count, detected_files, file_map): + ''' + Check how many shared files containing time transfer imbalance + + Parameters: + stragglers_count: number of shared files that contain time transfer imbalane + detected_files: + data imbalance per file + required columns: ['id', 'time_imbalance'] + file_map: file id and file name pairing + ''' + if stragglers_count: issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( stragglers_count @@ -492,6 +612,15 @@ def check_shared_time_imbalance(stragglers_count, detected_files, file_map): def check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, total_transfer_time): + ''' + Check whether the specific shared file contains time imbalance + + Parameters: + slowest_rank_bytes: the total request time of the rank that takes the longest data operation time + fastest_rank_bytes: the total request time of the rank that takes the shortest data operation time + total_transfer_size: total request time of that specific shared file + ''' + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 @@ -512,11 +641,17 @@ def check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, tota ) -''' -detected_files required columns: -['id', 'write_imbalance'] -''' def check_individual_write_imbalance(imbalance_count, detected_files, file_map): + ''' + Check how many write imbalance when accessing individual files + + Parameters: + imbalance_count: number of individual files that have write imbalance + detected_files: + write imbalance per file + required columns: ['id', 'write_imbalance'] + ''' + if imbalance_count: issue = 'Detected write imbalance when accessing {} individual files'.format( imbalance_count @@ -557,6 +692,14 @@ def check_individual_write_imbalance(imbalance_count, detected_files, file_map): def check_individual_write_imbalance_split(max_bytes_written, min_bytes_written): + ''' + Check whether there is write imbalance in the specific individual file + + Parameters: + max_bytes_written: max byte written in the file + min_bytes_written: minimum byte written in the file + ''' + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 @@ -584,11 +727,17 @@ def check_individual_write_imbalance_split(max_bytes_written, min_bytes_written) ) -''' -detected_files required columns: -['id', 'read_imbalance'] -''' def check_individual_read_imbalance(imbalance_count, detected_files, file_map): + ''' + Check how many read imbalance when accessing individual files + + Parameters: + imbalance_count: number of individual files that have read imbalance + detected_files: + read imbalance per file + required columns: ['id', 'read_imbalance'] + ''' + if imbalance_count: issue = 'Detected read imbalance when 
accessing {} individual files.'.format( imbalance_count ) @@ -629,6 +778,14 @@ def check_individual_read_imbalance(imbalance_count, detected_files, file_map): def check_individual_read_imbalance_split(max_bytes_read, min_bytes_read): + ''' + Check whether there is read imbalance in the specific individual file + + Parameters: + max_bytes_read: maximum bytes read from the file + min_bytes_read: minimum bytes read from the file + ''' + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 ) @@ -658,11 +815,21 @@ def check_individual_read_imbalance_split(max_bytes_read, min_bytes_read): # MPIIO level check -''' -detected_files required columns: -['id', 'absolute_indep_reads', 'percent_indep_reads'] -''' + def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, total_mpiio_read_operations, detected_files, file_map): + ''' + Check whether the application uses collective MPI-IO read calls + + Parameters: + mpiio_coll_reads: number of mpiio read operations that are collective + mpiio_indep_reads: number of mpiio read operations that are independent + total_mpiio_read_operations: total mpiio read operations + detected_files: + independent read operations and percentage per file + required columns: ['id', 'absolute_indep_reads', 'percent_indep_reads'] + file_map: file id and file name pairing + ''' + if mpiio_coll_reads == 0: if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( @@ -704,11 +871,20 @@ def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, tot ) -''' -detected_files required columns: -['id', 'absolute_indep_writes', 'percent_indep_writes'] -''' def check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, total_mpiio_write_operations, detected_files, file_map): + ''' + Check whether the application uses collective MPI-IO write calls + + Parameters: + mpiio_coll_writes: number of mpiio write operations that are collective + mpiio_indep_writes: number of mpiio write operations that are independent + total_mpiio_write_operations: total mpiio write operations + detected_files: + independent write operations and percentage per file + required columns: ['id', 'absolute_indep_writes', 'percent_indep_writes'] + file_map: file id and file name pairing + ''' + if mpiio_coll_writes == 0: if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( @@ -751,6 +927,16 @@ def check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, def check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_extension, modules): + ''' + Check whether the application can benefit from non-blocking requests + + Parameters: + mpiio_nb_reads: number of non-blocking mpi read operations + mpiio_nb_writes: number of non-blocking mpi write operations + has_hdf5_extension: whether the file has an HDF5 extension + modules: all different modules used in the application + ''' + if mpiio_nb_reads == 0: issue = 'Application could benefit from non-blocking (asynchronous) reads' @@ -803,6 +989,14 @@ def 
check_mpi_none_block_operation(mpiio_nb_reads, mpiio_nb_writes, has_hdf5_ext def check_mpi_aggregator(cb_nodes, NUMBER_OF_COMPUTE_NODES): + ''' + Check whether application has used inter-node aggregators + + Parameters: + cb_nodes: + NUMBER_OF_COMPUTE_NODES: + ''' + if cb_nodes > NUMBER_OF_COMPUTE_NODES: issue = 'Application is using inter-node aggregators (which require network communication)' @@ -893,6 +1087,9 @@ def display_footer(console, insights_start_time, insights_end_time): ) def export_html(console, filename): + ''' + ''' + if args.export_html: console.save_html( filename, From e3cf3f52d15e770b34ba3bcd34df5bb2e3cc8782 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Fri, 19 Jan 2024 05:01:10 -0800 Subject: [PATCH 16/19] Enable thresholds configurations --- drishti/handlers/handle_darshan.py | 1 - drishti/handlers/handle_recorder.py | 5 +-- drishti/includes/config.py | 67 ++++++++++++++++------------- drishti/includes/parser.py | 7 +++ 4 files changed, 45 insertions(+), 35 deletions(-) diff --git a/drishti/handlers/handle_darshan.py b/drishti/handlers/handle_darshan.py index e533358..c735731 100644 --- a/drishti/handlers/handle_darshan.py +++ b/drishti/handlers/handle_darshan.py @@ -72,7 +72,6 @@ def check_log_version(console, file, log_version, library_version): def handler(): console = init_console() - validate_thresholds() insights_start_time = time.time() diff --git a/drishti/handlers/handle_recorder.py b/drishti/handlers/handle_recorder.py index 83b132d..8b0f7b7 100644 --- a/drishti/handlers/handle_recorder.py +++ b/drishti/handlers/handle_recorder.py @@ -76,7 +76,6 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): insights_start_time = time.time() console = init_console() - validate_thresholds() modules = set(df_intervals['api'].unique()) # Check usage of POSIX, and MPI-IO per file @@ -433,7 +432,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] - indep_total_count = indep_read_count + indep_write_count; + indep_total_count = indep_read_count + indep_write_count if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_read_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): detected_files.append([ @@ -453,7 +452,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] - indep_total_count = indep_read_count + indep_write_count; + indep_total_count = indep_read_count + indep_write_count if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_write_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): detected_files.append([ diff --git a/drishti/includes/config.py b/drishti/includes/config.py index 0041980..17f81b2 100644 --- a/drishti/includes/config.py +++ b/drishti/includes/config.py @@ -88,9 +88,6 @@ csv_report = [] codes = [] -# TODO: need to verify the threashold to be between 0 and 1 -# TODO: read thresholds from file - def init_console(): console = Console(record=True) @@ -138,38 +135,47 @@ def 
set_export_theme(): def load_json(): codes = [] - if args.json: - f = open(args.json) - data = json.load(f) - - for key, values in data.items(): - for value in values: - code = value['code'] - codes.append(code) - - level = value['level'] - issue = value['issue'] - recommendation = [] - for rec in value['recommendations']: - new_message = {'message': rec} - recommendation.append(new_message) - - insights_dxt.append( - message(code, TARGET_DEVELOPER, level, issue, recommendation) - ) + if not args.split_files: + if args.json: + f = open(args.json) + data = json.load(f) + + for key, values in data.items(): + for value in values: + code = value['code'] + codes.append(code) + + level = value['level'] + issue = value['issue'] + recommendation = [] + for rec in value['recommendations']: + new_message = {'message': rec} + recommendation.append(new_message) + + insights_dxt.append( + message(code, TARGET_DEVELOPER, level, issue, recommendation) + ) def validate_thresholds(): """ Validate thresholds defined by the user. """ - assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) - assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) - assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) - assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) - assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) + if args.config: + f = open(args.config) + data = json.load(f) + + for category, thresholds_spec in data.items(): + for threshold_name, threshold_value in thresholds_spec.items(): + globals()[threshold_name] = threshold_value - assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) + assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) + assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) + assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) + assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) + assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) + + assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) def convert_bytes(bytes_number): @@ -265,6 +271,5 @@ def message(code, target, level, issue, recommendations=None, details=None): ''' Pre-load ''' -if not args.split_files: - load_json() - +load_json() +validate_thresholds() diff --git a/drishti/includes/parser.py b/drishti/includes/parser.py index 0261312..7ddfdd6 100644 --- a/drishti/includes/parser.py +++ b/drishti/includes/parser.py @@ -95,4 +95,11 @@ help='Split the files and generate report for each file' ) +parser.add_argument( + '--config', + default=False, + dest='config', + help='Enable thresholds read from json file' +) + args = parser.parse_args() From ba39b12a19d5540ebd38bc8351368b8895bc0ad1 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Fri, 19 Jan 2024 10:20:36 -0800 Subject: [PATCH 17/19] Use cache to speed up Recorder log parsing process --- drishti/handlers/handle_recorder.py | 56 +++++++++++++++++++---------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/drishti/handlers/handle_recorder.py b/drishti/handlers/handle_recorder.py index 8b0f7b7..c4ffd2d 100644 --- a/drishti/handlers/handle_recorder.py +++ b/drishti/handlers/handle_recorder.py @@ -41,26 +41,46 @@ def init_df_posix_recordes(reader): def handler(): - reader = RecorderReader(args.log_path) - df_intervals = build_offset_intervals(reader) - df_posix_records = init_df_posix_recordes(reader) + df_intervals = None + df_posix_records 
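As a rough illustration of the new --config option, a thresholds file for this revision of validate_thresholds() might look like the JSON below; the function only iterates over the inner names and overwrites the module-level variables of the same name, so the top-level grouping key and the concrete values here are illustrative assumptions (later patches in this series rename the thresholds and change how the keys are composed):

    {
        "thresholds": {
            "THRESHOLD_SMALL_REQUESTS": 0.2,
            "THRESHOLD_SMALL_REQUESTS_ABSOLUTE": 5000,
            "THRESHOLD_METADATA_TIME_RANK": 60
        }
    }

Such a file would be passed on the command line as, for example, --config thresholds.json alongside the usual trace argument.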
= None + df_file_map = None + file_map = None + + if os.path.exists(args.log_path + '.intervals.csv') and os.path.exists(args.log_path + '.records.csv') and os.path.exists(args.log_path + '.filemap.csv'): + print('using existing parsed log file') + df_intervals = pd.read_csv(args.log_path + '.intervals.csv') + df_posix_records = pd.read_csv(args.log_path + '.records.csv') + df_file_map = pd.read_csv(args.log_path + '.filemap.csv') + file_map = {} + for index, row in df_file_map.iterrows(): + file_map[row['file_id']] = row['file_name'] + else: + reader = RecorderReader(args.log_path) + df_intervals = build_offset_intervals(reader) + df_posix_records = init_df_posix_recordes(reader) + + file_map = get_accessed_files(reader) + + def add_api(row): + if 'MPI' in row['function']: + return 'MPI-IO' + elif 'H5' in row['function']: + return 'H5F' + else: + return 'POSIX' + + def add_duration(row): + return row['end'] - row['start'] + + df_intervals['api'] = df_intervals.apply(add_api, axis=1) + df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) + df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) - file_map = get_accessed_files(reader) + df_intervals.to_csv(args.log_path + '.intervals.csv', mode='w', index=False, header=True) + df_posix_records.to_csv(args.log_path + '.records.csv', mode='w', index=False, header=True) - def add_api(row): - if 'MPI' in row['function']: - return 'MPI-IO' - elif 'H5' in row['function']: - return 'H5F' - else: - return 'POSIX' - - def add_duration(row): - return row['end'] - row['start'] - - df_intervals['api'] = df_intervals.apply(add_api, axis=1) - df_intervals['duration'] = df_intervals.apply(add_duration, axis=1) - df_posix_records['duration'] = df_posix_records.apply(add_duration, axis=1) + df_file_map = pd.DataFrame(list(file_map.items()), columns=['file_id', 'file_name']) + df_file_map.to_csv(args.log_path + '.filemap.csv', mode='w', index=False, header=True) if args.split_files: for fid in file_map: From 872d801421b6287268ab62573cc8ceb52da13674 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Fri, 26 Jan 2024 04:25:06 -0800 Subject: [PATCH 18/19] Rename thresholds --- drishti/handlers/handle_darshan.py | 22 ++++++------- drishti/handlers/handle_recorder.py | 36 +++++++++++----------- drishti/includes/config.py | 38 +++++++++++------------ drishti/includes/module.py | 48 ++++++++++++++--------------- 4 files changed, 71 insertions(+), 73 deletions(-) diff --git a/drishti/handlers/handle_darshan.py b/drishti/handlers/handle_darshan.py index c735731..775a838 100644 --- a/drishti/handlers/handle_darshan.py +++ b/drishti/handlers/handle_darshan.py @@ -354,7 +354,7 @@ def handler(): ######################################################################################################################################################################### - count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > THRESHOLD_METADATA_TIME_RANK)]) + count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > metadata_time_rank)]) check_long_metadata(count_long_metadata, modules) @@ -375,7 +375,7 @@ def handler(): for index, row in shared_files.iterrows(): total_transfer_size = row['POSIX_BYTES_WRITTEN'] + row['POSIX_BYTES_READ'] - if total_transfer_size and abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size > THRESHOLD_STRAGGLERS: + if total_transfer_size and abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size 
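Because the handler above reuses <log_path>.intervals.csv, <log_path>.records.csv, and <log_path>.filemap.csv whenever all three exist next to the trace, a stale cache has to be removed by hand before the Recorder log is re-parsed; a minimal sketch, assuming log_path holds the same trace path passed to the tool:

    import os

    log_path = '/path/to/recorder/trace'  # hypothetical trace location
    for suffix in ('.intervals.csv', '.records.csv', '.filemap.csv'):
        cached = log_path + suffix
        if os.path.exists(cached):
            os.remove(cached)  # force the next run to rebuild the parsed CSVs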
> imbalance_stragglers: stragglers_count += 1 detected_files.append([ @@ -403,7 +403,7 @@ def handler(): for index, row in shared_files_times.iterrows(): total_transfer_time = row['POSIX_F_WRITE_TIME'] + row['POSIX_F_READ_TIME'] + row['POSIX_F_META_TIME'] - if total_transfer_time and abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time > THRESHOLD_STRAGGLERS: + if total_transfer_time and abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time > imbalance_stragglers: stragglers_count += 1 detected_files.append([ @@ -432,7 +432,7 @@ def handler(): detected_files = [] for index, row in aggregated.iterrows(): - if row['POSIX_BYTES_WRITTEN_max'] and abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] > THRESHOLD_IMBALANCE: + if row['POSIX_BYTES_WRITTEN_max'] and abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] > imbalance_size: imbalance_count += 1 detected_files.append([ @@ -448,7 +448,7 @@ def handler(): detected_files = [] for index, row in aggregated.iterrows(): - if row['POSIX_BYTES_READ_max'] and abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] > THRESHOLD_IMBALANCE: + if row['POSIX_BYTES_READ_max'] and abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] > imbalance_size: imbalance_count += 1 detected_files.append([ @@ -478,12 +478,12 @@ def handler(): mpiio_indep_reads = df_mpiio['counters']['MPIIO_INDEP_READS'].sum() detected_files = [] - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: files = pd.DataFrame(df_mpiio_collective_reads.groupby('id').sum()).reset_index() for index, row in df_mpiio_collective_reads.iterrows(): if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and - row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and - (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE): + row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations_absolute): detected_files.append([ row['id'], row['MPIIO_INDEP_READS'], row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 @@ -502,13 +502,13 @@ def handler(): mpiio_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() detected_files = [] - if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() for index, row in df_mpiio_collective_writes.iterrows(): if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and - row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS and - (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE): + row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + 
row['MPIIO_INDEP_WRITES']) > collective_operations and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations_absolute): detected_files.append([ row['id'], row['MPIIO_INDEP_WRITES'], row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 diff --git a/drishti/handlers/handle_recorder.py b/drishti/handlers/handle_recorder.py index c4ffd2d..0007d11 100644 --- a/drishti/handlers/handle_recorder.py +++ b/drishti/handlers/handle_recorder.py @@ -47,7 +47,7 @@ def handler(): file_map = None if os.path.exists(args.log_path + '.intervals.csv') and os.path.exists(args.log_path + '.records.csv') and os.path.exists(args.log_path + '.filemap.csv'): - print('using existing parsed log file') + print('Using existing parsed log file') df_intervals = pd.read_csv(args.log_path + '.intervals.csv') df_posix_records = pd.read_csv(args.log_path + '.records.csv') df_file_map = pd.read_csv(args.log_path + '.filemap.csv') @@ -174,16 +174,16 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): # Get the number of small I/O operations (less than 1 MB) - total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) + total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) if args.split_files: detected_files = pd.DataFrame() else: detected_files = [] for id in file_map.keys(): - read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) - write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) + write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) detected_files.append([id, read_cnt, write_cnt]) column_names = ['id', 'total_reads', 'total_writes'] @@ -258,12 +258,12 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): total_shared_reads = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read'))]) total_shared_reads_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + & (df_posix['size'] < small_bytes)]) total_shared_writes = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read'))]) total_shared_writes_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + & (df_posix['size'] < small_bytes)]) if args.split_files: detected_files = pd.DataFrame() @@ -272,10 +272,10 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): for id in shared_files: read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + & (df_posix['size'] < small_bytes)]) write_cnt = 
len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) - & (df_posix['size'] < THRESHOLD_SMALL_BYTES)]) + & (df_posix['size'] < small_bytes)]) detected_files.append([id, read_cnt, write_cnt]) column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] @@ -287,7 +287,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): # TODO: Assumed metadata operations: open, close, sync, create, seek df_detected = df_posix_records.groupby('rank')['duration'].sum().reset_index() - count_long_metadata = len(df_detected[(df_detected['duration'] > THRESHOLD_METADATA_TIME_RANK)]) + count_long_metadata = len(df_detected[(df_detected['duration'] > metadata_time_rank)]) check_long_metadata(count_long_metadata, modules) @@ -318,7 +318,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] - if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > imbalance_stragglers: stragglers_count += 1 detected_files.append([ @@ -356,7 +356,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): slowest_rank_time = df_detected['duration'].max() fastest_rank_time = df_detected['duration'].min() - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > imbalance_stragglers: stragglers_count += 1 detected_files.append([ @@ -396,7 +396,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): max_bytes_written = df_detected['size'].max() min_bytes_written = df_detected['size'].min() - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > imbalance_size: imbalance_count += 1 detected_files.append([ @@ -417,7 +417,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): max_bytes_read = df_detected['size'].max() min_bytes_read = df_detected['size'].min() - if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > imbalance_size: imbalance_count += 1 detected_files.append([ @@ -448,13 +448,13 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): detected_files = pd.DataFrame() else: detected_files = [] - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] indep_total_count = indep_read_count + indep_write_count - if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_read_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): 
+ if (indep_total_count > collective_operations_absolute and indep_read_count / indep_total_count > collective_operations): detected_files.append([ id, indep_read_count, indep_read_count / indep_total_count * 100 ]) @@ -468,13 +468,13 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): detected_files = pd.DataFrame() else: detected_files = [] - if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] indep_total_count = indep_read_count + indep_write_count - if (indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE and indep_write_count / indep_total_count > THRESHOLD_COLLECTIVE_OPERATIONS): + if (indep_total_count > collective_operations_absolute and indep_write_count / indep_total_count > collective_operations): detected_files.append([ id, indep_write_count, indep_write_count / indep_total_count * 100 ]) diff --git a/drishti/includes/config.py b/drishti/includes/config.py index 17f81b2..f362dc2 100644 --- a/drishti/includes/config.py +++ b/drishti/includes/config.py @@ -34,20 +34,19 @@ insights_total[WARN] = 0 insights_total[RECOMMENDATIONS] = 0 -THRESHOLD_OPERATION_IMBALANCE = 0.1 -THRESHOLD_SMALL_REQUESTS = 0.1 -THRESHOLD_SMALL_REQUESTS_ABSOLUTE = 1000 -THRESHOLD_MISALIGNED_REQUESTS = 0.1 -THRESHOLD_METADATA = 0.1 -THRESHOLD_METADATA_TIME_RANK = 30 # seconds -THRESHOLD_RANDOM_OPERATIONS = 0.2 -THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE = 1000 -THRESHOLD_STRAGGLERS = 0.15 -THRESHOLD_IMBALANCE = 0.30 -THRESHOLD_INTERFACE_STDIO = 0.1 -THRESHOLD_COLLECTIVE_OPERATIONS = 0.5 -THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE = 1000 -THRESHOLD_SMALL_BYTES = 1048576 # 1 MB +imbalance_operations = 0.1 +small_bytes = 1048576 # 1MB +small_requests = 0.1 +small_requests_absolute = 1000 +misaligned_requests = 0.1 +metadata_time_rank = 30 # seconds +random_operations = 0.2 +random_operations_absolute = 1000 +imbalance_stragglers = 0.15 +imbalance_size = 0.30 +interface_stdio = 0.1 +collective_operations = 0.5 +collective_operations_absolute = 1000 INSIGHTS_STDIO_HIGH_USAGE = 'S01' INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' @@ -169,13 +168,12 @@ def validate_thresholds(): for threshold_name, threshold_value in thresholds_spec.items(): globals()[threshold_name] = threshold_value - assert(THRESHOLD_OPERATION_IMBALANCE >= 0.0 and THRESHOLD_OPERATION_IMBALANCE <= 1.0) - assert(THRESHOLD_SMALL_REQUESTS >= 0.0 and THRESHOLD_SMALL_REQUESTS <= 1.0) - assert(THRESHOLD_MISALIGNED_REQUESTS >= 0.0 and THRESHOLD_MISALIGNED_REQUESTS <= 1.0) - assert(THRESHOLD_METADATA >= 0.0 and THRESHOLD_METADATA <= 1.0) - assert(THRESHOLD_RANDOM_OPERATIONS >= 0.0 and THRESHOLD_RANDOM_OPERATIONS <= 1.0) + assert(imbalance_operations >= 0.0 and imbalance_operations <= 1.0) + assert(small_requests >= 0.0 and small_requests <= 1.0) + assert(misaligned_requests >= 0.0 and misaligned_requests <= 1.0) + assert(random_operations >= 0.0 and random_operations <= 1.0) - assert(THRESHOLD_METADATA_TIME_RANK >= 0.0) + assert(metadata_time_rank >= 0.0) def convert_bytes(bytes_number): diff --git a/drishti/includes/module.py b/drishti/includes/module.py 
index 68e68dc..cf90530 100644 --- a/drishti/includes/module.py +++ b/drishti/includes/module.py @@ -26,7 +26,7 @@ def check_stdio(total_size, total_size_stdio): ''' - if total_size and total_size_stdio / total_size > THRESHOLD_INTERFACE_STDIO: + if total_size and total_size_stdio / total_size > interface_stdio: issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( total_size_stdio / total_size * 100.0, convert_bytes(total_size_stdio) @@ -79,7 +79,7 @@ def check_operation_intensive(total_operations, total_reads, total_writes): total_writes: number of write operations been executed by the application ''' - if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > imbalance_operations: issue = 'Application is write operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 ) @@ -88,7 +88,7 @@ def check_operation_intensive(total_operations, total_reads, total_writes): message(INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) - if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > THRESHOLD_OPERATION_IMBALANCE: + if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > imbalance_operations: issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 ) @@ -108,7 +108,7 @@ def check_size_intensive(total_size, total_read_size, total_written_size): total_written_size: Output I/O size measured in byte ''' - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > imbalance_operations: issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) @@ -117,7 +117,7 @@ def check_size_intensive(total_size, total_read_size, total_written_size): message(INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > THRESHOLD_OPERATION_IMBALANCE: + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > imbalance_operations: issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) @@ -143,7 +143,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr file_map: file id and file name pairing ''' - if total_reads_small and total_reads_small / total_reads > THRESHOLD_SMALL_REQUESTS and total_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + if total_reads_small and total_reads_small / total_reads > small_requests and total_reads_small > small_requests_absolute: issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( total_reads_small, total_reads_small / total_reads * 100.0 ) @@ -152,7 +152,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr recommendation = [] for index, row in detected_files.iterrows(): - if row['total_reads'] > (total_reads * THRESHOLD_SMALL_REQUESTS / 2): + if row['total_reads'] > (total_reads * small_requests / 2): detail.append( { 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( @@ -187,7 +187,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) - if total_writes_small and total_writes_small / total_writes > THRESHOLD_SMALL_REQUESTS and total_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + if total_writes_small and total_writes_small / total_writes > small_requests and total_writes_small > small_requests_absolute: issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( total_writes_small, total_writes_small / total_writes * 100.0 ) @@ -196,7 +196,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr recommendation = [] for index, row in detected_files.iterrows(): - if row['total_writes'] > (total_writes * THRESHOLD_SMALL_REQUESTS / 2): + if row['total_writes'] > (total_writes * small_requests / 2): detail.append( { 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( @@ -243,7 +243,7 @@ def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_ali modules: all different mudules been used in the application ''' - if total_operations and total_mem_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: + if total_operations and total_mem_not_aligned / total_operations > misaligned_requests: issue = 'Application has a high number ({:.2f}%) of misaligned memory requests'.format( total_mem_not_aligned / total_operations * 100.0 ) @@ -252,7 +252,7 @@ def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_ali message(INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) ) - if total_operations and total_file_not_aligned / total_operations > THRESHOLD_MISALIGNED_REQUESTS: + if total_operations and total_file_not_aligned / total_operations > misaligned_requests: issue = 'Application issues a high number ({:.2f}%) of misaligned file requests'.format( total_file_not_aligned / total_operations * 100.0 ) @@ -330,7 +330,7 @@ def check_random_operation(read_consecutive, read_sequential, read_random, total if total_reads: - if read_random and read_random / total_reads > THRESHOLD_RANDOM_OPERATIONS and read_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + if read_random and read_random / total_reads > random_operations and read_random > 
random_operations_absolute: issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( read_random, read_random / total_reads * 100.0 ) @@ -355,7 +355,7 @@ def check_random_operation(read_consecutive, read_sequential, read_random, total ) if total_writes: - if write_random and write_random / total_writes > THRESHOLD_RANDOM_OPERATIONS and write_random > THRESHOLD_RANDOM_OPERATIONS_ABSOLUTE: + if write_random and write_random / total_writes > random_operations and write_random > random_operations_absolute: issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( write_random, write_random / total_writes * 100.0 ) @@ -395,7 +395,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t file_map: file id and file name pairing ''' - if total_shared_reads and total_shared_reads_small / total_shared_reads > THRESHOLD_SMALL_REQUESTS and total_shared_reads_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + if total_shared_reads and total_shared_reads_small / total_shared_reads > small_requests and total_shared_reads_small > small_requests_absolute: issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 ) @@ -403,7 +403,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t detail = [] for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * THRESHOLD_SMALL_REQUESTS / 2): + if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * small_requests / 2): detail.append( { 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( @@ -425,7 +425,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) - if total_shared_writes and total_shared_writes_small / total_shared_writes > THRESHOLD_SMALL_REQUESTS and total_shared_writes_small > THRESHOLD_SMALL_REQUESTS_ABSOLUTE: + if total_shared_writes and total_shared_writes_small / total_shared_writes > small_requests and total_shared_writes_small > small_requests_absolute: issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 ) @@ -433,7 +433,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t detail = [] for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * THRESHOLD_SMALL_REQUESTS / 2): + if row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * small_requests / 2): detail.append( { 'message': '{} ({:.2f}%) small writes requests are to "{}"'.format( @@ -467,7 +467,7 @@ def check_long_metadata(count_long_metadata, modules): if count_long_metadata > 0: issue = 'There are {} ranks where metadata operations take over {} seconds'.format( - count_long_metadata, THRESHOLD_METADATA_TIME_RANK + count_long_metadata, metadata_time_rank ) recommendation = [ @@ -547,7 +547,7 @@ def check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, tot total_transfer_size: total request size of that specific shared file ''' - if total_transfer_size and 
abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > THRESHOLD_STRAGGLERS: + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > imbalance_stragglers: issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 ) @@ -621,7 +621,7 @@ def check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, tota total_transfer_size: total request time of that specific shared file ''' - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > THRESHOLD_STRAGGLERS: + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > imbalance_stragglers: issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 ) @@ -700,7 +700,7 @@ def check_individual_write_imbalance_split(max_bytes_written, min_bytes_written) min_bytes_written: minimum byte written in the file ''' - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > THRESHOLD_IMBALANCE: + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > imbalance_size: issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 ) @@ -786,7 +786,7 @@ def check_individual_read_imbalance_split(max_bytes_read, min_bytes_read): min_bytes_written: minimum byte read in the file ''' - if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > THRESHOLD_IMBALANCE: + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > imbalance_size: issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 ) @@ -831,7 +831,7 @@ def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, tot ''' if mpiio_coll_reads == 0: - if total_mpiio_read_operations and total_mpiio_read_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( mpiio_indep_reads, mpiio_indep_reads / total_mpiio_read_operations * 100 @@ -886,7 +886,7 @@ def check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, ''' if mpiio_coll_writes == 0: - if total_mpiio_write_operations and total_mpiio_write_operations > THRESHOLD_COLLECTIVE_OPERATIONS_ABSOLUTE: + if total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( mpiio_indep_writes, mpiio_indep_writes / total_mpiio_write_operations * 100 From 68ebab837716cff966d841ec519edeb5ad063387 Mon Sep 17 00:00:00 2001 From: onewbiek Date: Fri, 2 Feb 2024 10:48:13 -0800 Subject: [PATCH 19/19] Enable thresholds display --- drishti/handlers/handle_darshan.py | 23 +++--- drishti/handlers/handle_recorder.py | 39 +++++----- drishti/includes/config.py | 48 +++++++------ drishti/includes/module.py | 108 +++++++++++++++++++++------- drishti/includes/parser.py | 8 +++ 5 files changed, 152 insertions(+), 74 deletions(-) diff --git a/drishti/handlers/handle_darshan.py b/drishti/handlers/handle_darshan.py index 775a838..b4c59bf 100644 --- 
a/drishti/handlers/handle_darshan.py +++ b/drishti/handlers/handle_darshan.py @@ -354,7 +354,7 @@ def handler(): ######################################################################################################################################################################### - count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > metadata_time_rank)]) + count_long_metadata = len(df['fcounters'][(df['fcounters']['POSIX_F_META_TIME'] > thresholds['metadata_time_rank'][0])]) check_long_metadata(count_long_metadata, modules) @@ -375,7 +375,7 @@ def handler(): for index, row in shared_files.iterrows(): total_transfer_size = row['POSIX_BYTES_WRITTEN'] + row['POSIX_BYTES_READ'] - if total_transfer_size and abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size > imbalance_stragglers: + if total_transfer_size and abs(row['POSIX_SLOWEST_RANK_BYTES'] - row['POSIX_FASTEST_RANK_BYTES']) / total_transfer_size > thresholds['imbalance_stragglers'][0]: stragglers_count += 1 detected_files.append([ @@ -403,7 +403,7 @@ def handler(): for index, row in shared_files_times.iterrows(): total_transfer_time = row['POSIX_F_WRITE_TIME'] + row['POSIX_F_READ_TIME'] + row['POSIX_F_META_TIME'] - if total_transfer_time and abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time > imbalance_stragglers: + if total_transfer_time and abs(row['POSIX_F_SLOWEST_RANK_TIME'] - row['POSIX_F_FASTEST_RANK_TIME']) / total_transfer_time > thresholds['imbalance_stragglers'][0]: stragglers_count += 1 detected_files.append([ @@ -432,7 +432,7 @@ def handler(): detected_files = [] for index, row in aggregated.iterrows(): - if row['POSIX_BYTES_WRITTEN_max'] and abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] > imbalance_size: + if row['POSIX_BYTES_WRITTEN_max'] and abs(row['POSIX_BYTES_WRITTEN_max'] - row['POSIX_BYTES_WRITTEN_min']) / row['POSIX_BYTES_WRITTEN_max'] > thresholds['imbalance_size'][0]: imbalance_count += 1 detected_files.append([ @@ -448,7 +448,7 @@ def handler(): detected_files = [] for index, row in aggregated.iterrows(): - if row['POSIX_BYTES_READ_max'] and abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] > imbalance_size: + if row['POSIX_BYTES_READ_max'] and abs(row['POSIX_BYTES_READ_max'] - row['POSIX_BYTES_READ_min']) / row['POSIX_BYTES_READ_max'] > thresholds['imbalance_size'][0]: imbalance_count += 1 detected_files.append([ @@ -478,12 +478,12 @@ def handler(): mpiio_indep_reads = df_mpiio['counters']['MPIIO_INDEP_READS'].sum() detected_files = [] - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > thresholds['collective_operations_absolute'][0]: files = pd.DataFrame(df_mpiio_collective_reads.groupby('id').sum()).reset_index() for index, row in df_mpiio_collective_reads.iterrows(): if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and - row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations and - (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations_absolute): + row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations'][0] and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > 
thresholds['collective_operations_absolute'][0]): detected_files.append([ row['id'], row['MPIIO_INDEP_READS'], row['MPIIO_INDEP_READS'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 @@ -502,13 +502,13 @@ def handler(): mpiio_indep_writes = df_mpiio['counters']['MPIIO_INDEP_WRITES'].sum() detected_files = [] - if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > thresholds['collective_operations_absolute'][0]: files = pd.DataFrame(df_mpiio_collective_writes.groupby('id').sum()).reset_index() for index, row in df_mpiio_collective_writes.iterrows(): if ((row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) and - row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations and - (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > collective_operations_absolute): + row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations'][0] and + (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) > thresholds['collective_operations_absolute'][0]): detected_files.append([ row['id'], row['MPIIO_INDEP_WRITES'], row['MPIIO_INDEP_WRITES'] / (row['MPIIO_INDEP_READS'] + row['MPIIO_INDEP_WRITES']) * 100 @@ -651,6 +651,7 @@ def handler(): console.print() display_content(console) + display_thresholds(console) display_footer(console, insights_start_time, insights_end_time) filename = '{}.html'.format(args.log_path) diff --git a/drishti/handlers/handle_recorder.py b/drishti/handlers/handle_recorder.py index 0007d11..34c4790 100644 --- a/drishti/handlers/handle_recorder.py +++ b/drishti/handlers/handle_recorder.py @@ -47,7 +47,9 @@ def handler(): file_map = None if os.path.exists(args.log_path + '.intervals.csv') and os.path.exists(args.log_path + '.records.csv') and os.path.exists(args.log_path + '.filemap.csv'): - print('Using existing parsed log file') + print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.intervals.csv'))) + print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.records.csv'))) + print('Using parsed file: {}'.format(os.path.abspath(args.log_path + '.filemap.csv'))) df_intervals = pd.read_csv(args.log_path + '.intervals.csv') df_posix_records = pd.read_csv(args.log_path + '.records.csv') df_file_map = pd.read_csv(args.log_path + '.filemap.csv') @@ -174,16 +176,16 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): # Get the number of small I/O operations (less than 1 MB) - total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) - total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) + total_reads_small = len(df_posix[(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) + total_writes_small = len(df_posix[~(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) if args.split_files: detected_files = pd.DataFrame() else: detected_files = [] for id in file_map.keys(): - read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) - write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < small_bytes)]) + read_cnt 
= len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) + write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) & (df_posix['size'] < thresholds['small_bytes'][0])]) detected_files.append([id, read_cnt, write_cnt]) column_names = ['id', 'total_reads', 'total_writes'] @@ -258,12 +260,12 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): total_shared_reads = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read'))]) total_shared_reads_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & (df_posix['function'].str.contains('read')) - & (df_posix['size'] < small_bytes)]) + & (df_posix['size'] < thresholds['small_bytes'][0])]) total_shared_writes = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read'))]) total_shared_writes_small = len(df_posix[(df_posix['file_id'].isin(shared_files)) & ~(df_posix['function'].str.contains('read')) - & (df_posix['size'] < small_bytes)]) + & (df_posix['size'] < thresholds['small_bytes'][0])]) if args.split_files: detected_files = pd.DataFrame() @@ -272,10 +274,10 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): for id in shared_files: read_cnt = len(df_posix[(df_posix['file_id'] == id) & (df_posix['function'].str.contains('read')) - & (df_posix['size'] < small_bytes)]) + & (df_posix['size'] < thresholds['small_bytes'][0])]) write_cnt = len(df_posix[(df_posix['file_id'] == id) & ~(df_posix['function'].str.contains('read')) - & (df_posix['size'] < small_bytes)]) + & (df_posix['size'] < thresholds['small_bytes'][0])]) detected_files.append([id, read_cnt, write_cnt]) column_names = ['id', 'INSIGHTS_POSIX_SMALL_READS', 'INSIGHTS_POSIX_SMALL_WRITES'] @@ -287,7 +289,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): # TODO: Assumed metadata operations: open, close, sync, create, seek df_detected = df_posix_records.groupby('rank')['duration'].sum().reset_index() - count_long_metadata = len(df_detected[(df_detected['duration'] > metadata_time_rank)]) + count_long_metadata = len(df_detected[(df_detected['duration'] > thresholds['metadata_time_rank'][0])]) check_long_metadata(count_long_metadata, modules) @@ -318,7 +320,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): slowest_rank_bytes = df_detected.loc[df_detected['duration'].idxmax(), 'size'] fastest_rank_bytes = df_detected.loc[df_detected['duration'].idxmin(), 'size'] - if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > imbalance_stragglers: + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > thresholds['imbalance_stragglers'][0]: stragglers_count += 1 detected_files.append([ @@ -356,7 +358,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): slowest_rank_time = df_detected['duration'].max() fastest_rank_time = df_detected['duration'].min() - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > imbalance_stragglers: + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > thresholds['imbalance_stragglers'][0]: stragglers_count += 1 detected_files.append([ @@ -396,7 +398,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): max_bytes_written = df_detected['size'].max() 
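With the thresholds now kept in a dict of [value, triggered] pairs (defined in config.py further below in this patch), the lookups above read thresholds['small_bytes'][0] for the value, while the second element appears to record whether a check actually crossed the threshold so that the new display_thresholds() call can report it. validate_thresholds() composes each configuration key as category + '_' + threshold_name, so a --config file for this revision would plausibly group the name fragments by prefix; the grouping and values below are an assumption consistent with that lookup, not a schema taken from the repository:

    {
        "imbalance": { "stragglers": 0.2, "size": 0.4 },
        "small": { "requests": 0.2, "requests_absolute": 5000 },
        "metadata": { "time_rank": 60 }
    }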
min_bytes_written = df_detected['size'].min() - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > imbalance_size: + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > thresholds['imbalance_size'][0]: imbalance_count += 1 detected_files.append([ @@ -417,7 +419,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): max_bytes_read = df_detected['size'].max() min_bytes_read = df_detected['size'].min() - if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > imbalance_size: + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > thresholds['imbalance_size'][0]: imbalance_count += 1 detected_files.append([ @@ -448,13 +450,13 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): detected_files = pd.DataFrame() else: detected_files = [] - if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: + if mpiio_coll_reads == 0 and total_mpiio_read_operations and total_mpiio_read_operations > thresholds['collective_operations_absolute'][0]: for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] indep_total_count = indep_read_count + indep_write_count - if (indep_total_count > collective_operations_absolute and indep_read_count / indep_total_count > collective_operations): + if (indep_total_count > thresholds['collective_operations_absolute'][0] and indep_read_count / indep_total_count > thresholds['collective_operations'][0]): detected_files.append([ id, indep_read_count, indep_read_count / indep_total_count * 100 ]) @@ -468,13 +470,13 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): detected_files = pd.DataFrame() else: detected_files = [] - if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: + if mpiio_coll_writes == 0 and total_mpiio_write_operations and total_mpiio_write_operations > thresholds['collective_operations_absolute'][0]: for id in file_map.keys(): indep_read_count = df_mpiio_reads[~(df_mpiio_reads['function'].str.contains('_all')) & (df_mpiio_reads['file_id'] == id)] indep_write_count = df_mpiio_writes[~(df_mpiio_writes['function'].str.contains('_all')) & (df_mpiio_writes['file_id'] == id)] indep_total_count = indep_read_count + indep_write_count - if (indep_total_count > collective_operations_absolute and indep_write_count / indep_total_count > collective_operations): + if (indep_total_count > thresholds['collective_operations_absolute'][0] and indep_write_count / indep_total_count > thresholds['collective_operations'][0]): detected_files.append([ id, indep_write_count, indep_write_count / indep_total_count * 100 ]) @@ -572,6 +574,7 @@ def process_helper(file_map, df_intervals, df_posix_records, fid=None): console.print() display_content(console) + display_thresholds(console) display_footer(console, insights_start_time, insights_end_time) if args.split_files: diff --git a/drishti/includes/config.py b/drishti/includes/config.py index f362dc2..82bd872 100644 --- a/drishti/includes/config.py +++ b/drishti/includes/config.py @@ -34,19 +34,21 @@ insights_total[WARN] = 0 insights_total[RECOMMENDATIONS] = 0 -imbalance_operations = 0.1 -small_bytes = 
1048576 # 1MB -small_requests = 0.1 -small_requests_absolute = 1000 -misaligned_requests = 0.1 -metadata_time_rank = 30 # seconds -random_operations = 0.2 -random_operations_absolute = 1000 -imbalance_stragglers = 0.15 -imbalance_size = 0.30 -interface_stdio = 0.1 -collective_operations = 0.5 -collective_operations_absolute = 1000 +thresholds = { + 'imbalance_operations': [0.1, False], + 'small_bytes': [1048576, False], + 'small_requests': [0.1, False], + 'small_requests_absolute': [1000, False], + 'misaligned_requests': [0.1, False], + 'metadata_time_rank': [30, False], + 'random_operations': [0.2, False], + 'random_operations_absolute': [1000, False], + 'imbalance_stragglers': [0.15, False], + 'imbalance_size': [0.3, False], + 'interface_stdio': [0.1, False], + 'collective_operations': [0.5, False], + 'collective_operations_absolute': [1000, False], +} INSIGHTS_STDIO_HIGH_USAGE = 'S01' INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE = 'P01' @@ -98,6 +100,10 @@ def init_console(): insights_total[HIGH] = 0 insights_total[WARN] = 0 insights_total[RECOMMENDATIONS] = 0 + + for name in thresholds: + thresholds[name][1] = False + return console @@ -166,14 +172,14 @@ def validate_thresholds(): for category, thresholds_spec in data.items(): for threshold_name, threshold_value in thresholds_spec.items(): - globals()[threshold_name] = threshold_value - - assert(imbalance_operations >= 0.0 and imbalance_operations <= 1.0) - assert(small_requests >= 0.0 and small_requests <= 1.0) - assert(misaligned_requests >= 0.0 and misaligned_requests <= 1.0) - assert(random_operations >= 0.0 and random_operations <= 1.0) - - assert(metadata_time_rank >= 0.0) + thresholds[category + '_' + threshold_name][0] = threshold_value + + assert(thresholds['imbalance_operations'][0] >= 0.0 and thresholds['imbalance_operations'][0] <= 1.0) + assert(thresholds['small_requests'][0] >= 0.0 and thresholds['small_requests'][0] <= 1.0) + assert(thresholds['misaligned_requests'][0] >= 0.0 and thresholds['misaligned_requests'][0] <= 1.0) + assert(thresholds['random_operations'][0] >= 0.0 and thresholds['random_operations'][0] <= 1.0) + + assert(thresholds['metadata_time_rank'][0] >= 0.0) def convert_bytes(bytes_number): diff --git a/drishti/includes/module.py b/drishti/includes/module.py index cf90530..2731e69 100644 --- a/drishti/includes/module.py +++ b/drishti/includes/module.py @@ -26,7 +26,8 @@ def check_stdio(total_size, total_size_stdio): ''' - if total_size and total_size_stdio / total_size > interface_stdio: + if total_size and total_size_stdio / total_size > thresholds['interface_stdio'][0]: + thresholds['interface_stdio'][1] = True issue = 'Application is using STDIO, a low-performance interface, for {:.2f}% of its data transfers ({})'.format( total_size_stdio / total_size * 100.0, convert_bytes(total_size_stdio) @@ -79,7 +80,7 @@ def check_operation_intensive(total_operations, total_reads, total_writes): total_writes: number of write operations been executed by the application ''' - if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > imbalance_operations: + if total_writes > total_reads and total_operations and abs(total_writes - total_reads) / total_operations > thresholds['imbalance_operations'][0]: issue = 'Application is write operation intensive ({:.2f}% writes vs. 
{:.2f}% reads)'.format( total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 ) @@ -88,7 +89,7 @@ def check_operation_intensive(total_operations, total_reads, total_writes): message(INSIGHTS_POSIX_WRITE_COUNT_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) - if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > imbalance_operations: + if total_reads > total_writes and total_operations and abs(total_writes - total_reads) / total_operations > thresholds['imbalance_operations'][0]: issue = 'Application is read operation intensive ({:.2f}% writes vs. {:.2f}% reads)'.format( total_writes / total_operations * 100.0, total_reads / total_operations * 100.0 ) @@ -108,7 +109,7 @@ def check_size_intensive(total_size, total_read_size, total_written_size): total_written_size: Output I/O size measured in byte ''' - if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > imbalance_operations: + if total_written_size > total_read_size and abs(total_written_size - total_read_size) / total_size > thresholds['imbalance_operations'][0]: issue = 'Application is write size intensive ({:.2f}% write vs. {:.2f}% read)'.format( total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) @@ -117,7 +118,7 @@ def check_size_intensive(total_size, total_read_size, total_written_size): message(INSIGHTS_POSIX_WRITE_SIZE_INTENSIVE, TARGET_DEVELOPER, INFO, issue, None) ) - if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > imbalance_operations: + if total_read_size > total_written_size and abs(total_written_size - total_read_size) / total_size > thresholds['imbalance_operations'][0]: issue = 'Application is read size intensive ({:.2f}% write vs. 
{:.2f}% read)'.format( total_written_size / total_size * 100.0, total_read_size / total_size * 100.0 ) @@ -143,7 +144,8 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr file_map: file id and file name pairing ''' - if total_reads_small and total_reads_small / total_reads > small_requests and total_reads_small > small_requests_absolute: + if total_reads_small and total_reads_small / total_reads > thresholds['small_requests'][0] and total_reads_small > thresholds['small_requests_absolute'][0]: + thresholds['small_requests_absolute'][1] = True issue = 'Application issues a high number ({}) of small read requests (i.e., < 1MB) which represents {:.2f}% of all read requests'.format( total_reads_small, total_reads_small / total_reads * 100.0 ) @@ -152,7 +154,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr recommendation = [] for index, row in detected_files.iterrows(): - if row['total_reads'] > (total_reads * small_requests / 2): + if row['total_reads'] > (total_reads * thresholds['small_requests'][0] / 2): detail.append( { 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( @@ -187,7 +189,8 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) - if total_writes_small and total_writes_small / total_writes > small_requests and total_writes_small > small_requests_absolute: + if total_writes_small and total_writes_small / total_writes > thresholds['small_requests'][0] and total_writes_small > thresholds['small_requests_absolute'][0]: + thresholds['small_requests_absolute'][1] = True issue = 'Application issues a high number ({}) of small write requests (i.e., < 1MB) which represents {:.2f}% of all write requests'.format( total_writes_small, total_writes_small / total_writes * 100.0 ) @@ -196,7 +199,7 @@ def check_small_operation(total_reads, total_reads_small, total_writes, total_wr recommendation = [] for index, row in detected_files.iterrows(): - if row['total_writes'] > (total_writes * small_requests / 2): + if row['total_writes'] > (total_writes * thresholds['small_requests'][0] / 2): detail.append( { 'message': '{} ({:.2f}%) small write requests are to "{}"'.format( @@ -243,7 +246,8 @@ def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_ali modules: all different mudules been used in the application ''' - if total_operations and total_mem_not_aligned / total_operations > misaligned_requests: + if total_operations and total_mem_not_aligned / total_operations > thresholds['misaligned_requests'][0]: + thresholds['misaligned_requests'][1] = True issue = 'Application has a high number ({:.2f}%) of misaligned memory requests'.format( total_mem_not_aligned / total_operations * 100.0 ) @@ -252,7 +256,8 @@ def check_misaligned(total_operations, total_mem_not_aligned, total_file_not_ali message(INSIGHTS_POSIX_HIGH_MISALIGNED_MEMORY_USAGE, TARGET_DEVELOPER, HIGH, issue, None) ) - if total_operations and total_file_not_aligned / total_operations > misaligned_requests: + if total_operations and total_file_not_aligned / total_operations > thresholds['misaligned_requests'][0]: + thresholds['misaligned_requests'][1] = True issue = 'Application issues a high number ({:.2f}%) of misaligned file requests'.format( total_file_not_aligned / total_operations * 100.0 ) @@ -330,7 +335,9 @@ def check_random_operation(read_consecutive, read_sequential, read_random, total 
if total_reads: - if read_random and read_random / total_reads > random_operations and read_random > random_operations_absolute: + if read_random and read_random / total_reads > thresholds['random_operations'][0] and read_random > thresholds['random_operations_absolute'][0]: + thresholds['random_operations'][1] = True + thresholds['random_operations_absolute'][1] = True issue = 'Application is issuing a high number ({}) of random read operations ({:.2f}%)'.format( read_random, read_random / total_reads * 100.0 ) @@ -355,7 +362,9 @@ def check_random_operation(read_consecutive, read_sequential, read_random, total ) if total_writes: - if write_random and write_random / total_writes > random_operations and write_random > random_operations_absolute: + if write_random and write_random / total_writes > thresholds['random_operations'][0] and write_random > thresholds['random_operations_absolute'][0]: + thresholds['random_operations'][1] = True + thresholds['random_operations_absolute'][1] = True issue = 'Application is issuing a high number ({}) of random write operations ({:.2f}%)'.format( write_random, write_random / total_writes * 100.0 ) @@ -395,7 +404,9 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t file_map: file id and file name pairing ''' - if total_shared_reads and total_shared_reads_small / total_shared_reads > small_requests and total_shared_reads_small > small_requests_absolute: + if total_shared_reads and total_shared_reads_small / total_shared_reads > thresholds['small_requests'][0] and total_shared_reads_small > thresholds['small_requests_absolute'][0]: + thresholds['small_requests'][1] = True + thresholds['small_requests_absolute'][1] = True issue = 'Application issues a high number ({}) of small read requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file read requests'.format( total_shared_reads_small, total_shared_reads_small / total_shared_reads * 100.0 ) @@ -403,7 +414,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t detail = [] for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * small_requests / 2): + if row['INSIGHTS_POSIX_SMALL_READS'] > (total_shared_reads * thresholds['small_requests'][0] / 2): detail.append( { 'message': '{} ({:.2f}%) small read requests are to "{}"'.format( @@ -425,7 +436,9 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t message(INSIGHTS_POSIX_HIGH_SMALL_READ_REQUESTS_SHARED_FILE_USAGE, TARGET_DEVELOPER, HIGH, issue, recommendation, detail) ) - if total_shared_writes and total_shared_writes_small / total_shared_writes > small_requests and total_shared_writes_small > small_requests_absolute: + if total_shared_writes and total_shared_writes_small / total_shared_writes > thresholds['small_requests'][0] and total_shared_writes_small > thresholds['small_requests_absolute'][0]: + thresholds['small_requests'][1] = True + thresholds['small_requests_absolute'][1] = True issue = 'Application issues a high number ({}) of small write requests to a shared file (i.e., < 1MB) which represents {:.2f}% of all shared file write requests'.format( total_shared_writes_small, total_shared_writes_small / total_shared_writes * 100.0 ) @@ -433,7 +446,7 @@ def check_shared_small_operation(total_shared_reads, total_shared_reads_small, t detail = [] for index, row in shared_files.iterrows(): - if row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * small_requests / 2): + if 
row['INSIGHTS_POSIX_SMALL_WRITES'] > (total_shared_writes * thresholds['small_requests'][0] / 2): detail.append( { 'message': '{} ({:.2f}%) small writes requests are to "{}"'.format( @@ -466,8 +479,9 @@ def check_long_metadata(count_long_metadata, modules): ''' if count_long_metadata > 0: + thresholds['metadata_time_rank'][1] = True issue = 'There are {} ranks where metadata operations take over {} seconds'.format( - count_long_metadata, metadata_time_rank + count_long_metadata, thresholds['metadata_time_rank'][0] ) recommendation = [ @@ -506,6 +520,7 @@ def check_shared_data_imblance(stragglers_count, detected_files, file_map): ''' if stragglers_count: + thresholds['imbalance_stragglers'][1] = True issue = 'Detected data transfer imbalance caused by stragglers when accessing {} shared file.'.format( stragglers_count ) @@ -547,7 +562,8 @@ def check_shared_data_imblance_split(slowest_rank_bytes, fastest_rank_bytes, tot total_transfer_size: total request size of that specific shared file ''' - if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > imbalance_stragglers: + if total_transfer_size and abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size > thresholds['imbalance_stragglers'][0]: + thresholds['imbalance_stragglers'][1] = True issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_bytes - fastest_rank_bytes) / total_transfer_size * 100 ) @@ -580,6 +596,7 @@ def check_shared_time_imbalance(stragglers_count, detected_files, file_map): ''' if stragglers_count: + thresholds['imbalance_stragglers'][1] = True issue = 'Detected time imbalance caused by stragglers when accessing {} shared file.'.format( stragglers_count ) @@ -621,7 +638,8 @@ def check_shared_time_imbalance_split(slowest_rank_time, fastest_rank_time, tota total_transfer_size: total request time of that specific shared file ''' - if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > imbalance_stragglers: + if total_transfer_time and abs(slowest_rank_time - fastest_rank_time) / total_transfer_time > thresholds['imbalance_stragglers'][0]: + thresholds['imbalance_stragglers'][1] = True issue = 'Load imbalance of {:.2f}% detected'.format( abs(slowest_rank_time - fastest_rank_time) / total_transfer_time * 100 ) @@ -653,6 +671,7 @@ def check_individual_write_imbalance(imbalance_count, detected_files, file_map): ''' if imbalance_count: + thresholds['imbalance_size'][1] = True issue = 'Detected write imbalance when accessing {} individual files'.format( imbalance_count ) @@ -700,7 +719,8 @@ def check_individual_write_imbalance_split(max_bytes_written, min_bytes_written) min_bytes_written: minimum byte written in the file ''' - if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > imbalance_size: + if max_bytes_written and abs(max_bytes_written - min_bytes_written) / max_bytes_written > thresholds['imbalance_size'][0]: + thresholds['imbalance_size'][1] = True issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_written - min_bytes_written) / max_bytes_written * 100 ) @@ -739,6 +759,7 @@ def check_individual_read_imbalance(imbalance_count, detected_files, file_map): ''' if imbalance_count: + thresholds['imbalance_size'][1] = True issue = 'Detected read imbalance when accessing {} individual files.'.format( imbalance_count ) @@ -786,7 +807,8 @@ def check_individual_read_imbalance_split(max_bytes_read, min_bytes_read): min_bytes_written: minimum byte read in the file ''' - if 
max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > imbalance_size: + if max_bytes_read and abs(max_bytes_read - min_bytes_read) / max_bytes_read > thresholds['imbalance_size'][0]: + thresholds['imbalance_size'][1] = True issue = 'Load imbalance of {:.2f}% detected'.format( abs(max_bytes_read - min_bytes_read) / max_bytes_read * 100 ) @@ -831,7 +853,8 @@ def check_mpi_collective_read_operation(mpiio_coll_reads, mpiio_indep_reads, tot ''' if mpiio_coll_reads == 0: - if total_mpiio_read_operations and total_mpiio_read_operations > collective_operations_absolute: + if total_mpiio_read_operations and total_mpiio_read_operations > thresholds['collective_operations_absolute'][0]: + thresholds['collective_operations_absolute'][1] = True issue = 'Application uses MPI-IO but it does not use collective read operations, instead it issues {} ({:.2f}%) independent read calls'.format( mpiio_indep_reads, mpiio_indep_reads / total_mpiio_read_operations * 100 @@ -886,7 +909,8 @@ def check_mpi_collective_write_operation(mpiio_coll_writes, mpiio_indep_writes, ''' if mpiio_coll_writes == 0: - if total_mpiio_write_operations and total_mpiio_write_operations > collective_operations_absolute: + if total_mpiio_write_operations and total_mpiio_write_operations > thresholds['collective_operations_absolute'][0]: + thresholds['collective_operations_absolute'][1] = True issue = 'Application uses MPI-IO but it does not use collective write operations, instead it issues {} ({:.2f}%) independent write calls'.format( mpiio_indep_writes, mpiio_indep_writes / total_mpiio_write_operations * 100 @@ -1074,6 +1098,42 @@ def display_content(console): ) +def display_thresholds(console): + tholdMessage = { + 'imbalance_operations': 'Minimum imbalance requests ratio: [white]{}%[/white]'.format(thresholds['imbalance_operations'][0] * 100), + 'small_bytes': 'Minimum size of a small request: [white]{} bytes[/white]'.format(thresholds['small_bytes'][0]), + 'small_requests': 'Maximum small requests ratio: [white]{}%[/white]'.format(thresholds['small_requests'][0] * 100), + 'small_requests_absolute': 'Maximum small requests: [white]{}[/white]'.format(thresholds['small_requests_absolute'][0]), + 'misaligned_requests': 'Maximum misaligned requests ratio: [white]{}%[/white]'.format(thresholds['misaligned_requests'][0] * 100), + 'random_operations': 'Maximum random request ratio: [white]{}%[/white]'.format(thresholds['random_operations'][0] * 100), + 'random_operations_absolute': 'Maximum random requests: [white]{}[/white]'.format(thresholds['random_operations_absolute'][0]), + 'metadata_time_rank': 'Maximum metadata process time per rank: [white]{} seconds[/white]'.format(thresholds['metadata_time_rank'][0]), + 'imbalance_size': 'Maximum read/write size difference ratio: [white]{}%[/white]'.format(thresholds['imbalance_size'][0] * 100), + 'imbalance_stragglers': 'Maximum ratio difference among ranks: [white]{}%[/white]'.format(thresholds['imbalance_stragglers'][0] * 100), + 'interface_stdio': 'Maximum STDIO usage ratio: [white]{}%[/white]'.format(thresholds['interface_stdio'][0] * 100), + 'collective_operations': 'Minimum MPI collective operation usage ratio: [white]{}%[/white]'.format(thresholds['collective_operations'][0] * 100), + 'collective_operations_absolute': 'Minimum MPI collective operations: [white]{}[/white]'.format(thresholds['collective_operations_absolute'][0]), + } + + toBeAppend = [] + if args.thold: + for name, message in tholdMessage.items(): + toBeAppend.append(message) + else: + for name, message 
in tholdMessage.items(): + if thresholds[name][1]: + toBeAppend.append(message) + + console.print( + Panel( + '\n'.join(toBeAppend), + title='THRESHOLDS', + title_align='left', + padding=1 + ) + ) + + def display_footer(console, insights_start_time, insights_end_time): console.print( Panel( diff --git a/drishti/includes/parser.py b/drishti/includes/parser.py index 7ddfdd6..fbc759c 100644 --- a/drishti/includes/parser.py +++ b/drishti/includes/parser.py @@ -56,6 +56,14 @@ help='Display extended details for the recommendations' ) +parser.add_argument( + '--threshold', + default=False, + action='store_true', + dest='thold', + help='Display all thresholds used for the report' +) + parser.add_argument( '--code', default=False,
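
[Editor's note: the following is an illustrative sketch, not part of the patch. It condenses the pattern the hunks above introduce: each threshold in config.py becomes a [value, triggered] pair, the check_* functions in module.py compare against element [0] and set element [1] when the threshold is exceeded, and display_thresholds() prints either every threshold (when --threshold is passed) or only the triggered ones. All names and numbers below are simplified stand-ins for the real config.py/module.py symbols.]

    # Hypothetical, self-contained sketch of the [value, triggered] threshold pattern.
    thresholds = {
        'small_requests': [0.1, False],            # ratio limit, triggered flag
        'small_requests_absolute': [1000, False],  # absolute-count limit, triggered flag
    }

    def check_small_operation(total_reads, total_reads_small):
        # Mirrors the module.py pattern: compare against [0], record usage in [1].
        if (total_reads
                and total_reads_small / total_reads > thresholds['small_requests'][0]
                and total_reads_small > thresholds['small_requests_absolute'][0]):
            thresholds['small_requests'][1] = True
            thresholds['small_requests_absolute'][1] = True
            return True
        return False

    def display_thresholds(show_all=False):
        # show_all=True corresponds to running with --threshold: list everything.
        # Otherwise only thresholds actually exceeded during the report are shown.
        for name, (value, triggered) in thresholds.items():
            if show_all or triggered:
                print('{}: {}'.format(name, value))

    check_small_operation(total_reads=5000, total_reads_small=2000)
    display_thresholds()              # prints only the triggered thresholds
    display_thresholds(show_all=True) # equivalent to passing --threshold

[End of editor's note. One consequence of this design, visible in init_console() above, is that the triggered flags must be reset to False at the start of each report so a previous run does not leak into the THRESHOLDS panel.]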