diff --git a/CodeFile.py b/CodeFile.py
new file mode 100644
index 0000000..66908c6
--- /dev/null
+++ b/CodeFile.py
@@ -0,0 +1,9 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class CodeFile:
+    """A repository file identified by its path, together with its size."""
+
+    path: str
+    size: int
diff --git a/Makefile b/Makefile
index 2321bf8..26b23c0 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,5 @@ update-example:
 	python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -s "2022-11-15" -i 14 -o "./media/example_output2.png"
 	python3 count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -o "./media/example_pie_output.png"
 
-
-
 .PHONY: all
 all: install test lint
\ No newline at end of file
diff --git a/count_by_instrumentation.py b/count_by_instrumentation.py
index e28432a..991f5fa 100644
--- a/count_by_instrumentation.py
+++ b/count_by_instrumentation.py
@@ -23,7 +23,7 @@ def get_commit_by_date(self, repository, date):
 
     def get_repository_by_commit(self, repository, commit):
         repo_data = self.client.get_repository_at_commit(repository, commit)
-        repo_data = self.data_filter.parse_data(repo_data)
+        repo_data = self.data_filter.get_file_counts_and_lengths(repo_data)
 
         return repo_data
 
@@ -42,16 +42,26 @@ def main(args):
         repository=args.repo,
         commit=commit
     )
-    count = count_by_language_and_file_extension(files=repo_files["files"],
-                                                 languages=[args.language])
+    file_counts, file_sizes = count_by_language_and_file_extension(
+        files=repo_files["files"],
+        languages=[args.language])
 
-    df = pd.DataFrame(list(count.items()), columns=['Key', 'Value'])
+    # Print the table showing file counts and sizes
+    data = [(key, file_counts[key], file_sizes[key]) for key in file_counts.keys()]
+    df2 = pd.DataFrame(data, columns=['Key', 'File Count', 'Total File Size'])
+    df2 = df2.sort_values(by='Total File Size', key=lambda col: col.astype(int),
+                          ascending=False)
+
+    print(df2.to_markdown(index=False))
+    print(f"| Total | {df2['File Count'].sum()} | {df2['Total File Size'].sum()} |")
+
+    # Create a pie chart for file counts only
+    df = pd.DataFrame(list(file_counts.items()), columns=['Key', 'Value'])
     df = df.sort_values(by='Value', key=lambda col: col.astype(int), ascending=False)
 
     sns.set_theme()
     colors = sns.color_palette('pastel')[0:len(df)]
 
-    # Create a pie chart
     explode = [0.05] * len(df)  # this will "explode" each slice from the pie
     df.set_index('Key')['Value'].plot.pie(autopct='%1.0f%%', colors=colors,
                                           explode=explode)
@@ -59,9 +69,6 @@ def main(args):
     plt.title(f'Remaining {args.language} files by Instrumentation')
     plt.ylabel('')
 
-    print(df.to_markdown(index=False))
-    print(f"| Total | {df['Value'].sum()} |")
-
     if args.output is not None:
         plt.savefig(args.output)
     else:
diff --git a/data_filter.py b/data_filter.py
index cbf3f6d..20637ba 100644
--- a/data_filter.py
+++ b/data_filter.py
@@ -1,5 +1,7 @@
 from typing import List
 
+from CodeFile import CodeFile
+
 
 class DataFilter:
 
@@ -33,3 +35,20 @@ def parse_data(self, payload):
             "files": data_result
         }
         return json_result
+
+    def get_file_counts_and_lengths(self, payload):
+        """Wrap every tree entry accepted by the filter in a ``CodeFile``.
+
+        :param payload: mapping whose ``"tree"`` value is an iterable of entry
+            mappings, each carrying a ``"path"`` key.
+        :return: ``{"files": [...]}`` with one ``CodeFile`` per matching entry.
+        """
+        matching_files = [
+            # Entries without a "size" key contribute a size of 0.
+            CodeFile(path=entry["path"], size=entry.get("size", 0))
+            for entry in payload["tree"]
+            if self.matches_meta(entry)
+            and self.matches_file_extensions(entry["path"])
+            and self.matches_directory(entry["path"])
+        ]
+        return {"files": matching_files}
diff --git a/latest.png b/latest.png
new file mode 100644
index 0000000..208ae21
Binary files /dev/null and b/latest.png differ
diff --git a/media/example_output.png b/media/example_output.png
index 77ef629..14f6dba 100644
Binary files a/media/example_output.png and b/media/example_output.png differ
diff --git a/media/example_output2.png b/media/example_output2.png
index 21b201a..e104ad7 100644
Binary files a/media/example_output2.png and b/media/example_output2.png differ
diff --git a/media/example_pie_output.png b/media/example_pie_output.png
index f7b0ea1..cece08e 100644
Binary files a/media/example_pie_output.png and b/media/example_pie_output.png differ
diff --git a/readme.md b/readme.md
index 6655708..674b2e0 100644
--- a/readme.md
+++ b/readme.md
@@ -103,37 +103,30 @@ In the `open-telemetry/opentelemetry-java-instrumentation` repository, analyze t
 
 Output: 
 
-| Key               | Value |
-|:------------------|------:|
-| spring            |    52 |
-| jaxrs             |    37 |
-| servlet           |    23 |
-| restlet           |    22 |
-| couchbase         |    18 |
-| aws-sdk           |    17 |
-| ratpack           |    16 |
-| elasticsearch     |    15 |
-| play              |    15 |
-| jaxws             |    15 |
-| vertx             |    14 |
-| mongo             |    10 |
-| jdbc              |     8 |
-| apache-dubbo-2.7  |     7 |
-| jaxrs-client      |     5 |
-| netty             |     5 |
-| apache-httpclient |     3 |
-| opentelemetry-api |     3 |
-| grizzly-2.3       |     3 |
-| grails-3.0        |     3 |
-| undertow-1.4      |     3 |
-| kafka             |     3 |
-| internal          |     2 |
-| dropwizard        |     2 |
-| hibernate         |     1 |
-| rediscala-1.8     |     1 |
-| spymemcached-2.12 |     1 |
-| twilio-6.6        |     1 |
-| Total             |   305 |
+| Key               | File Count | Total File Size |
+|:------------------|-----------:|----------------:|
+| servlet           |         23 |          143182 |
+| aws-sdk           |         17 |          127827 |
+| jdbc              |          8 |           90890 |
+| elasticsearch     |         15 |           90341 |
+| jaxrs             |         37 |           72352 |
+| vertx             |         14 |           56845 |
+| ratpack           |         16 |           51932 |
+| mongo             |         10 |           51661 |
+| play              |         15 |           48986 |
+| restlet           |         22 |           38226 |
+| jaxws             |         17 |           21595 |
+| kafka             |          3 |           21505 |
+| twilio-6.6        |          1 |           18936 |
+| jaxrs-client      |          5 |           16067 |
+| spymemcached-2.12 |          1 |           15630 |
+| undertow-1.4      |          3 |           12754 |
+| hibernate         |          1 |           12167 |
+| dropwizard        |          2 |           10789 |
+| rediscala-1.8     |          1 |            3898 |
+| grails-3.0        |          3 |            3201 |
+| internal          |          2 |            2603 |
+| Total             |        216 |          911387 |
 
 ![Example](./media/example_pie_output.png)
 
diff --git a/utilities.py b/utilities.py
index 3c52e7f..6cdf4e3 100644
--- a/utilities.py
+++ b/utilities.py
@@ -2,6 +2,8 @@
 from collections import defaultdict
-from typing import List, Dict
+from typing import List, Dict, Tuple
 
+from CodeFile import CodeFile
+
 
 def get_dates_between(start_date_str, end_date, interval):
     date_format = "%Y-%m-%d"
@@ -37,17 +39,32 @@ def count_by_file_extension(files: List[str], languages: List[str]) -> dict:
     return file_counts
 
 
-def count_by_language_and_file_extension(files: List[str], languages: List[str]) -> Dict[str, Dict[str, int]]:
-    counts = defaultdict(int)
+def count_by_language_and_file_extension(
+        files: List[CodeFile],
+        languages: List[str]) -> Tuple[Dict[str, int], Dict[str, int]]:
+    """Tally matching files and their total size per instrumentation.
+
+    The instrumentation is the second segment of each file's path; paths
+    with fewer than three segments are skipped. A file matches when the text
+    after the last "." of its final segment is listed in ``languages``.
+
+    :param files: entries exposing ``path`` and ``size`` attributes.
+    :param languages: file extensions (without the dot) to include.
+    :return: ``(file_counts, file_sizes)``, both keyed by instrumentation.
+    """
+    file_counts = defaultdict(int)
+    file_sizes = defaultdict(int)
     for file in files:
-        file_parts = file.split('/')
+        file_parts = file.path.split('/')
         if len(file_parts) < 3:
             continue
         instrumentation = file_parts[1]
         extension = file_parts[-1].split('.')[-1]
         if extension in languages:
-            counts[instrumentation] += 1
-    return counts
+            file_counts[instrumentation] += 1
+            file_sizes[instrumentation] += file.size
+    return file_counts, file_sizes
+
 
 def convert_to_plot(input_dict: dict, items):
     result = {}