diff --git a/src/thicket.cr b/src/thicket.cr
index 36c5122..92c2ba4 100644
--- a/src/thicket.cr
+++ b/src/thicket.cr
@@ -22,6 +22,10 @@ module Thicket
         exit
       end
 
+      parser.on("-e", "--experimental", "Use true git graph parsing") do |v|
+        @@options[:experimental] = v
+      end
+
       parser.on("-d", "--directory=DIRECTORY", "Path to the project directory") do |d|
         if d.nil?
           STDERR.puts "You must provide a project directory."
diff --git a/src/thicket/commit_graph_file.cr b/src/thicket/commit_graph_file.cr
new file mode 100644
index 0000000..efd42f3
--- /dev/null
+++ b/src/thicket/commit_graph_file.cr
@@ -0,0 +1,227 @@
+# Reader for git's binary commit-graph file format.
+#
+# Format reference:
+# https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+module Thicket
+  class CommitGraphFile
+    # Parent position stored in the Commit Data chunk when a parent slot is
+    # empty (GRAPH_PARENT_NONE in the format specification).
+    GRAPH_PARENT_NONE = 0x70000000_u32
+
+    # The OID Fanout chunk always holds 256 four-byte counters.
+    FANOUT_ENTRIES = 256
+
+    # One row of the chunk lookup table: the four-character chunk ID and the
+    # absolute file offset at which that chunk's data begins.
+    alias ChunkEntry = NamedTuple(signature: String, offset: UInt64)
+
+    getter file_path : String
+
+    getter version : UInt8
+    getter hash_version : UInt8
+    getter num_chunks : UInt8
+    getter num_base_commit_graphs : UInt8
+
+    getter oid_fanout : Array(UInt32)
+    getter num_commits : UInt32
+    getter commit_oids : Array(String)
+    getter commit_data : Array(CommitData)
+
+    # Opens the commit-graph file at *file_path* and eagerly parses its
+    # header and its OID Fanout, OID Lookup and Commit Data chunks, printing
+    # a short diagnostic summary to STDOUT along the way.
+    #
+    # Raises when the signature or hash version is unknown, or when a
+    # required chunk is missing or truncated. The file handle is closed
+    # even if parsing fails part-way through.
+    def initialize(@file_path)
+      file = File.new(@file_path, "rb")
+
+      begin
+        # Header data
+        verify_header_signature(file)
+        @version = file.read_at(4, 1, &.read_byte).not_nil!
+        @hash_version = file.read_at(5, 1, &.read_byte).not_nil!
+        @num_chunks = file.read_at(6, 1, &.read_byte).not_nil!
+        @num_base_commit_graphs = file.read_at(7, 1, &.read_byte).not_nil!
+
+        contents = chunk_table_of_contents(file)
+        pp contents
+
+        # The fanout table is cumulative, so its last entry is the total
+        # number of commits stored in this file.
+        @oid_fanout = parse_oid_fanout(file, contents)
+        @num_commits = @oid_fanout.last
+        puts "Number of commits: #{@num_commits}"
+
+        @commit_oids = parse_oid_lookup(file, contents)
+        # An empty graph has no first or last commit to report.
+        unless @commit_oids.empty?
+          puts "First commit: #{@commit_oids.first[0..6]}"
+          puts " Last commit: #{@commit_oids.last[0..6]}"
+        end
+
+        @commit_data = parse_commit_data(file, contents, @num_commits)
+        pp @commit_oids.first?
+        pp @commit_data.first?
+      ensure
+        file.close
+      end
+    end
+
+    # The length of a full commit hash in bytes, derived from the header's
+    # hash version. Raises for versions this parser does not know.
+    def commit_hash_length : UInt32
+      case @hash_version
+      when 1 # SHA-1
+        20_u32
+      when 2 # SHA-256
+        32_u32
+      else
+        raise "Unknown hash version identifier: #{@hash_version}"
+      end
+    end
+
+    # Raises unless the file starts with the "CGPH" magic bytes.
+    private def verify_header_signature(file : File)
+      signature = file.read_at(0, 4, &.read_string(4))
+
+      if signature != "CGPH"
+        raise "Found unknown commit graph file header signature: #{signature}"
+      end
+    end
+
+    # Reads the chunk lookup table that follows the 8-byte header.
+    #
+    # Each row is a 4-byte chunk ID followed by an 8-byte big-endian offset;
+    # an all-zero ID terminates the table. Returns the rows sorted by offset
+    # and raises when the OIDF, OIDL or CDAT chunk is absent.
+    private def chunk_table_of_contents(file : File) : Array(ChunkEntry)
+      contents = [] of ChunkEntry
+      current_byte = 8
+
+      loop do
+        chunk_signature = file.read_at(current_byte, 4, &.read_string(4))
+        break if chunk_signature == "\0\0\0\0"
+
+        # Decode explicitly as big-endian instead of relying on host byte order.
+        chunk_offset = file.read_at(current_byte + 4, 8, &.read_bytes(UInt64, IO::ByteFormat::BigEndian))
+
+        contents << {signature: chunk_signature, offset: chunk_offset}
+        current_byte += 12
+      end
+
+      required = {"OIDF" => "OID Fanout", "OIDL" => "OID Lookup", "CDAT" => "Commit Data"}
+      required.each do |signature, label|
+        if contents.none? { |c| c[:signature] == signature }
+          raise "Unable to find #{label} chunk in commit graph file."
+        end
+      end
+
+      contents.sort_by { |c| c[:offset] }
+    end
+
+    # Returns the first *expected_size* bytes of the chunk named *signature*.
+    #
+    # A chunk extends to the next chunk's offset or, for the last chunk, to
+    # the trailing checksum. Raises when the chunk is smaller than
+    # *expected_size* or the file ends before that many bytes are read.
+    private def read_chunk(
+      file : File,
+      contents : Array(ChunkEntry),
+      signature : String,
+      expected_size : Int
+    ) : Bytes
+      index = contents.index { |c| c[:signature] == signature }.not_nil!
+      chunk_start = contents[index][:offset].to_i64
+
+      chunk_end = if following = contents[index + 1]?
+                    following[:offset].to_i64
+                  else
+                    # Exclude trailer hash if necessary
+                    file.size.to_i64 - commit_hash_length
+                  end
+
+      if chunk_end - chunk_start < expected_size
+        raise "Chunk #{signature} is shorter than the expected #{expected_size} bytes."
+      end
+
+      buffer = Bytes.new(expected_size.to_i32)
+      file.read_at(chunk_start.to_i32, buffer.size, &.read_fully(buffer))
+      buffer
+    end
+
+    # Decodes the OID Fanout chunk into its 256 cumulative commit counts.
+    private def parse_oid_fanout(file : File, contents : Array(ChunkEntry)) : Array(UInt32)
+      raw = read_chunk(file, contents, "OIDF", FANOUT_ENTRIES * 4)
+
+      Array.new(FANOUT_ENTRIES) do |i|
+        IO::ByteFormat::BigEndian.decode(UInt32, raw[i * 4, 4])
+      end
+    end
+
+    # Decodes the OID Lookup chunk into lowercase hex commit IDs, kept in the
+    # order they are stored in the file.
+    private def parse_oid_lookup(file : File, contents : Array(ChunkEntry)) : Array(String)
+      hash_length = commit_hash_length
+      raw = read_chunk(file, contents, "OIDL", @num_commits.to_i64 * hash_length)
+
+      Array.new(@num_commits) do |i|
+        raw[i * hash_length, hash_length].hexstring
+      end
+    end
+
+    # Decodes the Commit Data chunk into one `CommitData` per commit, in the
+    # same order as the OID Lookup chunk.
+    #
+    # Each record is the root tree OID, two 4-byte parent positions and an
+    # 8-byte field packing the generation number and the commit time.
+    private def parse_commit_data(
+      file : File,
+      contents : Array(ChunkEntry),
+      num_commits : UInt32
+    ) : Array(CommitData)
+      hash_length = commit_hash_length
+      record_size = hash_length + 16
+      raw = read_chunk(file, contents, "CDAT", num_commits.to_i64 * record_size)
+
+      puts "Found #{raw.size} bytes of commit data."
+
+      Array.new(num_commits) do |i|
+        record = raw[i * record_size, record_size]
+
+        root_tree_oid = record[0, hash_length].hexstring
+        first_parent = parse_parent(record[hash_length, 4])
+        second_parent = parse_parent(record[hash_length + 4, 4])
+
+        # The generation number is the upper 30 bits of the packed field and
+        # the commit time is the lower 34 bits.
+        packed = IO::ByteFormat::BigEndian.decode(UInt64, record[hash_length + 8, 8])
+        generation_number = (packed >> 34).to_u32
+        commit_time = packed & 0x3FFFFFFFF_u64
+
+        CommitData.new(root_tree_oid, first_parent, second_parent, generation_number, commit_time)
+      end
+    end
+
+    # Decodes a 4-byte big-endian parent position, mapping the "no parent"
+    # marker to nil.
+    private def parse_parent(bytes : Bytes) : UInt32?
+      position = IO::ByteFormat::BigEndian.decode(UInt32, bytes)
+      position unless position == GRAPH_PARENT_NONE
+    end
+
+    # Per-commit record from the Commit Data chunk. Parent fields hold raw
+    # position values from the file, or nil when the slot is empty.
+    struct CommitData
+      getter root_tree_oid : String
+      getter first_parent : UInt32 | Nil
+      getter second_parent : UInt32 | Nil
+      getter generation_number : UInt32
+      getter commit_time : UInt64
+
+      def initialize(@root_tree_oid, @first_parent, @second_parent, @generation_number, @commit_time)
+      end
+    end
+  end
+end
diff --git a/src/thicket/log.cr b/src/thicket/log.cr
index 6394c8f..b6cf026 100644
--- a/src/thicket/log.cr
+++ b/src/thicket/log.cr
@@ -1,6 +1,7 @@
 require "file_utils"
 
 require "./time_measure"
+require "./commit_graph_file"
 
 module Thicket
   class Log
@@ -10,7 +11,25 @@ module Thicket
       @count_parsed = 0
     end
 
+    def process_experimental
+      FileUtils.cd(git_working_directory)
+
+      if File.exists?("./.git/objects/info/commit-graph")
+        puts "Detected single commit-graph file, no chain present."
+        cgf = CommitGraphFile.new("./.git/objects/info/commit-graph")
+      else
+        puts "Detected commit-graph chain."
+        chain = true
+      end
+    end
+
     def print
+      if @options[:experimental]
+        process_experimental
+
+        return
+      end
+
       FileUtils.cd(git_working_directory)
       `#{git_log_command}`.split("\n").each do |l|
         puts process_git_log_line(l)