Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement git graphing algorithm #6

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/thicket.cr
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ module Thicket
exit
end

# Opt-in switch: stores the flag under :experimental in the shared options;
# Log#print checks that key and runs the commit-graph code path instead of
# the regular git-log based output.
parser.on("-e", "--experimental", "Use true git graph parsing") do |v|
@@options[:experimental] = v
end

parser.on("-d", "--directory=DIRECTORY", "Path to the project directory") do |d|
if d.nil?
STDERR.puts "You must provide a project directory."
Expand Down
221 changes: 221 additions & 0 deletions src/thicket/commit_graph_file.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
module Thicket
class CommitGraphFile
# Path of the commit-graph file this object was parsed from.
getter file_path : String

# Single header bytes read from file offsets 4, 5, 6 and 7 respectively.
getter version : UInt8
getter hash_version : UInt8
getter num_chunks : UInt8
getter num_base_commit_graphs : UInt8

# Decoded entries of the "OIDF" (OID Fanout) chunk.
getter oid_fanout : Array(UInt32)
# Total number of commits; taken from the last entry of `oid_fanout`.
getter num_commits : UInt32
# Hex-encoded object ids read from the "OIDL" (OID Lookup) chunk.
getter commit_oids : Array(String)
# One `CommitData` per commit, read from the "CDAT" (Commit Data) chunk.
getter commit_data : Array(CommitData)

# Opens the commit-graph file at *file_path* and eagerly parses its header,
# chunk table of contents, OID Fanout, OID Lookup and Commit Data chunks into
# this object's getters.
#
# Prints a few diagnostic lines to STDOUT while parsing. Raises if the header
# signature is not "CGPH", if a required chunk is missing, or if any read
# falls outside the file.
def initialize(@file_path)
  file = File.new(@file_path, "rb")

  # Every step below can raise on a malformed file, so the handle is released
  # in an `ensure` instead of by a trailing `file.close` that would be skipped.
  begin
    # Header data
    verify_header_signature(file)
    @version = file.read_at(4, 1, &.read_byte).not_nil!
    @hash_version = file.read_at(5, 1, &.read_byte).not_nil!
    @num_chunks = file.read_at(6, 1, &.read_byte).not_nil!
    @num_base_commit_graphs = file.read_at(7, 1, &.read_byte).not_nil!

    contents = chunk_table_of_contents(file)
    pp contents

    @oid_fanout = parse_oid_fanout(file, contents)
    # The last fanout entry is the cumulative count, i.e. the total.
    @num_commits = @oid_fanout.last
    puts "Number of commits: #{@num_commits}"

    @commit_oids = parse_oid_lookup(file, contents)
    puts "First commit: #{@commit_oids.first[0..6]}"
    puts " Last commit: #{@commit_oids.last[0..6]}"

    @commit_data = parse_commit_data(file, contents, @num_commits)
    pp @commit_oids.first
    pp @commit_data.first
  ensure
    file.close
  end
end

# The length of a full commit hash in bytes, derived from the hash version
# byte of the file header.
#
# Raises if the header carries a hash version this parser does not know.
def commit_hash_length : UInt32
  case @hash_version
  when 1 then 20_u32 # SHA-1
  when 2 then 32_u32 # SHA-256
  else
    raise "Unknown hash version identifier: #{@hash_version}"
  end
end

# Checks the 4-byte magic string at the very start of *file*.
#
# Returns nil when it reads "CGPH"; raises otherwise, quoting what was found.
private def verify_header_signature(file : File)
  magic = file.read_at(0, 4) { |io| io.read_string(4) }
  return if magic == "CGPH"

  raise "Found unknown commit graph file header signature: #{magic}"
end

# Reads the chunk table of contents that follows the 8-byte file header.
#
# Each entry is 12 bytes: a 4-byte chunk signature followed by an 8-byte
# big-endian offset of that chunk from the start of the file. Reading stops at
# the terminating entry whose signature is four zero bytes; that entry is not
# included in the result.
#
# Returns the entries sorted by ascending offset. Raises if any of the OIDF,
# OIDL or CDAT chunks is absent.
private def chunk_table_of_contents(file : File) : Array({ signature: String, offset: UInt64 })
  contents = [] of { signature: String, offset: UInt64 }

  entry_position = 8

  loop do
    chunk_signature = file.read_at(entry_position, 4, &.read_string(4))
    break if chunk_signature == "\0\0\0\0"

    # Decode explicitly as big-endian rather than reversing the bytes and
    # reinterpreting memory, which only works on little-endian hosts.
    chunk_offset = file.read_at(entry_position + 4, 8) do |io|
      io.read_bytes(UInt64, IO::ByteFormat::BigEndian)
    end

    contents << { signature: chunk_signature, offset: chunk_offset }

    entry_position += 12
  end

  # Checked in this order so the first missing chunk reported is stable.
  {
    "OIDF" => "OID Fanout",
    "OIDL" => "OID Lookup",
    "CDAT" => "Commit Data",
  }.each do |required_signature, chunk_name|
    if contents.none? { |c| c[:signature] == required_signature }
      raise "Unable to find #{chunk_name} chunk in commit graph file."
    end
  end

  contents.sort_by { |c| c[:offset] }
end

# Reads the OID Fanout ("OIDF") chunk: 256 big-endian 32-bit integers.
#
# *contents* is the offset-sorted chunk table; it must contain an "OIDF"
# entry. Returns the 256 values in file order (the constructor uses the last
# one as the total commit count).
#
# Raises if the next chunk starts before the 1024-byte table ends, or if the
# table would extend past the end of the file.
private def parse_oid_fanout(
  file : File,
  contents : Array({ signature: String, offset: UInt64 })
) : Array(UInt32)
  fanout_entries = 256
  fanout_bytesize = fanout_entries * 4

  oid_fanout_index = contents.index { |c| c[:signature] == "OIDF" }.not_nil!
  oid_fanout_offset = contents[oid_fanout_index][:offset]

  # The table has a fixed size, so a following chunk that begins inside it
  # means the file is corrupt; fail loudly instead of decoding garbage.
  if following_chunk = contents[oid_fanout_index + 1]?
    if following_chunk[:offset] < oid_fanout_offset + fanout_bytesize
      raise "OID Fanout chunk in commit graph file is truncated."
    end
  end

  # Decode each entry explicitly as big-endian; no byte reversal or pointer
  # casting, so the result does not depend on the host's endianness.
  file.read_at(oid_fanout_offset.to_i32, fanout_bytesize) do |io|
    Array.new(fanout_entries) { io.read_bytes(UInt32, IO::ByteFormat::BigEndian) }
  end
end

# Reads the OID Lookup ("OIDL") chunk: `@num_commits` object ids stored back
# to back, each `commit_hash_length` bytes long.
#
# *contents* is the offset-sorted chunk table; it must contain an "OIDL"
# entry. Returns the ids as lowercase hex strings, in file order.
#
# Raises if the expected number of bytes cannot be read from the file.
private def parse_oid_lookup(
  file : File,
  contents : Array({ signature: String, offset: UInt64 })
) : Array(String)
  oid_lookup_index = contents.index { |c| c[:signature] == "OIDL" }.not_nil!
  oid_lookup_offset = contents[oid_lookup_index][:offset]

  hash_length = commit_hash_length.to_i32
  slice = Bytes.new(@num_commits.to_i32 * hash_length)

  # read_fully (unlike read) guarantees the buffer is completely filled, so a
  # short read can never leave trailing zero bytes that look like valid ids.
  file.read_at(oid_lookup_offset.to_i32, slice.size, &.read_fully(slice))

  # Object ids are plain byte strings, so each one is hex-encoded in place;
  # no byte-order handling is involved.
  Array.new(@num_commits) do |i|
    slice[i * hash_length, hash_length].hexstring
  end
end

# Reads the Commit Data ("CDAT") chunk: one fixed-size record per commit, in
# the same order as the OID Lookup chunk.
#
# Record layout, all integers big-endian:
#   * `commit_hash_length` bytes - object id of the commit's root tree
#   * 4 bytes - position of the first parent, or 0x70000000 for "none"
#   * 4 bytes - position of the second parent, or 0x70000000 for "none"
#   * 8 bytes - generation number in the top 30 bits, commit time in the
#               low 34 bits
#
# NOTE(review): per the git commit-graph format, a second-parent value with
# its most significant bit set refers to the Extra Edge List chunk rather than
# to a commit position. It is returned here unmodified; confirm how callers
# want octopus merges represented.
#
# *contents* is the offset-sorted chunk table and must contain a "CDAT" entry.
# Returns *num_commits* `CommitData` values. Raises if the records cannot be
# read in full.
private def parse_commit_data(
  file : File,
  contents : Array({ signature: String, offset: UInt64 }),
  num_commits : UInt32,
) : Array(CommitData)
  # Sentinel parent position meaning "no parent".
  no_parent = 0x70000000_u32
  # Low 34 bits of the packed 64-bit field hold the commit time.
  commit_time_mask = 0x3FFFFFFFF_u64

  commit_data_index = contents.index { |c| c[:signature] == "CDAT" }.not_nil!
  commit_data_offset = contents[commit_data_index][:offset]

  hash_length = commit_hash_length.to_i32
  single_commit_data_size = hash_length + 16
  commit_data_length = num_commits.to_i32 * single_commit_data_size

  puts "Found #{commit_data_length} bytes of commit data."

  # Records are decoded sequentially, field by field, straight from the file.
  # Nothing is byte-reversed: reversing the whole chunk would both flip the
  # record order and move every field to a different offset in its record.
  file.read_at(commit_data_offset.to_i32, commit_data_length) do |io|
    Array.new(num_commits) do
      root_tree_bytes = Bytes.new(hash_length)
      io.read_fully(root_tree_bytes)
      root_tree_oid = root_tree_bytes.hexstring

      first_parent_value = io.read_bytes(UInt32, IO::ByteFormat::BigEndian)
      first_parent = first_parent_value == no_parent ? nil : first_parent_value

      second_parent_value = io.read_bytes(UInt32, IO::ByteFormat::BigEndian)
      second_parent = second_parent_value == no_parent ? nil : second_parent_value

      # One 64-bit read covers both packed fields; shifting (not just
      # masking) brings the 30-bit generation number down to its real value.
      packed = io.read_bytes(UInt64, IO::ByteFormat::BigEndian)
      generation_number = (packed >> 34).to_u32
      commit_time = packed & commit_time_mask

      CommitData.new(
        root_tree_oid,
        first_parent,
        second_parent,
        generation_number,
        commit_time,
      )
    end
  end
end

# Immutable value holding the decoded fields of one Commit Data record.
struct CommitData
  # Hex-encoded object id of the commit's root tree.
  getter root_tree_oid : String
  # Parent values as stored in the record; nil when the parser found the
  # "no parent" sentinel.
  getter first_parent : UInt32?
  getter second_parent : UInt32?
  getter generation_number : UInt32
  getter commit_time : UInt64

  def initialize(
    @root_tree_oid : String,
    @first_parent : UInt32?,
    @second_parent : UInt32?,
    @generation_number : UInt32,
    @commit_time : UInt64
  )
  end
end
end
end
19 changes: 19 additions & 0 deletions src/thicket/log.cr
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require "file_utils"

require "./time_measure"
require "./commit_graph_file"

module Thicket
class Log
Expand All @@ -10,7 +11,25 @@ module Thicket
@count_parsed = 0
end

# Experimental entry point: changes into the git working directory and, when
# a single commit-graph file exists, parses it with `CommitGraphFile`.
#
# Returns the parsed `CommitGraphFile`, or `true` when the single file is
# absent.
#
# NOTE(review): the absence of the single file is reported as "chain" without
# checking that a commit-graph chain actually exists - confirm this is
# intended once chain support is implemented.
def process_experimental
  FileUtils.cd(git_working_directory)

  single_graph_path = "./.git/objects/info/commit-graph"

  unless File.exists?(single_graph_path)
    puts "Detected commit-graph chain."
    return true
  end

  puts "Detected single commit-graph file, no chain present."
  CommitGraphFile.new(single_graph_path)
end

def print
if @options[:experimental]
process_experimental

return
end

FileUtils.cd(git_working_directory)
`#{git_log_command}`.split("\n").each do |l|
puts process_git_log_line(l)
Expand Down