From 501f6f18b4e3486ecb39ae220f3315c7eb52998d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sat, 4 Nov 2023 19:21:03 +0900 Subject: [PATCH] Move rev_diff_start_iter and rev_diff_iter_next to Rust The code is a little horrible, but until every use of rev_chunk moves to Rust, that's what we'll live with. --- src/build.mk | 1 - src/cinnabar-fast-import.c | 50 +++++++++--------- src/hg-bundle.c | 48 ----------------- src/hg-bundle.h | 6 +-- src/hg_bundle.rs | 103 +++++++++++++++++++++++++------------ src/libcinnabar.rs | 1 + src/store.rs | 18 +++---- 7 files changed, 109 insertions(+), 118 deletions(-) delete mode 100644 src/hg-bundle.c diff --git a/src/build.mk b/src/build.mk index 7dd3c48c7..d9ab385fb 100644 --- a/src/build.mk +++ b/src/build.mk @@ -61,7 +61,6 @@ all:: git-cinnabar$X CINNABAR_OBJECTS += cinnabar-fast-import.o CINNABAR_OBJECTS += cinnabar-helper.o CINNABAR_OBJECTS += cinnabar-notes.o -CINNABAR_OBJECTS += hg-bundle.o CINNABAR_OBJECTS += hg-connect-stdio.o CINNABAR_OBJECTS += hg-data.o CINNABAR_OBJECTS += mingw.o diff --git a/src/cinnabar-fast-import.c b/src/cinnabar-fast-import.c index 2919d037e..4a7f71f51 100644 --- a/src/cinnabar-fast-import.c +++ b/src/cinnabar-fast-import.c @@ -524,7 +524,8 @@ void store_file(struct rev_chunk *chunk) static struct hg_file last_file; struct hg_file file; struct strbuf data = STRBUF_INIT; - struct rev_diff_part diff; + struct strslice diff; + struct rev_diff_part part; size_t last_end = 0; if (is_empty_hg_file(chunk->node)) @@ -539,15 +540,15 @@ void store_file(struct rev_chunk *chunk) } rev_diff_start_iter(&diff, chunk); - while (rev_diff_iter_next(&diff)) { - if (diff.start > last_file.file.len || diff.start < last_end) + while (rev_diff_iter_next(&diff, &part)) { + if (part.start > last_file.file.len || part.start < last_end) die("Malformed file chunk for %s", hg_oid_to_hex(chunk->node)); strbuf_add(&data, last_file.file.buf + last_end, - diff.start - last_end); - strbuf_addslice(&data, diff.data); + part.start - last_end); + strbuf_addslice(&data, part.data); - last_end = diff.end; + last_end = part.end; } if (last_file.file.len < last_end) @@ -643,7 +644,8 @@ void store_manifest(struct rev_chunk *chunk, struct strbuf path = STRBUF_INIT; struct strbuf data = STRBUF_INIT; struct strslice_mut manifest = stored_manifest; - struct rev_diff_part diff; + struct strslice diff; + struct rev_diff_part part; size_t last_end = 0; struct strslice slice; struct manifest_line line; @@ -686,35 +688,35 @@ void store_manifest(struct rev_chunk *chunk, } rev_diff_start_iter(&diff, chunk); - while (rev_diff_iter_next(&diff)) { + while (rev_diff_iter_next(&diff, &part)) { size_t len; - if (diff.start > last_manifest_content.len || - diff.start < last_end || diff.start > diff.end) + if (part.start > last_manifest_content.len || + part.start < last_end || part.start > part.end) goto malformed; - len = diff.start - last_end; + len = part.start - last_end; strslice_copy( strslice_slice(last_manifest_content, last_end, len), - strslice_mut_slice(manifest, 0, diff.start - last_end)); + strslice_mut_slice(manifest, 0, part.start - last_end)); manifest = strslice_mut_slice(manifest, len, SIZE_MAX); - strslice_copy(diff.data, - strslice_mut_slice(manifest, 0, diff.data.len)); - manifest = strslice_mut_slice(manifest, diff.data.len, SIZE_MAX); + strslice_copy(part.data, + strslice_mut_slice(manifest, 0, part.data.len)); + manifest = strslice_mut_slice(manifest, part.data.len, SIZE_MAX); - last_end = diff.end; + last_end = part.end; // We assume manifest diffs are line-based. - if (diff.start > 0 && - last_manifest_content.buf[diff.start - 1] != '\n') + if (part.start > 0 && + last_manifest_content.buf[part.start - 1] != '\n') goto malformed; - if (diff.end > 0 && - last_manifest_content.buf[diff.end - 1] != '\n') + if (part.end > 0 && + last_manifest_content.buf[part.end - 1] != '\n') goto malformed; // TODO: Avoid a remove+add cycle for same-file modifications. // Process removed files. - slice = strslice_slice(last_manifest_content, diff.start, - diff.end - diff.start); + slice = strslice_slice(last_manifest_content, part.start, + part.end - part.start); while (split_manifest_line(&slice, &line) == 0) { manifest_metadata_path(&path, &line.path); tree_content_remove(&last_manifest->branch_tree, @@ -736,9 +738,9 @@ void store_manifest(struct rev_chunk *chunk, } rev_diff_start_iter(&diff, chunk); - while (rev_diff_iter_next(&diff)) { + while (rev_diff_iter_next(&diff, &part)) { // Process added files. - slice = diff.data; + slice = part.data; while (split_manifest_line(&slice, &line) == 0) { uint16_t mode; struct object_id file_node; diff --git a/src/hg-bundle.c b/src/hg-bundle.c deleted file mode 100644 index 3be255810..000000000 --- a/src/hg-bundle.c +++ /dev/null @@ -1,48 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "git-compat-util.h" -#include "http.h" -#include "hg-bundle.h" -#include - -void rev_diff_start_iter(struct rev_diff_part *iterator, - struct rev_chunk *chunk) -{ - iterator->start = 0; - iterator->end = 0; - iterator->data.len = 0; - iterator->data.buf = NULL; - iterator->chunk = chunk; -} - -int rev_diff_iter_next(struct rev_diff_part *iterator) -{ - const char *part; - const char *chunk_end = iterator->chunk->raw.buf + - iterator->chunk->raw.len; - - if (iterator->data.buf == NULL) - part = (char *) iterator->chunk->diff_data; - else - part = iterator->data.buf + - iterator->data.len; - - if (part == chunk_end) - return 0; - - if (part > chunk_end - 12) - die("Invalid revchunk"); - - iterator->start = get_be32(part); - iterator->end = get_be32(part + 4); - iterator->data.len = get_be32(part + 8); - iterator->data.buf = (char *) part + 12; - - if (iterator->data.buf + iterator->data.len > chunk_end || - iterator->start > iterator->end) - die("Invalid revchunk"); - - return 1; -} diff --git a/src/hg-bundle.h b/src/hg-bundle.h index b193a2f4c..97d186754 100644 --- a/src/hg-bundle.h +++ b/src/hg-bundle.h @@ -20,19 +20,17 @@ struct rev_chunk { // Only in changegroupv2 const struct hg_object_id *delta_node; /* const struct hg_object_id *changeset; // We actually don't care about this */ - const unsigned char *diff_data; }; struct rev_diff_part { size_t start; size_t end; struct strslice data; - struct rev_chunk *chunk; }; -void rev_diff_start_iter(struct rev_diff_part *iterator, +void rev_diff_start_iter(struct strslice *iterator, struct rev_chunk *chunk); -int rev_diff_iter_next(struct rev_diff_part *iterator); +int rev_diff_iter_next(struct strslice *iterator, struct rev_diff_part *part); #endif diff --git a/src/hg_bundle.rs b/src/hg_bundle.rs index 63c38e2cf..215375fd4 100644 --- a/src/hg_bundle.rs +++ b/src/hg_bundle.rs @@ -8,13 +8,13 @@ use std::fmt::Display; use std::fs::File; use std::io::{self, copy, Chain, Cursor, ErrorKind, Read, Write}; use std::iter::repeat; -use std::mem::{self, MaybeUninit}; +use std::mem; use std::os::raw::c_int; -use std::ptr::NonNull; +use std::ptr::{self, NonNull}; use std::rc::Rc; use std::str::FromStr; -use bstr::ByteSlice; +use bstr::{BStr, ByteSlice}; use byteorder::{BigEndian, ByteOrder, ReadBytesExt, WriteBytesExt}; use bzip2::read::BzDecoder; use bzip2::write::BzEncoder; @@ -43,10 +43,25 @@ use crate::util::{FromBytes, ImmutBString, ReadExt, SliceExt, ToBoxed}; use crate::xdiff::textdiff; use crate::{get_changes, HELPER_LOCK}; -extern "C" { - fn rev_diff_start_iter(iterator: *mut rev_diff_part, chunk: *const rev_chunk); +#[no_mangle] +pub unsafe extern "C" fn rev_diff_start_iter(iterator: *mut strslice, chunk: *const rev_chunk) { + ptr::write(iterator, chunk.as_ref().unwrap().iter_diff().0.into()); +} - fn rev_diff_iter_next(iterator: *mut rev_diff_part) -> c_int; +#[no_mangle] +pub unsafe extern "C" fn rev_diff_iter_next( + iterator: *mut strslice, + part: *mut rev_diff_part, +) -> c_int { + let mut diff_iter = RevDiffIter(iterator.as_mut().unwrap().as_bytes()); + let next = diff_iter.next(); + ptr::write(iterator, diff_iter.0.into()); + if let Some(p) = next { + ptr::write(part, mem::transmute(p.0)); + 1 + } else { + 0 + } } #[allow(non_camel_case_types)] @@ -57,7 +72,7 @@ pub struct rev_chunk { parent1: NonNull, parent2: NonNull, delta_node: NonNull, - diff_data: NonNull, + data_offset: usize, } #[allow(non_camel_case_types)] @@ -66,7 +81,6 @@ pub struct rev_diff_part<'a> { start: usize, end: usize, data: strslice<'a>, - chunk: &'a rev_chunk, } impl rev_chunk { @@ -87,11 +101,7 @@ impl rev_chunk { } pub fn iter_diff(&self) -> RevDiffIter { - unsafe { - let mut part = MaybeUninit::zeroed(); - rev_diff_start_iter(part.as_mut_ptr(), self); - RevDiffIter(part.assume_init()) - } + RevDiffIter(&self.raw.as_bytes()[self.data_offset..]) } } @@ -194,36 +204,65 @@ impl Iterator for RevChunkIter { } else { NonNull::new_unchecked(buf.as_ptr().add(60)).cast() }; - let diff_data = NonNull::new_unchecked(buf.as_ptr().add(data_offset)).cast(); rev_chunk { raw: buf, node, parent1, parent2, delta_node, - diff_data, + data_offset, } }) } } -pub struct RevDiffIter<'a>(rev_diff_part<'a>); +#[repr(transparent)] +pub struct RevDiffIter<'a>(&'a [u8]); -pub struct RevDiffPart { - pub start: usize, - pub end: usize, - pub data: ImmutBString, -} +#[repr(transparent)] +pub struct RevDiffPart<'a>(rev_diff_part<'a>); impl<'a> Iterator for RevDiffIter<'a> { - type Item = RevDiffPart; + type Item = RevDiffPart<'a>; - fn next(&mut self) -> Option { - unsafe { rev_diff_iter_next(&mut self.0) != 0 }.then(|| RevDiffPart { - start: self.0.start, - end: self.0.end, - data: self.0.data.as_bytes().to_vec().into_boxed_slice(), - }) + fn next(&mut self) -> Option> { + let slice = self.0.as_bytes(); + if slice.is_empty() { + return None; + } + if slice.len() < 12 { + die!("Invalid revchunk"); + } + let start = usize::try_from(BigEndian::read_u32(&slice[0..4])).unwrap(); + let end = usize::try_from(BigEndian::read_u32(&slice[4..8])).unwrap(); + let len = usize::try_from(BigEndian::read_u32(&slice[8..12])).unwrap(); + let slice = &slice[12..]; + if slice.len() < len { + die!("Invalid revchunk"); + } + let (data, slice) = slice.split_at(len); + unsafe { + ptr::write(&mut self.0 as *mut _, slice); + } + Some(RevDiffPart(rev_diff_part { + start, + end, + data: data.into(), + })) + } +} + +impl<'a> RevDiffPart<'a> { + pub fn start(&self) -> usize { + self.0.start + } + + pub fn end(&self) -> usize { + self.0.end + } + + pub fn data(&self) -> &BStr { + self.0.data.as_bytes().as_bstr() } } @@ -837,12 +876,12 @@ impl BundleConnection { let mut last_end = 0; let mut raw_changeset = Vec::new(); for diff in chunk.iter_diff() { - if diff.start > reference_cs.len() || diff.start < last_end { + if diff.start() > reference_cs.len() || diff.start() < last_end { die!("Malformed changeset chunk for {node}"); } - raw_changeset.extend_from_slice(&reference_cs[last_end..diff.start]); - raw_changeset.extend_from_slice(&diff.data); - last_end = diff.end; + raw_changeset.extend_from_slice(&reference_cs[last_end..diff.start()]); + raw_changeset.extend_from_slice(diff.data()); + last_end = diff.end(); } if reference_cs.len() < last_end { die!("Malformed changeset chunk for {node}"); diff --git a/src/libcinnabar.rs b/src/libcinnabar.rs index 97c58eb6c..6949b52c4 100644 --- a/src/libcinnabar.rs +++ b/src/libcinnabar.rs @@ -12,6 +12,7 @@ use crate::libgit::{child_process, object_id}; use crate::oid::{Abbrev, ObjectId}; #[allow(non_camel_case_types)] +#[derive(Clone, Debug)] #[repr(C)] pub struct strslice<'a> { len: usize, diff --git a/src/store.rs b/src/store.rs index b2dd20e16..a01d3bc43 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1430,12 +1430,12 @@ pub fn store_changegroup(input: R, version: u8) { let mut last_end = 0; let mut mn_size = 0; for diff in manifest.iter_diff() { - if diff.start > reference_mn.len() || diff.start < last_end { + if diff.start() > reference_mn.len() || diff.start() < last_end { die!("Malformed changeset chunk for {mid}"); } - mn_size += diff.start - last_end; - mn_size += diff.data.len(); - last_end = diff.end; + mn_size += diff.start() - last_end; + mn_size += diff.data().len(); + last_end = diff.end(); } if reference_mn.len() < last_end { die!("Malformed changeset chunk for {mid}"); @@ -1500,7 +1500,7 @@ pub fn store_changegroup(input: R, version: u8) { } } else if parents == null_parents { if let Some(diff) = file.iter_diff().next() { - if diff.start == 0 && diff.data.get(..2) == Some(b"\x01\n") { + if diff.start() == 0 && diff.data().get(..2) == Some(b"\x01\n") { stored_files.insert(node, parents); } } @@ -1540,12 +1540,12 @@ pub fn store_changegroup(input: R, version: u8) { let mut last_end = 0; let mut raw_changeset = Vec::new(); for diff in changeset.iter_diff() { - if diff.start > reference_cs.len() || diff.start < last_end { + if diff.start() > reference_cs.len() || diff.start() < last_end { die!("Malformed changeset chunk for {changeset_id}"); } - raw_changeset.extend_from_slice(&reference_cs[last_end..diff.start]); - raw_changeset.extend_from_slice(&diff.data); - last_end = diff.end; + raw_changeset.extend_from_slice(&reference_cs[last_end..diff.start()]); + raw_changeset.extend_from_slice(diff.data()); + last_end = diff.end(); } if reference_cs.len() < last_end { die!("Malformed changeset chunk for {changeset_id}");