Skip to content

[fix] Make Tree roundtrip by storing additional bit of information #1917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/ls-tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ fn run(args: Args) -> anyhow::Result<()> {
for entry in entries {
writeln!(
out,
"{:06o} {:4} {} {}",
*entry.mode,
"{:>6o} {:4} {} {}",
entry.mode,
entry.mode.as_str(),
entry.hash,
entry.path
Expand Down
8 changes: 4 additions & 4 deletions gitoxide-core/src/repository/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn write_changes(
} => {
writeln!(out, "A: {}", typed_location(location, entry_mode))?;
writeln!(out, " {}", id.attach(repo).shorten_or_id())?;
writeln!(out, " -> {:o}", entry_mode.0)?;
writeln!(out, " -> {entry_mode:o}")?;
}
gix::diff::tree_with_rewrites::Change::Deletion {
location,
Expand All @@ -59,7 +59,7 @@ fn write_changes(
} => {
writeln!(out, "D: {}", typed_location(location, entry_mode))?;
writeln!(out, " {}", id.attach(repo).shorten_or_id())?;
writeln!(out, " {:o} ->", entry_mode.0)?;
writeln!(out, " {entry_mode:o} ->")?;
}
gix::diff::tree_with_rewrites::Change::Modification {
location,
Expand All @@ -76,7 +76,7 @@ fn write_changes(
id = id.attach(repo).shorten_or_id()
)?;
if previous_entry_mode != entry_mode {
writeln!(out, " {:o} -> {:o}", previous_entry_mode.0, entry_mode.0)?;
writeln!(out, " {previous_entry_mode:o} -> {entry_mode:o}")?;
}
}
gix::diff::tree_with_rewrites::Change::Rewrite {
Expand All @@ -101,7 +101,7 @@ fn write_changes(
id = id.attach(repo).shorten_or_id()
)?;
if source_entry_mode != entry_mode {
writeln!(out, " {:o} -> {:o}", source_entry_mode.0, entry_mode.0)?;
writeln!(out, " {source_entry_mode:o} -> {entry_mode:o}")?;
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion gix-index/src/entry/mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ impl Mode {

impl From<gix_object::tree::EntryMode> for Mode {
fn from(value: gix_object::tree::EntryMode) -> Self {
Self::from_bits_truncate(u32::from(value.0))
let value: u16 = value.value();
Self::from_bits_truncate(u32::from(value))
}
}

Expand Down
4 changes: 2 additions & 2 deletions gix-merge/tests/merge/tree/baseline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ fn parse_conflict_file_info(line: &str) -> Option<(Entry, Side)> {
Entry {
location: path.to_owned(),
id: gix_hash::ObjectId::from_hex(hex_id.as_bytes()).unwrap(),
mode: EntryMode(gix_utils::btoi::to_signed_with_radix::<usize>(oct_mode.as_bytes(), 8).unwrap() as u16),
mode: EntryMode::try_from(oct_mode.as_bytes()).unwrap(),
},
match stage {
"1" => Side::Ancestor,
Expand Down Expand Up @@ -339,7 +339,7 @@ pub fn visualize_tree(
mode = if mode.is_tree() {
"".into()
} else {
format!("{:o}:", mode.0)
format!("{mode:o}:")
}
)
}
Expand Down
156 changes: 115 additions & 41 deletions gix-object/src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub mod write;
pub struct Editor<'a> {
/// A way to lookup trees.
find: &'a dyn crate::FindExt,
/// The kind of hashes to produce
/// The kind of hashes to produce>
object_hash: gix_hash::Kind,
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
Expand All @@ -44,18 +44,121 @@ pub struct Editor<'a> {
/// create it by converting [`EntryKind`] into `EntryMode`.
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EntryMode(pub u16);
pub struct EntryMode {
// Represents the value read from Git, except that "040000" is represented with 0o140000 but
// "40000" is represented with 0o40000.
internal: u16,
}

impl TryFrom<u32> for tree::EntryMode {
type Error = u32;
fn try_from(mode: u32) -> Result<Self, Self::Error> {
Ok(match mode {
0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
_ => return Err(mode),
})
}
}

impl EntryMode {
/// Expose the value as u16 (lossy, unlike the internal representation that is hidden).
pub const fn value(self) -> u16 {
// Demangle the hack: In the case where the second leftmost octet is 4 (Tree), the leftmost bit is
// there to represent whether the bytes representation should have 5 or 6 octets.
if self.internal & IFMT == 0o140000 {
0o040000
} else {
self.internal
}
}

/// Return the representation as used in the git internal format, which is octal and written
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
if self.internal == 0 {
std::slice::from_ref(&b'0')
} else {
for (idx, backing_octet) in backing.iter_mut().enumerate() {
let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
let oct_mask = 0b111 << bit_pos;
let digit = (self.internal & oct_mask) >> bit_pos;
*backing_octet = b'0' + digit as u8;
}
// Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
if backing[1] == b'4' {
if backing[0] == b'1' {
backing[0] = b'0';
&backing[0..6]
} else {
&backing[1..6]
}
} else {
&backing[0..6]
}
}
.into()
}

/// Construct an EntryMode from bytes represented as in the git internal format
/// Return the mode and the remainder of the bytes.
pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
let mut mode = 0;
let mut idx = 0;
let mut space_pos = 0;
if i.is_empty() {
return None;
}
// const fn, this is why we can't have nice things (like `.iter().any()`).
while idx < i.len() {
let b = i[idx];
// Delimiter, return what we got
if b == b' ' {
space_pos = idx;
break;
}
// Not a pure octal input.
// Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
#[allow(clippy::manual_range_contains)]
if b < b'0' || b > b'7' {
return None;
}
// More than 6 octal digits we must have hit the delimiter or the input was malformed.
if idx > 6 {
return None;
}
mode = (mode << 3) + (b - b'0') as u16;
idx += 1;
}
// Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
if mode == 0o040000 && i[0] == b'0' {
mode += 0o100000;
}
Some((Self { internal: mode }, &i[(space_pos + 1)..]))
}

/// Construct an EntryMode from bytes represented as in the git internal format.
pub fn from_bytes(i: &[u8]) -> Option<Self> {
Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
}
}

impl std::fmt::Debug for EntryMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "EntryMode({:#o})", self.0)
write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
}
}

impl std::fmt::Octal for EntryMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.as_bytes(&mut Default::default()))
}
}

/// A discretized version of ideal and valid values for entry modes.
///
/// Note that even though it can represent every valid [mode](EntryMode), it might
/// loose information due to that as well.
/// lose information due to that as well.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
#[repr(u16)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
Expand All @@ -74,7 +177,7 @@ pub enum EntryKind {

impl From<EntryKind> for EntryMode {
fn from(value: EntryKind) -> Self {
EntryMode(value as u16)
EntryMode { internal: value as u16 }
}
}

Expand All @@ -100,22 +203,14 @@ impl EntryKind {
}
}

impl std::ops::Deref for EntryMode {
type Target = u16;

fn deref(&self) -> &Self::Target {
&self.0
}
}

const IFMT: u16 = 0o170000;

impl EntryMode {
/// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
pub const fn kind(&self) -> EntryKind {
let etype = self.0 & IFMT;
let etype = self.value() & IFMT;
if etype == 0o100000 {
if self.0 & 0o000100 == 0o000100 {
if self.value() & 0o000100 == 0o000100 {
EntryKind::BlobExecutable
} else {
EntryKind::Blob
Expand All @@ -131,27 +226,27 @@ impl EntryMode {

/// Return true if this entry mode represents a Tree/directory
pub const fn is_tree(&self) -> bool {
self.0 & IFMT == EntryKind::Tree as u16
self.value() & IFMT == EntryKind::Tree as u16
}

/// Return true if this entry mode represents the commit of a submodule.
pub const fn is_commit(&self) -> bool {
self.0 & IFMT == EntryKind::Commit as u16
self.value() & IFMT == EntryKind::Commit as u16
}

/// Return true if this entry mode represents a symbolic link
pub const fn is_link(&self) -> bool {
self.0 & IFMT == EntryKind::Link as u16
self.value() & IFMT == EntryKind::Link as u16
}

/// Return true if this entry mode represents anything BUT Tree/directory
pub const fn is_no_tree(&self) -> bool {
self.0 & IFMT != EntryKind::Tree as u16
self.value() & IFMT != EntryKind::Tree as u16
}

/// Return true if the entry is any kind of blob.
pub const fn is_blob(&self) -> bool {
self.0 & IFMT == 0o100000
self.value() & IFMT == 0o100000
}

/// Return true if the entry is an executable blob.
Expand All @@ -178,27 +273,6 @@ impl EntryMode {
Commit => "commit",
}
}

/// Return the representation as used in the git internal format, which is octal and written
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
if self.0 == 0 {
std::slice::from_ref(&b'0')
} else {
let mut nb = 0;
let mut n = self.0;
while n > 0 {
let remainder = (n % 8) as u8;
backing[nb] = b'0' + remainder;
n /= 8;
nb += 1;
}
let res = &mut backing[..nb];
res.reverse();
res
}
.into()
}
}

impl TreeRef<'_> {
Expand Down
42 changes: 3 additions & 39 deletions gix-object/src/tree/ref_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,54 +171,18 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
type Error = &'a [u8];

fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
mode_from_decimal(mode)
.map(|(mode, _rest)| tree::EntryMode(mode as u16))
.ok_or(mode)
}
}

fn mode_from_decimal(i: &[u8]) -> Option<(u32, &[u8])> {
let mut mode = 0u32;
let mut spacer_pos = 1;
for b in i.iter().take_while(|b| **b != b' ') {
if *b < b'0' || *b > b'7' {
return None;
}
mode = (mode << 3) + u32::from(b - b'0');
spacer_pos += 1;
}
if i.len() < spacer_pos {
return None;
}
let (_, i) = i.split_at(spacer_pos);
Some((mode, i))
}

impl TryFrom<u32> for tree::EntryMode {
type Error = u32;

fn try_from(mode: u32) -> Result<Self, Self::Error> {
Ok(match mode {
0o40000 | 0o120000 | 0o160000 => tree::EntryMode(mode as u16),
blob_mode if blob_mode & 0o100000 == 0o100000 => tree::EntryMode(mode as u16),
_ => return Err(mode),
})
tree::EntryMode::from_bytes(mode).ok_or(mode)
}
}

mod decode {
use bstr::ByteSlice;
use winnow::{error::ParserError, prelude::*};

use crate::{
tree,
tree::{ref_iter::mode_from_decimal, EntryRef},
TreeRef,
};
use crate::{tree, tree::EntryRef, TreeRef};

pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
let (mode, i) = mode_from_decimal(i)?;
let mode = tree::EntryMode::try_from(mode).ok()?;
let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
let (filename, i) = i.split_at(i.find_byte(0)?);
let i = &i[1..];
const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
Expand Down
Loading
Loading