|
| 1 | +use std::{iter::Peekable, path::PathBuf}; |
| 2 | + |
| 3 | +use gix_diff::blob::{ |
| 4 | + intern::TokenSource, |
| 5 | + unified_diff::{ConsumeHunk, ContextSize, HunkHeader}, |
| 6 | + Algorithm, UnifiedDiff, |
| 7 | +}; |
| 8 | +use gix_object::bstr::{self, BString, ByteVec}; |
| 9 | + |
| 10 | +#[derive(Debug, PartialEq)] |
| 11 | +struct DiffHunk { |
| 12 | + header: HunkHeader, |
| 13 | + lines: BString, |
| 14 | +} |
| 15 | + |
| 16 | +struct DiffHunkRecorder { |
| 17 | + inner: Vec<DiffHunk>, |
| 18 | +} |
| 19 | + |
| 20 | +impl DiffHunkRecorder { |
| 21 | + fn new() -> Self { |
| 22 | + Self { inner: Vec::new() } |
| 23 | + } |
| 24 | +} |
| 25 | + |
| 26 | +impl ConsumeHunk for DiffHunkRecorder { |
| 27 | + type Out = Vec<DiffHunk>; |
| 28 | + |
| 29 | + fn consume_hunk( |
| 30 | + &mut self, |
| 31 | + header: HunkHeader, |
| 32 | + lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], |
| 33 | + ) -> std::io::Result<()> { |
| 34 | + let mut buf = Vec::new(); |
| 35 | + |
| 36 | + for &(kind, line) in lines { |
| 37 | + buf.push(kind.to_prefix() as u8); |
| 38 | + buf.extend_from_slice(line); |
| 39 | + buf.push(b'\n'); |
| 40 | + } |
| 41 | + |
| 42 | + let diff_hunk = DiffHunk { |
| 43 | + header, |
| 44 | + lines: buf.into(), |
| 45 | + }; |
| 46 | + |
| 47 | + self.inner.push(diff_hunk); |
| 48 | + |
| 49 | + Ok(()) |
| 50 | + } |
| 51 | + |
| 52 | + fn finish(self) -> Self::Out { |
| 53 | + self.inner |
| 54 | + } |
| 55 | +} |
| 56 | + |
| 57 | +struct Baseline<'a> { |
| 58 | + lines: Peekable<bstr::Lines<'a>>, |
| 59 | +} |
| 60 | + |
| 61 | +mod baseline { |
| 62 | + use std::path::Path; |
| 63 | + |
| 64 | + use gix_diff::blob::unified_diff::HunkHeader; |
| 65 | + use gix_object::bstr::ByteSlice; |
| 66 | + |
| 67 | + use super::{Baseline, DiffHunk}; |
| 68 | + |
| 69 | + static START_OF_HEADER: &[u8; 4] = b"@@ -"; |
| 70 | + |
| 71 | + impl Baseline<'_> { |
| 72 | + pub fn collect(baseline_path: impl AsRef<Path>) -> std::io::Result<Vec<DiffHunk>> { |
| 73 | + let content = std::fs::read(baseline_path)?; |
| 74 | + |
| 75 | + let mut baseline = Baseline { |
| 76 | + lines: content.lines().peekable(), |
| 77 | + }; |
| 78 | + |
| 79 | + baseline.skip_header(); |
| 80 | + |
| 81 | + Ok(baseline.collect()) |
| 82 | + } |
| 83 | + |
| 84 | + fn skip_header(&mut self) { |
| 85 | + // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb |
| 86 | + // index ccccccc..ddddddd 100644 |
| 87 | + // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa |
| 88 | + // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb |
| 89 | + |
| 90 | + let line = self.lines.next().expect("line to be present"); |
| 91 | + assert!(line.starts_with(b"diff --git ")); |
| 92 | + |
| 93 | + let line = self.lines.next().expect("line to be present"); |
| 94 | + assert!(line.starts_with(b"index ")); |
| 95 | + |
| 96 | + let line = self.lines.next().expect("line to be present"); |
| 97 | + assert!(line.starts_with(b"--- ")); |
| 98 | + |
| 99 | + let line = self.lines.next().expect("line to be present"); |
| 100 | + assert!(line.starts_with(b"+++ ")); |
| 101 | + } |
| 102 | + |
| 103 | + /// Parse diff hunk headers that conform to the unified diff hunk header format. |
| 104 | + /// |
| 105 | + /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This |
| 106 | + /// allows us to split the input on ` ` and `,` only. |
| 107 | + /// |
| 108 | + /// @@ -18,6 +18,7 @@ abc def ghi |
| 109 | + /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ |
| 110 | + fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result<HunkHeader> { |
| 111 | + let Some(line) = line.strip_prefix(START_OF_HEADER) else { |
| 112 | + todo!() |
| 113 | + }; |
| 114 | + |
| 115 | + let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); |
| 116 | + let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else { |
| 117 | + todo!() |
| 118 | + }; |
| 119 | + |
| 120 | + Ok(HunkHeader { |
| 121 | + before_hunk_start: self.parse_number(before_hunk_start), |
| 122 | + before_hunk_len: self.parse_number(before_hunk_len), |
| 123 | + after_hunk_start: self.parse_number(after_hunk_start), |
| 124 | + after_hunk_len: self.parse_number(after_hunk_len), |
| 125 | + }) |
| 126 | + } |
| 127 | + |
| 128 | + fn parse_number(&self, bytes: &[u8]) -> u32 { |
| 129 | + bytes |
| 130 | + .to_str() |
| 131 | + .expect("to be a valid UTF-8 string") |
| 132 | + .parse::<u32>() |
| 133 | + .expect("to be a number") |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + impl Iterator for Baseline<'_> { |
| 138 | + type Item = DiffHunk; |
| 139 | + |
| 140 | + fn next(&mut self) -> Option<Self::Item> { |
| 141 | + let mut hunk_header = None; |
| 142 | + let mut hunk_lines = Vec::new(); |
| 143 | + |
| 144 | + while let Some(line) = self.lines.next() { |
| 145 | + if line.starts_with(START_OF_HEADER) { |
| 146 | + assert!(hunk_header.is_none(), "should not overwrite existing hunk_header"); |
| 147 | + hunk_header = self.parse_hunk_header(line).ok(); |
| 148 | + |
| 149 | + continue; |
| 150 | + } |
| 151 | + |
| 152 | + match line[0] { |
| 153 | + b' ' | b'+' | b'-' => { |
| 154 | + hunk_lines.extend_from_slice(line); |
| 155 | + hunk_lines.push(b'\n'); |
| 156 | + } |
| 157 | + _ => todo!(), |
| 158 | + } |
| 159 | + |
| 160 | + match self.lines.peek() { |
| 161 | + Some(next_line) if next_line.starts_with(START_OF_HEADER) => break, |
| 162 | + None => break, |
| 163 | + _ => {} |
| 164 | + } |
| 165 | + } |
| 166 | + |
| 167 | + hunk_header.map(|hunk_header| DiffHunk { |
| 168 | + header: hunk_header, |
| 169 | + lines: hunk_lines.into(), |
| 170 | + }) |
| 171 | + } |
| 172 | + } |
| 173 | +} |
| 174 | + |
/// Diff every blob pair that has a baseline file in the fixture and compare the resulting
/// hunks with those recorded in the baseline.
///
/// Baseline files are named `<old_blob_id>-<new_blob_id>.<algorithm>.baseline`; the blobs are
/// read from `assets/<blob_id>.blob` within the fixture directory.
#[test]
fn sliders() -> gix_testtools::Result {
    /// Render `hunks` as text: each header on its own line, followed by the hunk's lines.
    fn render(hunks: &[DiffHunk]) -> String {
        let mut out = BString::default();
        for hunk in hunks {
            out.push_str(hunk.header.to_string().as_str());
            out.push(b'\n');
            out.extend_from_slice(&hunk.lines);
        }
        out.to_string()
    }

    let worktree_path = fixture_path()?;
    let asset_dir = worktree_path.join("assets");

    for entry in std::fs::read_dir(&worktree_path)? {
        let entry = entry?;
        let file_name = entry.file_name().into_string().expect("to be string");

        if !file_name.ends_with(".baseline") {
            continue;
        }

        let parts: Vec<_> = file_name.split('.').collect();
        let [name, algorithm, ..] = parts[..] else {
            panic!("baseline file name {file_name:?} should be `<old>-<new>.<algorithm>.baseline`");
        };
        let algorithm = match algorithm {
            "myers" => Algorithm::Myers,
            "histogram" => Algorithm::Histogram,
            other => panic!("unsupported algorithm {other:?} in baseline file name {file_name:?}"),
        };

        let parts: Vec<_> = name.split('-').collect();
        let [old_blob_id, new_blob_id] = parts[..] else {
            panic!("expected `<old>-<new>` blob ids in baseline file name {file_name:?}");
        };

        let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?;
        let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?;

        let interner = gix_diff::blob::intern::InternedInput::new(
            tokens_for_diffing(old_data.as_slice()),
            tokens_for_diffing(new_data.as_slice()),
        );

        let actual = gix_diff::blob::diff(
            algorithm,
            &interner,
            UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)),
        )?;

        let baseline = Baseline::collect(worktree_path.join(&file_name))?;

        // Compare the rendered text rather than the hunks to get a readable diff on failure.
        pretty_assertions::assert_eq!(
            render(&actual),
            render(&baseline),
            "diff does not match baseline {file_name}"
        );
    }

    Ok(())
}
| 251 | + |
| 252 | +fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> { |
| 253 | + gix_diff::blob::sources::byte_lines(data) |
| 254 | +} |
| 255 | + |
| 256 | +fn fixture_path() -> gix_testtools::Result<PathBuf> { |
| 257 | + gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh") |
| 258 | +} |
0 commit comments