forked from mufeedvh/code2prompt
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
82 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
//! This module contains utility functions.
/// Removes a leading UTF-8 byte order mark (BOM) from a byte slice, if present.
///
/// The UTF-8 BOM is the three-byte sequence `[0xEF, 0xBB, 0xBF]`. When `data`
/// begins with exactly these bytes, the returned slice borrows from `data` and
/// starts immediately after them. In every other case (empty input, a truncated
/// BOM, or a BOM that appears anywhere but the very start) `data` is returned
/// unchanged.
///
/// Only a single leading BOM is removed; the function does not allocate or copy.
///
/// # Examples
///
/// ```
/// use code2prompt::util::strip_utf8_bom;
///
/// assert_eq!(strip_utf8_bom(b"\xEF\xBB\xBFhi"), &b"hi"[..]);
/// assert_eq!(strip_utf8_bom(b"hi"), &b"hi"[..]);
/// ```
pub fn strip_utf8_bom(data: &[u8]) -> &[u8] {
    const BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    // `strip_prefix` yields `None` when the prefix is absent, so falling back to
    // `data` covers the "no BOM" case without any manual indexing.
    data.strip_prefix(BOM).unwrap_or(data)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use code2prompt::util::strip_utf8_bom; | ||
|
||
#[cfg(test)]
mod tests {
    // Pulls in `strip_utf8_bom` through the import in the enclosing file.
    use super::*;

    /// A BOM followed by content yields just the content.
    #[test]
    fn test_strip_utf8_bom_when_present() {
        let input: &[u8] = b"\xEF\xBB\xBFHello, world!";
        let expected: &[u8] = b"Hello, world!";
        let output = strip_utf8_bom(input);
        assert_eq!(
            output, expected,
            "BOM should be stripped from the beginning of the input."
        );
    }

    /// Input that never contained a BOM is passed through untouched.
    #[test]
    fn test_strip_utf8_bom_when_not_present() {
        let input: &[u8] = b"Hello, world!";
        let output = strip_utf8_bom(input);
        assert_eq!(
            output, input,
            "Input without a BOM should remain unchanged."
        );
    }

    /// The empty slice is shorter than the BOM and must not panic.
    #[test]
    fn test_strip_utf8_bom_empty_input() {
        let input: &[u8] = b"";
        let output = strip_utf8_bom(input);
        assert_eq!(
            output, input,
            "An empty input should return an empty output."
        );
    }

    /// Input consisting of nothing but the BOM collapses to the empty slice.
    #[test]
    fn test_strip_utf8_bom_only_bom() {
        let input: &[u8] = b"\xEF\xBB\xBF";
        let expected: &[u8] = b"";
        let output = strip_utf8_bom(input);
        assert_eq!(
            output, expected,
            "Input that is only a BOM should return an empty slice."
        );
    }

    /// A truncated BOM (fewer than three bytes, or the first two bytes followed
    /// by other data) is ordinary content and must be preserved.
    #[test]
    fn test_strip_utf8_bom_partial_bom() {
        let truncated: &[u8] = b"\xEF\xBB";
        assert_eq!(
            strip_utf8_bom(truncated),
            truncated,
            "A truncated BOM should remain unchanged."
        );

        let prefixed: &[u8] = b"\xEF\xBBHello";
        assert_eq!(
            strip_utf8_bom(prefixed),
            prefixed,
            "An incomplete BOM prefix should remain unchanged."
        );
    }

    /// BOM bytes that are not at the very start are left in place.
    #[test]
    fn test_strip_utf8_bom_not_at_start() {
        let input: &[u8] = b"Hello\xEF\xBB\xBF";
        assert_eq!(
            strip_utf8_bom(input),
            input,
            "A BOM that is not at the beginning should not be stripped."
        );
    }

    /// Only one leading BOM is removed, even when a second follows immediately.
    #[test]
    fn test_strip_utf8_bom_strips_only_first() {
        let input: &[u8] = b"\xEF\xBB\xBF\xEF\xBB\xBFHi";
        let expected: &[u8] = b"\xEF\xBB\xBFHi";
        assert_eq!(
            strip_utf8_bom(input),
            expected,
            "Only the first leading BOM should be stripped."
        );
    }
}