Skip to content

Commit

Permalink
Refactor upcase/downcase char
Browse files Browse the repository at this point in the history
  • Loading branch information
CeleritasCelery committed Jan 10, 2025
1 parent 51820bb commit bc24753
Showing 1 changed file with 45 additions and 68 deletions.
113 changes: 45 additions & 68 deletions src/casefiddle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,31 @@ use text_buffer::Buffer as TextBuffer;
// Capitalize a string or a single character.
//
// A string is handed to `casify_string` with `CaseMode::Capitalize`. A lone
// character is upcased by `casify_char` using `char::to_uppercase`. Either
// result is wrapped into an `Object` through `cx.add`.
fn capitalize<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
    match string_or_char {
        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Capitalize)),
        // Exactly one `Char` arm: a second one would be an unreachable pattern.
        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
    }
}

// Convert a string or a single character to upper case.
//
// A string is handed to `casify_string` with `CaseMode::Upcase`. A lone
// character goes through `casify_char` using `char::to_uppercase`. Either
// result is wrapped into an `Object` through `cx.add`.
#[defun]
fn upcase<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
    match string_or_char {
        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Upcase)),
        // Exactly one `Char` arm: a second one would be an unreachable pattern.
        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
    }
}

// Convert a string or a single character to lower case.
//
// A string is handed to `casify_string` with `CaseMode::Downcase`. A lone
// character goes through `casify_char` using `char::to_lowercase`. Either
// result is wrapped into an `Object` through `cx.add`.
#[defun]
fn downcase<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
    match string_or_char {
        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::Downcase)),
        // Exactly one `Char` arm: a second one would be an unreachable pattern.
        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_lowercase)),
    }
}

// Upcase the initials of a string, or upcase a single character.
//
// A string is handed to `casify_string` with `CaseMode::UpcaseInitials`. A
// lone character goes through `casify_char` using `char::to_uppercase`.
// Either result is wrapped into an `Object` through `cx.add`.
#[defun]
fn upcase_initials<'ob>(string_or_char: StringOrChar<'ob>, cx: &'ob Context<'ob>) -> Object<'ob> {
    match string_or_char {
        StringOrChar::String(s) => cx.add(casify_string(s, CaseMode::UpcaseInitials)),
        // Exactly one `Char` arm: a second one would be an unreachable pattern.
        StringOrChar::Char(c) => cx.add(casify_char(c, char::to_uppercase)),
    }
}

Expand Down Expand Up @@ -131,46 +131,23 @@ enum CaseMode {
UpcaseInitials,
}

fn upcase_char(c: u64) -> u64 {
fn casify_char<T>(c: u64, f: impl Fn(char) -> T) -> u64
where
T: Iterator<Item = char>,
{
// emacs uses an identity function for invalid codepoints
if c > crate::lisp::CHAR_MODIFIER_MASK {
return c;
}
let c: u32 = match c.try_into() {
Ok(c) => c,
Err(_) => return c,
};
let c: char = match c.try_into() {
Ok(c) => c,
Err(_) => return c as u64,
};
let mut upper_c = c.to_uppercase();
// if the char capitalizes to multiple characters, don't change case
if upper_c.len() > 1 {
return c as u64;
}
upper_c.next().expect("char should upcase") as u64
}

fn downcase_char(c: u64) -> u64 {
// see `upcase_char` comment
if c > crate::lisp::CHAR_MODIFIER_MASK {
return c;
}
let c: u32 = match c.try_into() {
Ok(c) => c,
Err(_) => return c,
};
let c: char = match c.try_into() {
Ok(c) => c,
Err(_) => return c as u64,
};
let mut lower_c = c.to_lowercase();
// if the char lowercases to multiple characters, don't change case
if lower_c.len() > 1 {
return c as u64;
let Ok(u) = u32::try_from(c) else { return c };
let Ok(chr) = char::try_from(u) else { return c };
let mut cased = f(chr);
let first = cased.next().unwrap();
// if the char changes case to multiple characters, don't change case
match cased.next() {
Some(_) => c,
None => first as u64,
}
lower_c.next().expect("char should downcase") as u64
}

fn find_forward_word(buf: &TextBuffer) -> Range<usize> {
Expand Down Expand Up @@ -211,43 +188,43 @@ mod tests {
// Checks `downcase` on a mixed-case string and on single characters that are
// already lower case or need lowering. Each case is asserted once.
fn test_downcase() {
    let roots = &RootSet::default();
    let cx = &Context::new(roots);
    assert_eq!(downcase("The cat in the hat".into(), cx), "the cat in the hat");
    assert_eq!(downcase('x'.into(), cx), 'x');
    assert_eq!(downcase('X'.into(), cx), 'x');
}

// Checks `upcase` on the Emacs documentation examples, escape and control
// characters, non-ASCII text, mixed content, and invalid code points.
#[test]
fn test_upcase() {
    let roots = &RootSet::default();
    let cx = &Context::new(roots);
    // Emacs Doc Tests
    assert_eq!(upcase("The cat in the hat".into(), cx), "THE CAT IN THE HAT");
    // U+FB01 LATIN SMALL LIGATURE FI: as a string it upcases to the two
    // letters "FI"; as a single character it has no one-character upper case,
    // so it is returned unchanged. Written as an escape so the ligature
    // cannot be flattened into two ASCII letters (which is not a valid char
    // literal).
    assert_eq!(upcase("\u{FB01}".into(), cx), "FI");
    assert_eq!(upcase('\u{FB01}'.into(), cx), '\u{FB01}');
    assert_eq!(upcase('x'.into(), cx), 'X');
    assert_eq!(upcase('X'.into(), cx), 'X');

    // Basic escape characters
    assert_eq!(upcase("\n".into(), cx), "\n");
    assert_eq!(upcase("\t".into(), cx), "\t");
    assert_eq!(upcase("\r".into(), cx), "\r");

    // Control characters
    assert_eq!(upcase("\u{0}".into(), cx), "\u{0}");
    assert_eq!(upcase("\u{1B}".into(), cx), "\u{1B}");
    assert_eq!(upcase("\u{7F}".into(), cx), "\u{7F}");

    // Non-ASCII characters
    assert_eq!(upcase("αβγ".into(), cx), "ΑΒΓ");
    assert_eq!(upcase("åäö".into(), cx), "ÅÄÖ");

    // Mixed content
    assert_eq!(upcase("hello\nworld".into(), cx), "HELLO\nWORLD");
    assert_eq!(upcase("foo\tbar".into(), cx), "FOO\tBAR");
    assert_eq!(upcase("path\\to\\file\"name\"".into(), cx), "PATH\\TO\\FILE\"NAME\"");

    // Invalid code points
    assert_eq!(upcase(StringOrChar::Char(0xD800), cx), 0xD800);
    assert_eq!(upcase(StringOrChar::Char(u64::MAX), cx), cx.add(u64::MAX));
}

Expand All @@ -268,23 +245,23 @@ mod tests {
// TODO: implement syntax tables so it's known whether a character makes a word or symbol
// // U+1D100 MUSICAL SYMBOL SINGLE BARLINE (Other-Symbol)
// // U+0041 LATIN CAPITAL LETTER A
// assert_eq!(capitalize(cx.add("𝄀A"), cx), Ok(cx.add("𝄀a")));
// assert_eq!(capitalize("𝄀A", cx), Ok("𝄀a"));
// // U+0024 DOLLAR SIGN (Currency-Symbol)
// // U+0041 LATIN CAPITAL LETTER A
// assert_eq!(capitalize(cx.add("$A"), cx), Ok(cx.add("$a")));
// assert_eq!(capitalize("$A", cx), Ok("$a"));
// // U+002D HYPHEN-MINUS (Dash-Punctuation)
// // U+0041 LATIN CAPITAL LETTER A
// assert_eq!(capitalize(cx.add("-A"), cx), Ok(cx.add("-A")));
// assert_eq!(capitalize("-A", cx), Ok("-A"));
// // U+005E CIRCUMFLEX ACCENT (Modifier-Symbol)
// // U+0041 LATIN CAPITAL LETTER A
// assert_eq!(capitalize(cx.add("^A"), cx), Ok(cx.add("^A")));
// assert_eq!(capitalize("^A", cx), Ok("^A"));
// // U+0FBE TIBETAN KU RU KHA (Other-Symbol)
// // U+0041 LATIN CAPITAL LETTER A
// assert_eq!(capitalize(cx.add("྾A"), cx), Ok(cx.add("྾A")));
// assert_eq!(capitalize("྾A", cx), Ok("྾A"));
// // U+10A50 KHAROSHTHI PUNCTUATION DOT (Other-Punctuation)
// // U+104B0 OSAGE CAPITAL LETTER A
// // (becomes) U+104D8 OSAGE SMALL LETTER A
// assert_eq!(capitalize(cx.add("𐩐𐒰"), cx), Ok(cx.add("𐩐𐓘")));
// assert_eq!(capitalize("𐩐𐒰", cx), Ok("𐩐𐓘"));
}

#[test]
Expand All @@ -293,9 +270,9 @@ mod tests {
let cx = &Context::new(roots);

// Emacs Doc Tests
assert_eq!(upcase_initials("The CAT in the hAt".into(), cx), cx.add("The CAT In The HAt"));
assert_eq!(upcase_initials('x'.into(), cx), cx.add('X'));
assert_eq!(upcase_initials('X'.into(), cx), cx.add('X'));
assert_eq!(upcase_initials("The CAT in the hAt".into(), cx), "The CAT In The HAt");
assert_eq!(upcase_initials('x'.into(), cx), 'X');
assert_eq!(upcase_initials('X'.into(), cx), 'X');
}

#[cfg(not(miri))] // Uses SIMD
Expand Down

0 comments on commit bc24753

Please sign in to comment.