Skip to content

Commit 0d6678e

Browse files
committed
[uloc] Implement to_language_tag()
Also: implement Rust macros that generate wrappers for the numerous `uloc` methods that write their output string into a fixed-size buffer.
1 parent c699eab commit 0d6678e

File tree

1 file changed

+190
-80
lines changed

1 file changed

+190
-80
lines changed

rust_icu_uloc/src/lib.rs

Lines changed: 190 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,118 @@ impl TryFrom<&ffi::CStr> for ULoc {
6363
}
6464
}
6565

66+
/// Generates a method to wrap ICU4C `uloc` methods that require a resizable output string buffer.
///
/// The various `uloc` methods of this type have inconsistent signature patterns, with some putting
/// all their input arguments _before_ the `buffer` and its `capacity`, and some splitting the input
/// arguments.
///
/// Therefore, the macro supports input arguments in both positions.
///
/// For an invocation of the form
/// ```ignore
/// buffered_string_method_with_retry!(
///     my_method,
///     BUFFER_CAPACITY,
///     [before_arg_a: before_type_a, before_arg_b: before_type_b,],
///     [after_arg_a: after_type_a, after_arg_b: after_type_b,]
/// );
/// ```
/// the generated method has a signature of the form
/// ```ignore
/// fn my_method(
///     uloc_method: unsafe extern "C" fn(
///         before_type_a,
///         before_type_b,
///         *mut raw::c_char,
///         i32,
///         after_type_a,
///         after_type_b,
///         *mut UErrorCode,
///     ) -> i32,
///     before_arg_a: before_type_a,
///     before_arg_b: before_type_b,
///     after_arg_a: after_type_a,
///     after_arg_b: after_type_b
/// ) -> Result<String, common::Error> {}
/// ```
///
/// Note that every argument in both lists must be followed by a comma, including the last one.
///
/// # Behavior of the generated method
///
/// * `uloc_method` is first called with a zero-filled buffer of `BUFFER_CAPACITY` bytes.
/// * If the call reports `U_BUFFER_OVERFLOW_ERROR`, or returns a length larger than the
///   capacity, the buffer is grown to the returned length and `uloc_method` is called once more.
/// * The buffer is then cut down to the length returned by the first call and converted to a
///   `String`. A zero or negative length yields an empty string.
///
/// # Errors
///
/// The generated method returns an error if the final status is rejected by
/// `common::Error::ok_or_warning`, if a length does not fit the target integer type, or if the
/// bytes written by `uloc_method` are not valid UTF-8.
///
/// # Panics
///
/// The generated method panics if a retry is needed but the reported length is not positive.
macro_rules! buffered_string_method_with_retry {
    ($method_name:ident, $buffer_capacity:expr,
     [$($before_arg:ident: $before_arg_type:ty,)*],
     [$($after_arg:ident: $after_arg_type:ty,)*]) => {
        fn $method_name(
            uloc_method: unsafe extern "C" fn(
                $($before_arg_type,)*
                *mut raw::c_char,
                i32,
                $($after_arg_type,)*
                *mut UErrorCode,
            ) -> i32,
            $($before_arg: $before_arg_type,)*
            $($after_arg: $after_arg_type,)*
        ) -> Result<String, common::Error> {
            // Evaluate the capacity expression once and convert it with a checked conversion
            // rather than a truncating `as` cast.
            let capacity: usize = $buffer_capacity;
            let capacity_i32: i32 = i32::try_from(capacity)
                .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;

            let mut status = common::Error::OK_CODE;
            let mut buf: Vec<u8> = vec![0; capacity];

            // SAFETY: `buf` owns exactly `capacity` writable bytes and `status` is a valid,
            // initialized error code. The caller must guarantee that any pointers passed in
            // through the forwarded arguments are valid.
            let full_len: i32 = unsafe {
                uloc_method(
                    $($before_arg,)*
                    buf.as_mut_ptr() as *mut raw::c_char,
                    capacity_i32,
                    $($after_arg,)*
                    &mut status,
                )
            };

            if status == UErrorCode::U_BUFFER_OVERFLOW_ERROR || full_len > capacity_i32 {
                assert!(full_len > 0);
                let required_len: usize = usize::try_from(full_len)
                    .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
                buf.resize(required_len, 0);

                // ICU functions return immediately when they are entered with a failure status,
                // so the overflow reported by the first call must be cleared before retrying.
                status = common::Error::OK_CODE;

                // SAFETY: same requirements as above; `buf` now owns exactly `full_len`
                // writable bytes, which is the capacity passed to the callee.
                unsafe {
                    uloc_method(
                        $($before_arg,)*
                        buf.as_mut_ptr() as *mut raw::c_char,
                        full_len,
                        $($after_arg,)*
                        &mut status,
                    )
                };
            }

            common::Error::ok_or_warning(status)?;

            // Cut the buffer down to the reported string length. A zero or negative length
            // means there is no output, which must yield an empty string instead of a buffer
            // full of NUL bytes.
            let result_len: usize = usize::try_from(full_len.max(0))
                .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
            buf.truncate(result_len);

            String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul())
        }
    }
}
170+
171+
/// Expands to a call of `buffered_string_char_star` for the ICU4C function named by
/// `$uloc_method`, passing the C string representation of `$self` as its only input.
///
/// `$self` must provide an `as_c_str()` method whose result has `as_ptr()`, and `$uloc_method`
/// is resolved through `versioned_function!`.
///
/// NOTE(review): `buffered_string_char_star` has to be in scope wherever this macro is
/// expanded; in the visible code it is only generated inside `call_buffered_string_method`.
/// Confirm that this macro is actually used and usable.
macro_rules! call_buffered_string_char_star_with_repr {
    ($self:ident, $uloc_method:ident) => {{
        // Keep the owned C string alive for the duration of the call.
        let c_repr = $self.as_c_str();
        buffered_string_char_star(versioned_function!($uloc_method), c_repr.as_ptr())
    }};
}
177+
66178
impl ULoc {
67179
/// Implements `uloc_getLanguage`.
68180
pub fn language(&self) -> Result<String, common::Error> {
@@ -102,6 +214,25 @@ impl ULoc {
102214
.map(|repr| ULoc { repr })
103215
}
104216

217+
// Implements `uloc_toLanguageTag` from ICU4C.
218+
pub fn to_language_tag(&self, strict: bool) -> Result<String, common::Error> {
219+
buffered_string_method_with_retry!(
220+
buffered_string_to_language_tag,
221+
LOCALE_CAPACITY,
222+
[locale_id: *const raw::c_char,],
223+
[strict: rust_icu_sys::UBool,]
224+
);
225+
226+
let locale_id = self.as_c_str();
227+
// No `UBool` constants available in rust_icu_sys, unfortunately.
228+
let strict = if strict { 1 } else { 0 };
229+
buffered_string_to_language_tag(
230+
versioned_function!(uloc_toLanguageTag),
231+
locale_id.as_ptr(),
232+
strict,
233+
)
234+
}
235+
105236
/// Returns the current label of this locale.
106237
pub fn label(&self) -> &str {
107238
&self.repr
@@ -112,14 +243,24 @@ impl ULoc {
112243
ffi::CString::new(self.repr.clone()).expect("ULoc contained interior NUL bytes")
113244
}
114245

246+
/// Implements `uloc_acceptLanguage` from ICU4C.
115247
pub fn accept_language(
116248
accept_list: impl IntoIterator<Item = impl Into<ULoc>>,
117249
available_locales: impl IntoIterator<Item = impl Into<ULoc>>,
118250
) -> Result<(Option<ULoc>, UAcceptResult), common::Error> {
119-
let mut buf: Vec<u8> = vec![0; LOCALE_CAPACITY];
120-
let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED;
121-
let mut status = common::Error::OK_CODE;
251+
buffered_string_method_with_retry!(
252+
buffered_string_uloc_accept_language,
253+
LOCALE_CAPACITY,
254+
[],
255+
[
256+
out_result: *mut UAcceptResult,
257+
accept_list: *mut *const ::std::os::raw::c_char,
258+
accept_list_count: i32,
259+
available_locales: *mut UEnumeration,
260+
]
261+
);
122262

263+
let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED;
123264
let mut accept_list_cstrings: Vec<ffi::CString> = vec![];
124265
// This is mutable only to satisfy the missing `const`s in the ICU4C API.
125266
let mut accept_list: Vec<*const raw::c_char> = accept_list
@@ -142,55 +283,24 @@ impl ULoc {
142283
available_locales.iter().map(|uloc| uloc.label()).collect();
143284
let mut available_locales = Enumeration::try_from(&available_locales[..])?;
144285

145-
let full_len = unsafe {
146-
versioned_function!(uloc_acceptLanguage)(
147-
buf.as_mut_ptr() as *mut raw::c_char,
148-
buf.len() as i32,
149-
&mut accept_result,
150-
accept_list.as_mut_ptr(),
151-
accept_list.len() as i32,
152-
available_locales.repr(),
153-
&mut status,
154-
)
155-
};
156-
157-
if status == UErrorCode::U_BUFFER_OVERFLOW_ERROR {
158-
assert!(full_len > 0);
159-
let full_len: usize = full_len
160-
.try_into()
161-
.map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
162-
buf.resize(full_len, 0);
163-
unsafe {
164-
versioned_function!(uloc_acceptLanguage)(
165-
buf.as_mut_ptr() as *mut raw::c_char,
166-
buf.len() as i32,
167-
&mut accept_result,
168-
accept_list.as_mut_ptr(),
169-
accept_list.len() as i32,
170-
available_locales.repr(),
171-
&mut status,
172-
);
173-
}
174-
}
286+
let matched_locale = buffered_string_uloc_accept_language(
287+
versioned_function!(uloc_acceptLanguage),
288+
&mut accept_result,
289+
accept_list.as_mut_ptr(),
290+
accept_list.len() as i32,
291+
available_locales.repr(),
292+
);
175293

176-
common::Error::ok_or_warning(status)?;
177294
// Having no match is a valid if disappointing result.
178295
if accept_result == UAcceptResult::ULOC_ACCEPT_FAILED {
179296
return Ok((None, accept_result));
180297
}
181298

182-
// Adjust the size of the buffer here.
183-
assert!(full_len > 0);
184-
buf.resize(full_len as usize, 0);
185-
186-
String::from_utf8(buf)
187-
.map_err(|_| common::Error::string_with_interior_nul())
299+
matched_locale
188300
.and_then(|s| ULoc::try_from(s.as_str()))
189301
.map(|uloc| (Some(uloc), accept_result))
190302
}
191303

192-
/// Call a `uloc_*` method with a particular signature (that clones and modifies the internal
193-
/// representation of the locale ID and requires a resizable buffer).
194304
fn call_buffered_string_method(
195305
&self,
196306
uloc_method: unsafe extern "C" fn(
@@ -200,40 +310,14 @@ impl ULoc {
200310
*mut UErrorCode,
201311
) -> i32,
202312
) -> Result<String, common::Error> {
203-
let mut status = common::Error::OK_CODE;
204-
let repr = ffi::CString::new(self.repr.clone())
205-
.map_err(|_| common::Error::string_with_interior_nul())?;
206-
let mut buf: Vec<u8> = vec![0; LOCALE_CAPACITY];
207-
208-
// Requires that repr is a valid pointer
209-
let full_len = unsafe {
210-
assert!(common::Error::is_ok(status));
211-
uloc_method(
212-
repr.as_ptr(),
213-
buf.as_mut_ptr() as *mut raw::c_char,
214-
LOCALE_CAPACITY as i32,
215-
&mut status,
216-
)
217-
} as usize;
218-
common::Error::ok_or_warning(status)?;
219-
if full_len > LOCALE_CAPACITY {
220-
buf.resize(full_len, 0);
221-
// Same unsafe requirements as above, plus full_len must be exactly
222-
// the output buffer size.
223-
unsafe {
224-
assert!(common::Error::is_ok(status));
225-
uloc_method(
226-
repr.as_ptr(),
227-
buf.as_mut_ptr() as *mut raw::c_char,
228-
full_len as i32,
229-
&mut status,
230-
)
231-
};
232-
common::Error::ok_or_warning(status)?;
233-
}
234-
// Adjust the size of the buffer here.
235-
buf.resize(full_len, 0);
236-
String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul())
313+
buffered_string_method_with_retry!(
314+
buffered_string_char_star,
315+
LOCALE_CAPACITY,
316+
[char_star: *const raw::c_char,],
317+
[]
318+
);
319+
let asciiz = self.as_c_str();
320+
buffered_string_char_star(uloc_method, asciiz.as_ptr())
237321
}
238322
}
239323

@@ -316,18 +400,27 @@ mod tests {
316400
assert_eq!(minimized_subtags.label(), expected.label());
317401
}
318402

403+
#[test]
fn test_to_language_tag() {
    // A well-formed locale ID must convert, in strict mode, to its hyphen-separated tag.
    let locale = ULoc::try_from("sr_Cyrl_RS").expect("get sr_Cyrl_RS locale");
    let tag = locale
        .to_language_tag(true)
        .expect("should convert to language tag");
    assert_eq!(tag, "sr-Cyrl-RS");
}
411+
319412
#[test]
320413
fn test_accept_language_fallback() {
321414
let accept_list: Result<Vec<_>, _> = vec!["es_MX", "ar_EG", "fr_FR"]
322415
.into_iter()
323-
.map(|s| ULoc::try_from(s))
416+
.map(ULoc::try_from)
324417
.collect();
325418
let accept_list = accept_list.expect("make accept_list");
326419

327420
let available_locales: Result<Vec<_>, _> =
328421
vec!["de_DE", "en_US", "es", "nl_NL", "sr_RS_Cyrl"]
329422
.into_iter()
330-
.map(|s| ULoc::try_from(s))
423+
.map(ULoc::try_from)
331424
.collect();
332425
let available_locales = available_locales.expect("make available_locales");
333426

@@ -346,13 +439,13 @@ mod tests {
346439
fn test_accept_language_exact_match() {
347440
let accept_list: Result<Vec<_>, _> = vec!["es_ES", "ar_EG", "fr_FR"]
348441
.into_iter()
349-
.map(|s| ULoc::try_from(s))
442+
.map(ULoc::try_from)
350443
.collect();
351444
let accept_list = accept_list.expect("make accept_list");
352445

353446
let available_locales: Result<Vec<_>, _> = vec!["de_DE", "en_US", "es_MX", "ar_EG"]
354447
.into_iter()
355-
.map(|s| ULoc::try_from(s))
448+
.map(ULoc::try_from)
356449
.collect();
357450
let available_locales = available_locales.expect("make available_locales");
358451

@@ -366,4 +459,21 @@ mod tests {
366459
)
367460
);
368461
}
462+
463+
#[test]
fn test_accept_language_no_match() {
    // None of the accepted locales is related to the single available one, so the
    // negotiation must come back empty-handed rather than fail.
    let accept_list = ["es_ES", "ar_EG", "fr_FR"]
        .iter()
        .copied()
        .map(ULoc::try_from)
        .collect::<Result<Vec<_>, _>>()
        .expect("make accept_list");

    let available_locales = ["el_GR"]
        .iter()
        .copied()
        .map(ULoc::try_from)
        .collect::<Result<Vec<_>, _>>()
        .expect("make available_locales");

    let negotiated =
        ULoc::accept_language(accept_list, available_locales).expect("call accept_language");
    assert_eq!(negotiated, (None, UAcceptResult::ULOC_ACCEPT_FAILED));
}
369479
}

0 commit comments

Comments
 (0)