Skip to content

Commit 9c0b107

Browse files
committed
Some grooming.
1 parent 61fd54f commit 9c0b107

File tree

3 files changed

+42
-83
lines changed

3 files changed

+42
-83
lines changed

Jint/Native/Intl/Icu.cs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@ namespace Jint.Native.Intl;
55

66
// ICU (International Components for Unicode) is a native C/C++ library provided by the OS
77
// that implements BCP-47 locale canonicalization, alias resolution, and other i18n data.
8+
// https://github.com/unicode-org/icu
89
// We use DllImport to bind directly to its functions (e.g. uloc_toLanguageTag) so we can
910
// reuse the OS-provided ICU implementation instead of reimplementing the spec in C#.
10-
// The wrapper below converts managed strings to UTF-8, calls ICU, and returns the canonical tag.
11-
1211
internal static class ICU
1312
{
1413
private const string MacLib = "/usr/lib/libicucore.dylib";
@@ -48,8 +47,6 @@ public enum UErrorCode : int
4847
// Reads the native string at pointer p by delegating to Marshal.PtrToStringAnsi.
// The trailing '!' only suppresses the nullable warning; it adds no runtime check,
// so whatever PtrToStringAnsi returns is handed straight back to the caller.
// NOTE(review): this uses the ANSI marshaller, while the P/Invoke declarations in
// this class annotate their char* arguments as UTF-8 — confirm the difference cannot
// matter for the strings read through this helper.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4948
public static string PtrToAnsiString(IntPtr p) => Marshal.PtrToStringAnsi(p)!;
5049

51-
52-
// Older runtimes: pass IntPtr and pin a UTF-8 buffer manually.
5350
[DllImport(UcLib, CallingConvention = CallingConvention.Cdecl, EntryPoint = "uloc_toLanguageTag")]
5451
private static extern unsafe int uloc_toLanguageTag_ptr(
5552
byte* localeIdUtf8, // const char* (UTF-8)
@@ -77,15 +74,15 @@ private static extern unsafe int uloc_forLanguageTag_ptr(
7774
out int parsedLength,
7875
ref UErrorCode err);
7976

80-
public static unsafe int uloc_forLanguageTag(
81-
string langtag, byte[] localeId, int localeIdCapacity, out int parsedLength, ref UErrorCode err)
77+
// Managed convenience overload in front of the extern declaration uloc_forLanguageTag_ptr.
// langtag is encoded to UTF-8 with an explicit trailing NUL and pinned for the call;
// localeId, localeIdCapacity, parsedLength and err are forwarded unchanged, and the
// int returned by the extern is returned as-is.
// NOTE(review): presumably localeId is the output buffer and the return value is the
// length ICU wrote/needs — confirm against the ICU uloc_forLanguageTag documentation.
public static unsafe int uloc_forLanguageTag(string langtag, byte[] localeId, int localeIdCapacity, out int parsedLength, ref UErrorCode err)
8278
{
8379
// Append "\0" before encoding so the byte array handed to native code is NUL-terminated.
var inBytes = System.Text.Encoding.UTF8.GetBytes(langtag + "\0");
8480
// Pin the managed array so the raw pointer stays valid for the duration of the native call.
fixed (byte* p = inBytes)
8581
{
8682
return uloc_forLanguageTag_ptr(p, localeId, localeIdCapacity, out parsedLength, ref err);
8783
}
8884
}
85+
8986
[DllImport(UcLib, CallingConvention = CallingConvention.Cdecl, EntryPoint = "uloc_canonicalize")]
9087
private static extern unsafe int uloc_canonicalize_ptr(
9188
byte* localeIdUtf8, // const char* (UTF-8, NUL-terminated)

Jint/Native/Intl/IcuHelpers.cs

Lines changed: 39 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -8,81 +8,6 @@ namespace Jint.Native.Intl
88
/// </summary>
99
internal static class IcuHelpers
1010
{
11-
12-
/// <summary>
13-
/// Equivalent to WebKit's languageTagForLocaleID(localeID, isImmortal=false).
14-
/// Calls ICU uloc_toLanguageTag(localeId, strict=false), then applies the same
15-
/// unicode extension cleanup WebKit does (drop "-u-…-true" values).
16-
/// </summary>
/// <param name="localeId">Locale identifier to convert; null or empty yields <see cref="string.Empty"/>.</param>
/// <returns>
/// The tag produced by ICU, decoded as UTF-8 and passed through
/// CanonicalizeUnicodeExtensionsAfterIcu; <see cref="string.Empty"/> for null/empty input.
/// </returns>
/// <remarks>
/// Failure (status other than U_ZERO_ERROR, or a non-positive length) is reported through
/// Throw.ArgumentException with a message naming the locale id and the ICU status.
/// </remarks>
17-
public static string LanguageTagForLocaleId(string localeId)
18-
{
19-
if (string.IsNullOrEmpty(localeId))
20-
return string.Empty;
21-
22-
var status = ICU.UErrorCode.U_ZERO_ERROR;
23-
24-
// First pass with a reasonable buffer
25-
byte[] buf = new byte[256];
26-
int len = ICU.uloc_toLanguageTag(localeId, buf, buf.Length, strict: false, ref status);
27-
28-
// If ICU tells us the required size, reallocate and retry
// (status is reset first so the outcome of the first attempt does not leak into the check below)
29-
if (len > buf.Length)
30-
{
31-
buf = new byte[len];
32-
status = ICU.UErrorCode.U_ZERO_ERROR;
33-
len = ICU.uloc_toLanguageTag(localeId, buf, buf.Length, strict: false, ref status);
34-
}
35-
36-
// NOTE(review): any status other than U_ZERO_ERROR is treated as a failure here. If ICU can
// report non-fatal warning statuses from this call (for example when the output exactly
// fills the buffer), those would also end up throwing — confirm against the ICU docs.
if (status != ICU.UErrorCode.U_ZERO_ERROR || len <= 0)
37-
Throw.ArgumentException($"ICU uloc_toLanguageTag failed for '{localeId}' (status={status}).");
38-
39-
// ICU writes UTF-8 bytes; decode exactly the returned length
40-
string tag = System.Text.Encoding.UTF8.GetString(buf, 0, len);
41-
42-
// Do the same extension cleanup WebKit applies
43-
return CanonicalizeUnicodeExtensionsAfterIcu(tag);
44-
}
45-
46-
// Keys whose boolean "true" value is **elided** in canonical form.
47-
// For these, "-u-<key>-yes" and "-u-<key>-true" both canonicalize to just "-u-<key>".
48-
// "ca" is listed as well so that a bare `-u-ca` does not synthesize `-yes`.
// Membership tests ignore case (the set is built with StringComparer.OrdinalIgnoreCase).
49-
private static readonly HashSet<string> s_trueDroppableKeys = new(StringComparer.OrdinalIgnoreCase)
50-
{
51-
"kb", "kc", "kh", "kk", "kn", "ca"
52-
};
53-
54-
55-
// Canonicalize subdivision aliases (used for rg/sd values).
// Matching is case-insensitive: the input is lowercased with ToLowerInvariant() before
// the lookup. A value with no entry below is returned exactly as it was passed in
// (original casing, not the lowercased form).
56-
private static string CanonicalizeSubdivision(string value)
57-
{
58-
switch (value.ToLowerInvariant())
59-
{
60-
case "no23": return "no50";
61-
case "cn11": return "cnbj";
62-
case "cz10a": return "cz110";
63-
// "fra" and "frg" both map to the same replacement.
64-
case "fra": return "frges";
65-
case "frg": return "frges";
66-
case "lud": return "lucl"; // test262 prefers the first in replacement list
67-
default: return value;
68-
}
69-
}
69-
70-
// Canonicalize time zone type aliases (used for tz values).
// Matching is case-insensitive: the input is lowercased with ToLowerInvariant() before
// the lookup. A value with no entry below is returned exactly as it was passed in
// (original casing, not the lowercased form).
// Several aliases share a target: "est" and "utcw05" both yield "papty";
// "uct" and "zulu" both yield "utc".
71-
private static string CanonicalizeTimeZoneType(string value)
72-
{
73-
switch (value.ToLowerInvariant())
74-
{
75-
case "cnckg": return "cnsha"; // deprecated -> preferred
76-
case "eire": return "iedub"; // alias -> canonical
77-
case "est": return "papty"; // alias -> canonical
78-
case "gmt0": return "gmt"; // alias -> canonical
79-
case "uct": return "utc"; // alias -> canonical
80-
case "zulu": return "utc"; // alias -> canonical
81-
case "utcw05": return "papty"; // short offset alias seen in test262
82-
default: return value;
83-
}
84-
}
85-
8611
/// <summary>
8712
/// Mirrors WebKit's canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization():
8813
/// - Finds the "-u-" extension and its end (before the next singleton).
@@ -282,6 +207,45 @@ public static string CanonicalizeUnicodeLocaleIdOrThrow(Realm realm, string tag)
282207
return canonical;
283208
}
284209

210+
// Keys whose boolean "true" value is **elided** in canonical form.
211+
// For these, "-u-<key>-yes" and "-u-<key>-true" both canonicalize to just "-u-<key>".
212+
// "ca" is listed as well so that a bare `-u-ca` does not synthesize `-yes`.
// Membership tests ignore case (the set is built with StringComparer.OrdinalIgnoreCase).
213+
private static readonly HashSet<string> s_trueDroppableKeys = new(StringComparer.OrdinalIgnoreCase)
214+
{
215+
"kb", "kc", "kh", "kk", "kn", "ca"
216+
};
217+
218+
// Canonicalize subdivision aliases (used for rg/sd values).
// Matching is case-insensitive: the input is lowercased with ToLowerInvariant() before
// the lookup. A value with no entry below is returned exactly as it was passed in
// (original casing, not the lowercased form).
219+
private static string CanonicalizeSubdivision(string value)
220+
{
221+
switch (value.ToLowerInvariant())
222+
{
223+
case "no23": return "no50";
224+
case "cn11": return "cnbj";
225+
case "cz10a": return "cz110";
226+
// "fra" and "frg" both map to the same replacement.
case "fra": return "frges";
227+
case "frg": return "frges";
228+
case "lud": return "lucl"; // test262 prefers the first in replacement list
229+
default: return value;
230+
}
231+
}
232+
233+
// Canonicalize time zone type aliases (used for tz values).
// Matching is case-insensitive: the input is lowercased with ToLowerInvariant() before
// the lookup. A value with no entry below is returned exactly as it was passed in
// (original casing, not the lowercased form).
// Several aliases share a target: "est" and "utcw05" both yield "papty";
// "uct" and "zulu" both yield "utc".
234+
private static string CanonicalizeTimeZoneType(string value)
235+
{
236+
switch (value.ToLowerInvariant())
237+
{
238+
case "cnckg": return "cnsha"; // deprecated -> preferred
239+
case "eire": return "iedub"; // alias -> canonical
240+
case "est": return "papty"; // alias -> canonical
241+
case "gmt0": return "gmt"; // alias -> canonical
242+
case "uct": return "utc"; // alias -> canonical
243+
case "zulu": return "utc"; // alias -> canonical
244+
case "utcw05": return "papty"; // short offset alias seen in test262
245+
default: return value;
246+
}
247+
}
248+
285249
private static string FixKnownLanguageAliases(string canonicalTag)
286250
{
287251
if (string.IsNullOrEmpty(canonicalTag))

Jint/Native/Intl/IntlInstance.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
using Jint.Runtime;
66
using Jint.Runtime.Descriptors;
77
using Jint.Runtime.Interop;
8-
using System.Text;
98

109
namespace Jint.Native.Intl;
1110

@@ -137,5 +136,4 @@ private JsValue GetCanonicalLocales(JsValue thisObject, JsCallArguments argument
137136

138137
return arr;
139138
}
140-
141139
}

0 commit comments

Comments
 (0)