Skip to content

Commit ac89c9d

Browse files
committed
✨ Add new aliases for a few of the well-known encodings
— ✨ Add new functions for detecting (input) completion and setting (input) completion for punycode. — 🎨 Change the name of the *_get_assume_valid functions to *_is_assuming_valid, which reads a bit better than the usual get/set pair.
1 parent 9ddaab8 commit ac89c9d

File tree

11 files changed

+170
-24
lines changed

11 files changed

+170
-24
lines changed

documentation/source/api/encodings/punycode.rst

+37-2
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ Punycode is a Bootstring Encoding, using configuration and parameters for the Bo
3535

3636
Famously, Punycode is used for both Rust ABI identifier name mangling and in DNS for making Unicode names ASCII-only and clearly-marked as being derived from non-ASCII characters.
3737

38-
.. doxygenstruct:: cnc_pny_decode_state_t
3938

40-
.. doxygenstruct:: cnc_pny_encode_state_t
39+
40+
Transcoding Functions
41+
---------------------
4142

4243
.. doxygenfunction:: cnc_mcnrtoc32n_punycode
4344

@@ -46,3 +47,37 @@ Famously, Punycode is used for both Rust ABI identifier name mangling and in DNS
4647
.. doxygenfunction:: cnc_mcsnrtoc32sn_punycode
4748

4849
.. doxygenfunction:: cnc_c32snrtomcsn_punycode
50+
51+
52+
53+
State Type
54+
----------
55+
56+
.. doxygenstruct:: cnc_pny_decode_state_t
57+
58+
.. doxygenstruct:: cnc_pny_encode_state_t
59+
60+
61+
62+
State Functions
63+
---------------
64+
65+
.. doxygenfunction:: cnc_pny_decode_state_set_input_incomplete
66+
67+
.. doxygenfunction:: cnc_pny_encode_state_set_input_incomplete
68+
69+
.. doxygenfunction:: cnc_pny_decode_state_set_input_complete
70+
71+
.. doxygenfunction:: cnc_pny_encode_state_set_input_complete
72+
73+
.. doxygenfunction:: cnc_pny_decode_state_is_input_complete
74+
75+
.. doxygenfunction:: cnc_pny_encode_state_is_input_complete
76+
77+
.. doxygenfunction:: cnc_pny_decode_state_set_assume_valid
78+
79+
.. doxygenfunction:: cnc_pny_encode_state_set_assume_valid
80+
81+
.. doxygenfunction:: cnc_pny_decode_state_is_assuming_valid
82+
83+
.. doxygenfunction:: cnc_pny_encode_state_is_assuming_valid

documentation/source/api/mcstate_t.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ The state object is used during conversions to provide a place for the function
4343

4444
.. doxygenfunction:: cnc_mcstate_is_complete
4545

46+
47+
State Functions
48+
---------------
49+
4650
.. doxygenfunction:: cnc_mcstate_set_assume_valid
4751

48-
.. doxygenfunction:: cnc_mcstate_get_assume_valid
52+
.. doxygenfunction:: cnc_mcstate_is_assuming_valid

documentation/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# -- Project information -----------------------------------------------------
2020

2121
project = 'ztd.cuneicode'
22-
copyright = "2022, ThePhD & Shepherd's Oasis, LLC"
22+
copyright = "2023, ThePhD & Shepherd's Oasis, LLC"
2323
author = "ThePhD & Shepherd's Oasis, LLC"
2424

2525
# The full version, including alpha/beta/rc tags

examples/basic/source/registry_shift_jis_to_utf8.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ int main() {
4949
cnc_conversion* conversion = NULL;
5050
cnc_conversion_info conversion_info = { 0 };
5151
{
52-
cnc_open_err err = cnc_conv_new(registry, "shift-jis-x0208", "utf-8",
53-
&conversion, &conversion_info);
52+
cnc_open_err err = cnc_conv_new(
53+
registry, "shift-jis", "utf-8", &conversion, &conversion_info);
5454
if (err != cnc_open_err_ok) {
5555
fprintf(stderr, "[error] could not open a new registry.");
5656
cnc_registry_delete(registry);

include/ztd/cuneicode/mcstate.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_mcstate_set_assu
178178
/// @brief Gets the internal state for the cnc_mcstate_t object representing its current "assume
179179
/// valid" state.
180180
///
181-
/// @param[in,out] __state The state to turn validity on for.
182-
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_mcstate_get_assume_valid(
181+
/// @param[in] __state The state to query the "assume valid" setting of.
182+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_mcstate_is_assuming_valid(
183183
const cnc_mcstate_t* __state);
184184

185185
//////

include/ztd/cuneicode/punycode_state.h

+71-9
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,16 @@ typedef struct cnc_pny_encode_state_t {
7373
size_t __is_initialized : 1;
7474
//////
7575
/// @brief Private. Do not access.
76+
size_t __assume_valid : 1;
77+
//////
78+
/// @brief Private. Do not access.
7679
size_t __idna : 1;
7780
//////
7881
/// @brief Private. Do not access.
7982
size_t __action_state : 2;
8083
//////
8184
/// @brief Private. Do not access.
82-
size_t __padding : (sizeof(size_t) * CHAR_BIT) - 6;
85+
size_t __padding : (sizeof(size_t) * CHAR_BIT) - 7;
8386
//////
8487
/// @brief Private. Do not access.
8588
size_t __has_seen_non_basic;
@@ -102,6 +105,9 @@ typedef struct cnc_pny_decode_state_t {
102105
size_t input_is_complete : 1;
103106
//////
104107
/// @brief Private. Do not access.
108+
size_t __assume_valid : 1;
109+
//////
110+
/// @brief Private. Do not access.
105111
size_t __is_initialized : 1;
106112
//////
107113
/// @brief Private. Do not access.
@@ -117,7 +123,7 @@ typedef struct cnc_pny_decode_state_t {
117123
size_t __action_state : 2;
118124
//////
119125
/// @brief Private. Do not access.
120-
size_t __padding : (sizeof(size_t) * CHAR_BIT) - 7;
126+
size_t __padding : (sizeof(size_t) * CHAR_BIT) - 8;
121127
//////
122128
/// @brief Private. Do not access.
123129
alignas(void*) unsigned char __storage[(sizeof(void*) * 3) + (256 * sizeof(char))
@@ -128,34 +134,90 @@ typedef struct cnc_pny_decode_state_t {
128134
/// @brief Returns whether or not the given cnc_pny_encode_state_t has no more data that needs to be
129135
/// output.
130136
///
131-
/// @param[in] __state The state to inspect
137+
/// @param[in] __state The state to inspect.
132138
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_encode_state_is_complete(
133139
const cnc_pny_encode_state_t* __state);
134140

135141
//////
136142
/// @brief Returns whether or not the given cnc_pny_decode_state_t has no more data that needs to be
137143
/// output.
138144
///
139-
/// @param[in] __state The state to inspect
145+
/// @param[in] __state The state to inspect.
140146
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_decode_state_is_complete(
141147
const cnc_pny_decode_state_t* __state);
142148

143149
//////
144-
/// @brief Returns whether or not the given cnc_pny_encode_state_t has no more data that needs to be
145-
/// output.
150+
/// @brief Tells the state that its input is complete and that no more input should be expected.
146151
///
147-
/// @param[in, out] __state The state to trigger the completion on.
152+
/// @param[in, out] __state The state to mark as having received all of its input.
148153
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_encode_state_set_input_complete(
149154
cnc_pny_encode_state_t* __state);
150155

151156
//////
152-
/// @brief Returns whether or not the given cnc_pny_decode_state_t has no more data that needs to be
153-
/// output.
157+
/// @brief Tells the state that input should still be expected.
158+
///
159+
/// @param[in, out] __state The state to remove the expectation that input is complete from.
160+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void
161+
cnc_pny_decode_state_set_input_incomplete(cnc_pny_decode_state_t* __state);
162+
163+
//////
164+
/// @brief Tells the state that input should still be expected.
165+
///
166+
/// @param[in, out] __state The state to turn off its current completion state.
167+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void
168+
cnc_pny_encode_state_set_input_incomplete(cnc_pny_encode_state_t* __state);
169+
170+
//////
171+
/// @brief Tells the state that its input is complete and that no more input should be expected.
154172
///
155173
/// @param[in, out] __state The state to trigger the completion on.
156174
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_decode_state_set_input_complete(
157175
cnc_pny_decode_state_t* __state);
158176

177+
//////
178+
/// @brief Returns whether or not the given cnc_pny_encode_state_t's input is marked as complete.
179+
///
180+
/// @param[in] __state The state to inspect.
181+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_encode_state_is_input_complete(
182+
const cnc_pny_encode_state_t* __state);
183+
184+
//////
185+
/// @brief Returns whether or not the given cnc_pny_decode_state_t's input is marked as complete.
186+
///
187+
/// @param[in] __state The state to inspect.
188+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_decode_state_is_input_complete(
189+
const cnc_pny_decode_state_t* __state);
190+
191+
//////
192+
/// @brief Sets whether or not operations using the given cnc_pny_encode_state_t assume that their
193+
/// input data is valid.
194+
///
195+
/// @param[in, out] __state The state to make operations assume the input is valid.
196+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_encode_state_set_assume_valid(
197+
cnc_pny_encode_state_t* __state, bool __value);
198+
199+
//////
200+
/// @brief Sets whether or not operations using the given cnc_pny_decode_state_t assume that their
201+
/// input data is valid.
202+
///
203+
/// @param[in, out] __state The state to make operations assume the input is valid.
204+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_decode_state_set_assume_valid(
205+
cnc_pny_decode_state_t* __state, bool __value);
206+
207+
//////
208+
/// @brief Returns whether or not the given cnc_pny_encode_state_t is assuming input data is valid.
209+
///
210+
/// @param[in] __state The state to inspect.
211+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_encode_state_is_assuming_valid(
212+
const cnc_pny_encode_state_t* __state);
213+
214+
//////
215+
/// @brief Returns whether or not the given cnc_pny_decode_state_t is assuming input data is valid.
216+
///
217+
/// @param[in] __state The state to inspect.
218+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_decode_state_is_assuming_valid(
219+
const cnc_pny_decode_state_t* __state);
220+
159221
//////
160222
/// @}
161223

paper.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ affiliations:
1818
index: 1
1919
- name: Shepherd's Oasis, LLC
2020
index: 2
21-
date: 15 July 2022
21+
date: 15 July 2023
2222
bibliography: paper.bib
2323
---
2424

shared/simdutf/source/registry.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ static inline cnc_open_err mcstate_unchecked_open(cnc_conversion_registry* regis
131131
size_t& input_bytes_size = *p_input_bytes_size; \
132132
const bool is_counting_only = p_output_bytes == nullptr || *p_output_bytes == nullptr; \
133133
const bool is_unbounded_write = p_output_bytes_size == nullptr; \
134-
const bool assume_valid = cnc_mcstate_get_assume_valid(state); \
134+
const bool assume_valid = cnc_mcstate_is_assuming_valid(state); \
135135
if (!is_counting_only && is_unbounded_write) { \
136136
if (assume_valid) { \
137137
size_t output_written = ztd::endian::native == ztd::endian::big \

source/ztd/cuneicode/conv.cpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -937,11 +937,13 @@ extern cnc_open_err __cnc_add_default_registry_entries(
937937
&::__basic_close_function<cnc_mcstate_t>))
938938

939939
_ADD_MCN_NAMED_ENCODING("ascii", ascii);
940+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "ANSI_X3.4-1968", "ascii"));
940941
_ADD_MCN_NAMED_ENCODING("atari st", atari_st);
941942
_ADD_MCN_NAMED_ENCODING("atascii", atascii);
942943

943944
_ADD_MCN_NAMED_ENCODING("gbk", gbk);
944945
_ADD_MCN_NAMED_ENCODING("big5-hkscs", big5_hkscs);
946+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "big5", "big5-hkscs"));
945947
_ADD_MCN_NAMED_ENCODING("gb18030", gb18030);
946948

947949
_ADD_MCN_NAMED_ENCODING("kamenicky", kamenicky);
@@ -971,9 +973,16 @@ extern cnc_open_err __cnc_add_default_registry_entries(
971973
_ADD_MCN_NAMED_ENCODING("iso-8859-16", iso_8859_16);
972974

973975
_ADD_MCN_NAMED_ENCODING("mac_roman", mac_roman);
976+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "x-mac-roman", "mac-roman"));
974977
_ADD_MCN_NAMED_ENCODING("mac-cyrillic", mac_cyrillic);
978+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "x-mac-cyrillic", "mac-cyrillic"));
975979
_ADD_MCN_NAMED_ENCODING("shift-jis-x0208", shift_jis_x0208);
980+
// Shift_JIS aliases to the one available in the WHATWG database first and foremost
981+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "shift-jis", "shift-jis-x0208"));
982+
976983
_ADD_MCN_NAMED_ENCODING("tatar ansi", tatar_ansi);
984+
// Tatar aliases to the ANSI version by-default
985+
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "tatar", "tatar-ansi"));
977986
_ADD_MCN_NAMED_ENCODING("tatar ascii", tatar_ascii);
978987

979988
_ADD_MCN_NAMED_ENCODING("windows-473", windows_1251);
@@ -1004,10 +1013,6 @@ extern cnc_open_err __cnc_add_default_registry_entries(
10041013
__registry, ::cnc::__cnc_detail::__wide_alias(), ::cnc::__cnc_detail::__wide_name()));
10051014
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias_c8(
10061015
__registry, ::cnc::__cnc_detail::__exec_alias(), ::cnc::__cnc_detail::__exec_name()));
1007-
// Tatar aliases to the ANSI version by-default
1008-
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "tatar", "tatar-ansi"));
1009-
// Shift_JIS aliases to the one available in the WHATWG datbase first and foremost
1010-
_CHECK_ERR_AND_RETURN(cnc_registry_add_alias(__registry, "shift-jis", "shift-jis-x0208"));
10111016

10121017
#undef _ADD_MCN_NAMED_ENCODING
10131018
#undef _ADD_MCN_NAMED_ENCODING_BASIC

source/ztd/cuneicode/mcstate.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
#include <memory>
3939
#include <cstring>
4040

41-
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_mcstate_get_assume_valid(
41+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_mcstate_is_assuming_valid(
4242
const cnc_mcstate_t* __state) {
4343
if (__state == nullptr) {
4444
return false;

source/ztd/cuneicode/punycode_state.cpp

+40
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,43 @@ ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_decode_state
5959
cnc_pny_decode_state_t* __p_state) {
6060
__p_state->input_is_complete = true;
6161
}
62+
63+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_encode_state_is_input_complete(
64+
const cnc_pny_encode_state_t* __p_state) {
65+
return __p_state->input_is_complete;
66+
}
67+
68+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_decode_state_is_input_complete(
69+
const cnc_pny_decode_state_t* __p_state) {
70+
return __p_state->input_is_complete;
71+
}
72+
73+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void
74+
cnc_pny_encode_state_set_input_incomplete(cnc_pny_encode_state_t* __p_state) {
75+
__p_state->input_is_complete = false;
76+
}
77+
78+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void
79+
cnc_pny_decode_state_set_input_incomplete(cnc_pny_decode_state_t* __p_state) {
80+
__p_state->input_is_complete = false;
81+
}
82+
83+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_encode_state_set_assume_valid(
84+
cnc_pny_encode_state_t* __p_state, bool __value) {
85+
__p_state->__assume_valid = __value;
86+
}
87+
88+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ void cnc_pny_decode_state_set_assume_valid(
89+
cnc_pny_decode_state_t* __p_state, bool __value) {
90+
__p_state->__assume_valid = __value;
91+
}
92+
93+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_encode_state_is_assuming_valid(
94+
const cnc_pny_encode_state_t* __p_state) {
95+
return __p_state->__assume_valid;
96+
}
97+
98+
ZTD_C_LANGUAGE_LINKAGE_I_ ZTD_CUNEICODE_API_LINKAGE_I_ bool cnc_pny_decode_state_is_assuming_valid(
99+
const cnc_pny_decode_state_t* __p_state) {
100+
return __p_state->__assume_valid;
101+
}

0 commit comments

Comments
 (0)