Skip to content

Commit 08ec28e

Browse files
committed
gh-76535: Add C API functions for changing case of a single codepoint
1 parent 6547330 commit 08ec28e

File tree

5 files changed

+92
-2
lines changed

5 files changed

+92
-2
lines changed

Doc/c-api/unicode.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,14 @@ the Python configuration.
223223
224224
Return ``1`` or ``0`` depending on whether *ch* is a titlecase character.
225225
226+
.. c:function:: int Py_UNICODE_ISCASED(Py_UCS4 ch)
227+
228+
Return ``1`` or ``0`` depending on whether *ch* is a cased character.
229+
230+
.. c:function:: int Py_UNICODE_ISCASEIGNORABLE(Py_UCS4 ch)
231+
232+
Return ``1`` or ``0`` depending on whether *ch* is a case-ignorable character.
233+
226234
227235
.. c:function:: int Py_UNICODE_ISLINEBREAK(Py_UCS4 ch)
228236
@@ -322,6 +330,26 @@ These APIs can be used to work with surrogates:
322330
surrogate pair. *high* must be in the range [0xD800; 0xDBFF] and *low* must
323331
be in the range [0xDC00; 0xDFFF].
324332
333+
.. c:function:: Py_ssize_t PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *buffer, \
334+
Py_ssize_t size)
335+
336+
Convert *ch* to lower case, store result in *buffer*, which should be
337+
able to hold *size* characters, and return the number of characters stored.
338+
If the buffer is not big enough, return -1.
339+
340+
.. c:function:: Py_ssize_t PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer, \
341+
Py_ssize_t size)
342+
343+
Convert *ch* to upper case, store result in *buffer*, which should be
344+
able to hold *size* characters, and return the number of characters stored.
345+
If the buffer is not big enough, return -1.
346+
347+
.. c:function:: Py_ssize_t PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer, \
348+
Py_ssize_t size)
349+
350+
Convert *ch* to title case, store result in *buffer*, which should be
351+
able to hold *size* characters, and return the number of characters stored.
352+
If the buffer is not big enough, return -1.
325353
326354
Creating and accessing Unicode strings
327355
""""""""""""""""""""""""""""""""""""""

Include/cpython/unicodeobject.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,14 @@ PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
602602
Py_UCS4 ch /* Unicode character */
603603
);
604604

605+
PyAPI_FUNC(int) _PyUnicode_IsCased(
606+
Py_UCS4 ch /* Unicode character */
607+
);
608+
609+
PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
610+
Py_UCS4 ch /* Unicode character */
611+
);
612+
605613
PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
606614
const Py_UCS4 ch /* Unicode character */
607615
);
@@ -671,6 +679,8 @@ static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) {
671679
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
672680
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
673681
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
682+
#define Py_UNICODE_ISCASED(ch) _PyUnicode_IsCased(ch)
683+
#define Py_UNICODE_ISCASEIGNORABLE(ch) _PyUnicode_IsCaseIgnorable(ch)
674684
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
675685

676686
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
2121
extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
2222
extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
2323
extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
24-
extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
25-
extern int _PyUnicode_IsCased(Py_UCS4 ch);
2624

2725
/* --- Unicode API -------------------------------------------------------- */
2826

Include/unicodeobject.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,6 +1007,30 @@ PyAPI_FUNC(int) PyUnicode_Contains(
10071007

10081008
PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
10091009

1010+
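/* Lowercases character ch and stores the result in buffer, which has room for size code points. Returns the number of code points stored, or -1 if the buffer is too small. */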
1011+
1012+
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToLower(
1013+
Py_UCS4 ch,
1014+
Py_UCS4 *buffer,
1015+
Py_ssize_t size
1016+
);
1017+
1018+
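/* Uppercases character ch and stores the result in buffer, which has room for size code points. Returns the number of code points stored, or -1 if the buffer is too small. */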
1019+
1020+
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToUpper(
1021+
Py_UCS4 ch,
1022+
Py_UCS4 *buffer,
1023+
Py_ssize_t size
1024+
);
1025+
1026+
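/* Titlecases character ch and stores the result in buffer, which has room for size code points. Returns the number of code points stored, or -1 if the buffer is too small. */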
1027+
1028+
PyAPI_FUNC(Py_ssize_t) PyUnicode_ToTitle(
1029+
Py_UCS4 ch,
1030+
Py_UCS4 *buffer,
1031+
Py_ssize_t size
1032+
);
1033+
10101034
/* === Characters Type APIs =============================================== */
10111035

10121036
#ifndef Py_LIMITED_API

Objects/unicodeobject.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9616,6 +9616,36 @@ case_operation(PyObject *self,
96169616
return res;
96179617
}
96189618

9619+
Py_ssize_t
9620+
PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
9621+
{
9622+
Py_ssize_t n = _PyUnicode_ToLowerFull(ch, buffer);
9623+
if (n > size) {
9624+
return -1;
9625+
}
9626+
return n;
9627+
}
9628+
9629+
Py_ssize_t
9630+
PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
9631+
{
9632+
Py_ssize_t n = _PyUnicode_ToUpperFull(ch, buffer);
9633+
if (n > size) {
9634+
return -1;
9635+
}
9636+
return n;
9637+
}
9638+
9639+
Py_ssize_t
9640+
PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
9641+
{
9642+
Py_ssize_t n = _PyUnicode_ToTitleFull(ch, buffer);
9643+
if (n > size) {
9644+
return -1;
9645+
}
9646+
return n;
9647+
}
9648+
96199649
PyObject *
96209650
PyUnicode_Join(PyObject *separator, PyObject *seq)
96219651
{

0 commit comments

Comments
 (0)