Skip to content

Commit db96a3f

Browse files
committed
gh-146192: Add base32 support to binascii
Add base32 encoder and decoder functions implemented in C to `binascii` and use them to greatly improve the performance and reduce the memory usage of the existing base32 codec functions in `base64`. No API or documentation changes are necessary with respect to any functions in `base64`, and all existing unit tests for those functions continue to pass without modification. Resolves: gh-146192
1 parent d357a7d commit db96a3f

File tree

6 files changed

+940
-81
lines changed

6 files changed

+940
-81
lines changed

Doc/library/binascii.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,49 @@ The :mod:`!binascii` module defines the following functions:
203203

204204
.. versionadded:: 3.15
205205

206+
.. function:: a2b_base32(string, /)
207+
208+
Convert base32 data back to binary and return the binary data.
209+
210+
Valid base32 data:
211+
212+
* Conforms to :rfc:`4648`.
213+
* Contains only characters from the base32 alphabet.
214+
* Contains no excess data after padding (including excess padding, newlines, etc.).
215+
* Does not start with padding.
216+
217+
Invalid base32 data will raise :exc:`binascii.Error`.
218+
219+
.. versionadded:: 3.15
220+
221+
.. function:: b2a_base32(data, /)
222+
223+
Convert binary data to a line of ASCII characters in base32 coding,
224+
as specified in :rfc:`4648`. The return value is the converted line.
225+
226+
.. versionadded:: 3.15
227+
228+
.. function:: a2b_base32hex(string, /)
229+
230+
Convert base32hex data back to binary and return the binary data.
231+
232+
Valid base32hex data:
233+
234+
* Conforms to :rfc:`4648`.
235+
* Contains only characters from the base32hex alphabet.
236+
* Contains no excess data after padding (including excess padding, newlines, etc.).
237+
* Does not start with padding.
238+
239+
Invalid base32hex data will raise :exc:`binascii.Error`.
240+
241+
.. versionadded:: 3.15
242+
243+
.. function:: b2a_base32hex(data, /)
244+
245+
Convert binary data to a line of ASCII characters in base32hex coding,
246+
as specified in :rfc:`4648`. The return value is the converted line.
247+
248+
.. versionadded:: 3.15
206249

207250
.. function:: a2b_qp(data, header=False)
208251

Lib/base64.py

Lines changed: 9 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -209,51 +209,8 @@ def urlsafe_b64decode(s):
209209
the letter O). For security purposes the default is None, so that
210210
0 and 1 are not allowed in the input.
211211
'''
212-
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
213-
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
214-
_b32tab2 = {}
215-
_b32rev = {}
216-
217-
def _b32encode(alphabet, s):
218-
# Delay the initialization of the table to not waste memory
219-
# if the function is never called
220-
if alphabet not in _b32tab2:
221-
b32tab = [bytes((i,)) for i in alphabet]
222-
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
223-
b32tab = None
224-
225-
if not isinstance(s, bytes_types):
226-
s = memoryview(s).tobytes()
227-
leftover = len(s) % 5
228-
# Pad the last quantum with zero bits if necessary
229-
if leftover:
230-
s = s + b'\0' * (5 - leftover) # Don't use += !
231-
encoded = bytearray()
232-
from_bytes = int.from_bytes
233-
b32tab2 = _b32tab2[alphabet]
234-
for i in range(0, len(s), 5):
235-
c = from_bytes(s[i: i + 5]) # big endian
236-
encoded += (b32tab2[c >> 30] + # bits 1 - 10
237-
b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
238-
b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
239-
b32tab2[c & 0x3ff] # bits 31 - 40
240-
)
241-
# Adjust for any leftover partial quanta
242-
if leftover == 1:
243-
encoded[-6:] = b'======'
244-
elif leftover == 2:
245-
encoded[-4:] = b'===='
246-
elif leftover == 3:
247-
encoded[-3:] = b'==='
248-
elif leftover == 4:
249-
encoded[-1:] = b'='
250-
return encoded.take_bytes()
251-
252-
def _b32decode(alphabet, s, casefold=False, map01=None):
253-
# Delay the initialization of the table to not waste memory
254-
# if the function is never called
255-
if alphabet not in _b32rev:
256-
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
212+
213+
def _b32decode_prepare(s, casefold=False, map01=None):
257214
s = _bytes_from_decode_data(s)
258215
if len(s) % 8:
259216
raise binascii.Error('Incorrect padding')
@@ -266,51 +223,27 @@ def _b32decode(alphabet, s, casefold=False, map01=None):
266223
s = s.translate(bytes.maketrans(b'01', b'O' + map01))
267224
if casefold:
268225
s = s.upper()
269-
# Strip off pad characters from the right. We need to count the pad
270-
# characters because this will tell us how many null bytes to remove from
271-
# the end of the decoded string.
272-
l = len(s)
273-
s = s.rstrip(b'=')
274-
padchars = l - len(s)
275-
# Now decode the full quanta
276-
decoded = bytearray()
277-
b32rev = _b32rev[alphabet]
278-
for i in range(0, len(s), 8):
279-
quanta = s[i: i + 8]
280-
acc = 0
281-
try:
282-
for c in quanta:
283-
acc = (acc << 5) + b32rev[c]
284-
except KeyError:
285-
raise binascii.Error('Non-base32 digit found') from None
286-
decoded += acc.to_bytes(5) # big endian
287-
# Process the last, partial quanta
288-
if l % 8 or padchars not in {0, 1, 3, 4, 6}:
289-
raise binascii.Error('Incorrect padding')
290-
if padchars and decoded:
291-
acc <<= 5 * padchars
292-
last = acc.to_bytes(5) # big endian
293-
leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
294-
decoded[-5:] = last[:leftover]
295-
return decoded.take_bytes()
226+
return s
296227

297228

298229
def b32encode(s):
299-
return _b32encode(_b32alphabet, s)
230+
return binascii.b2a_base32(s)
300231
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
301232

302233
def b32decode(s, casefold=False, map01=None):
303-
return _b32decode(_b32alphabet, s, casefold, map01)
234+
s = _b32decode_prepare(s, casefold, map01)
235+
return binascii.a2b_base32(s)
304236
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
305237
extra_args=_B32_DECODE_MAP01_DOCSTRING)
306238

307239
def b32hexencode(s):
308-
return _b32encode(_b32hexalphabet, s)
240+
return binascii.b2a_base32hex(s)
309241
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
310242

311243
def b32hexdecode(s, casefold=False):
312244
# base32hex does not have the 01 mapping
313-
return _b32decode(_b32hexalphabet, s, casefold)
245+
s = _b32decode_prepare(s, casefold)
246+
return binascii.a2b_base32hex(s)
314247
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
315248
extra_args='')
316249

0 commit comments

Comments
 (0)