Skip to content

Commit 0a8e4b1

Browse files
committed
gh-146192: Add base32 support to binascii
Add base32 encoder and decoder functions implemented in C to `binascii` and use them to greatly improve the performance and reduce the memory usage of the existing base32 codec functions in `base64`. No API or documentation changes are necessary with respect to any functions in `base64`, and all existing unit tests for those functions continue to pass without modification. Resolves: gh-146192
1 parent 2f4eb34 commit 0a8e4b1

File tree

6 files changed

+938
-79
lines changed

6 files changed

+938
-79
lines changed

Doc/library/binascii.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,49 @@ The :mod:`!binascii` module defines the following functions:
182182

183183
.. versionadded:: 3.15
184184

185+
.. function:: a2b_base32(string, /)
186+
187+
Convert base32 data back to binary and return the binary data.
188+
189+
Valid base32 data:
190+
191+
* Conforms to :rfc:`4648`.
192+
* Contains only characters from the base32 alphabet.
193+
* Contains no excess data after padding (including excess padding, newlines, etc.).
194+
* Does not start with padding.
195+
196+
Invalid base32 data will raise :exc:`binascii.Error`.
197+
198+
.. versionadded:: 3.15
199+
200+
.. function:: b2a_base32(data, /)
201+
202+
Convert binary data to a line of ASCII characters in base32 coding,
203+
as specified in :rfc:`4648`. The return value is the converted line.
204+
205+
.. versionadded:: 3.15
206+
207+
.. function:: a2b_base32hex(string, /)
208+
209+
Convert base32hex data back to binary and return the binary data.
210+
211+
Valid base32hex data:
212+
213+
* Conforms to :rfc:`4648`.
214+
* Contains only characters from the base32hex alphabet.
215+
* Contains no excess data after padding (including excess padding, newlines, etc.).
216+
* Does not start with padding.
217+
218+
Invalid base32hex data will raise :exc:`binascii.Error`.
219+
220+
.. versionadded:: 3.15
221+
222+
.. function:: b2a_base32hex(data, /)
223+
224+
Convert binary data to a line of ASCII characters in base32hex coding,
225+
as specified in :rfc:`4648`. The return value is the converted line.
226+
227+
.. versionadded:: 3.15
185228

186229
.. function:: a2b_qp(data, header=False)
187230

Lib/base64.py

Lines changed: 9 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -206,51 +206,8 @@ def urlsafe_b64decode(s):
206206
the letter O). For security purposes the default is None, so that
207207
0 and 1 are not allowed in the input.
208208
'''
209-
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
210-
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
211-
_b32tab2 = {}
212-
_b32rev = {}
213-
214-
def _b32encode(alphabet, s):
215-
# Delay the initialization of the table to not waste memory
216-
# if the function is never called
217-
if alphabet not in _b32tab2:
218-
b32tab = [bytes((i,)) for i in alphabet]
219-
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
220-
b32tab = None
221-
222-
if not isinstance(s, bytes_types):
223-
s = memoryview(s).tobytes()
224-
leftover = len(s) % 5
225-
# Pad the last quantum with zero bits if necessary
226-
if leftover:
227-
s = s + b'\0' * (5 - leftover) # Don't use += !
228-
encoded = bytearray()
229-
from_bytes = int.from_bytes
230-
b32tab2 = _b32tab2[alphabet]
231-
for i in range(0, len(s), 5):
232-
c = from_bytes(s[i: i + 5]) # big endian
233-
encoded += (b32tab2[c >> 30] + # bits 1 - 10
234-
b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
235-
b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
236-
b32tab2[c & 0x3ff] # bits 31 - 40
237-
)
238-
# Adjust for any leftover partial quanta
239-
if leftover == 1:
240-
encoded[-6:] = b'======'
241-
elif leftover == 2:
242-
encoded[-4:] = b'===='
243-
elif leftover == 3:
244-
encoded[-3:] = b'==='
245-
elif leftover == 4:
246-
encoded[-1:] = b'='
247-
return encoded.take_bytes()
248-
249-
def _b32decode(alphabet, s, casefold=False, map01=None):
250-
# Delay the initialization of the table to not waste memory
251-
# if the function is never called
252-
if alphabet not in _b32rev:
253-
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
209+
210+
def _b32decode_prepare(s, casefold=False, map01=None):
254211
s = _bytes_from_decode_data(s)
255212
if len(s) % 8:
256213
raise binascii.Error('Incorrect padding')
@@ -263,51 +220,27 @@ def _b32decode(alphabet, s, casefold=False, map01=None):
263220
s = s.translate(bytes.maketrans(b'01', b'O' + map01))
264221
if casefold:
265222
s = s.upper()
266-
# Strip off pad characters from the right. We need to count the pad
267-
# characters because this will tell us how many null bytes to remove from
268-
# the end of the decoded string.
269-
l = len(s)
270-
s = s.rstrip(b'=')
271-
padchars = l - len(s)
272-
# Now decode the full quanta
273-
decoded = bytearray()
274-
b32rev = _b32rev[alphabet]
275-
for i in range(0, len(s), 8):
276-
quanta = s[i: i + 8]
277-
acc = 0
278-
try:
279-
for c in quanta:
280-
acc = (acc << 5) + b32rev[c]
281-
except KeyError:
282-
raise binascii.Error('Non-base32 digit found') from None
283-
decoded += acc.to_bytes(5) # big endian
284-
# Process the last, partial quanta
285-
if l % 8 or padchars not in {0, 1, 3, 4, 6}:
286-
raise binascii.Error('Incorrect padding')
287-
if padchars and decoded:
288-
acc <<= 5 * padchars
289-
last = acc.to_bytes(5) # big endian
290-
leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1
291-
decoded[-5:] = last[:leftover]
292-
return decoded.take_bytes()
223+
return s
293224

294225

295226
def b32encode(s):
296-
return _b32encode(_b32alphabet, s)
227+
return binascii.b2a_base32(s)
297228
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
298229

299230
def b32decode(s, casefold=False, map01=None):
300-
return _b32decode(_b32alphabet, s, casefold, map01)
231+
s = _b32decode_prepare(s, casefold, map01)
232+
return binascii.a2b_base32(s)
301233
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
302234
extra_args=_B32_DECODE_MAP01_DOCSTRING)
303235

304236
def b32hexencode(s):
305-
return _b32encode(_b32hexalphabet, s)
237+
return binascii.b2a_base32hex(s)
306238
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
307239

308240
def b32hexdecode(s, casefold=False):
309241
# base32hex does not have the 01 mapping
310-
return _b32decode(_b32hexalphabet, s, casefold)
242+
s = _b32decode_prepare(s, casefold)
243+
return binascii.a2b_base32hex(s)
311244
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
312245
extra_args='')
313246

0 commit comments

Comments
 (0)