Skip to content

Commit 42d754e

Browse files
sethmlarson and e-nomem authored
gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling
Co-authored-by: Eashwar Ranganathan <eashwar@eashwar.com>
1 parent ce1abaf commit 42d754e

File tree

4 files changed

+47
-4
lines changed

4 files changed

+47
-4
lines changed

Lib/tarfile.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
12761276
@classmethod
12771277
def frombuf(cls, buf, encoding, errors):
12781278
"""Construct a TarInfo object from a 512 byte bytes object.
1279+
1280+
To support the old v7 tar format AREGTYPE headers are
1281+
transformed to DIRTYPE headers if their name ends in '/'.
1282+
"""
1283+
return cls._frombuf(buf, encoding, errors)
1284+
1285+
@classmethod
1286+
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
1287+
"""Construct a TarInfo object from a 512 byte bytes object.
1288+
1289+
If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
1290+
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
1291+
``dircheck`` must be set to ``False`` if this function is called
1292+
on a follow-up header such as ``GNUTYPE_LONGNAME``.
12791293
"""
12801294
if len(buf) == 0:
12811295
raise EmptyHeaderError("empty header")
@@ -1306,7 +1320,7 @@ def frombuf(cls, buf, encoding, errors):
13061320

13071321
# Old V7 tar format represents a directory as a regular
13081322
# file with a trailing slash.
1309-
if obj.type == AREGTYPE and obj.name.endswith("/"):
1323+
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
13101324
obj.type = DIRTYPE
13111325

13121326
# The old GNU sparse format occupies some of the unused
@@ -1341,8 +1355,15 @@ def fromtarfile(cls, tarfile):
13411355
"""Return the next TarInfo object from TarFile object
13421356
tarfile.
13431357
"""
1358+
return cls._fromtarfile(tarfile)
1359+
1360+
@classmethod
1361+
def _fromtarfile(cls, tarfile, *, dircheck=True):
1362+
"""
1363+
See dircheck documentation in _frombuf().
1364+
"""
13441365
buf = tarfile.fileobj.read(BLOCKSIZE)
1345-
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1366+
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
13461367
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
13471368
return obj._proc_member(tarfile)
13481369

@@ -1400,7 +1421,7 @@ def _proc_gnulong(self, tarfile):
14001421

14011422
# Fetch the next header and process it.
14021423
try:
1403-
next = self.fromtarfile(tarfile)
1424+
next = self._fromtarfile(tarfile, dircheck=False)
14041425
except HeaderError as e:
14051426
raise SubsequentHeaderError(str(e)) from None
14061427

@@ -1535,7 +1556,7 @@ def _proc_pax(self, tarfile):
15351556

15361557
# Fetch the next header.
15371558
try:
1538-
next = self.fromtarfile(tarfile)
1559+
next = self._fromtarfile(tarfile, dircheck=False)
15391560
except HeaderError as e:
15401561
raise SubsequentHeaderError(str(e)) from None
15411562

Lib/test/test_tarfile.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,25 @@ def test_longname_directory(self):
12341234
self.assertIsNotNone(tar.getmember(longdir))
12351235
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
12361236

1237+
def test_longname_file_not_directory(self):
1238+
# Test reading a longname file and ensure it is not handled as a directory
1239+
# Issue #141707
1240+
buf = io.BytesIO()
1241+
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
1242+
ti = tarfile.TarInfo()
1243+
ti.type = tarfile.AREGTYPE
1244+
ti.name = ('a' * 99) + '/' + ('b' * 3)
1245+
tar.addfile(ti)
1246+
1247+
expected = {t.name: t.type for t in tar.getmembers()}
1248+
1249+
buf.seek(0)
1250+
with tarfile.open(mode='r', fileobj=buf) as tar:
1251+
actual = {t.name: t.type for t in tar.getmembers()}
1252+
1253+
self.assertEqual(expected, actual)
1254+
1255+
12371256
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
12381257

12391258
subdir = "gnu"

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,6 +1557,7 @@ Ashwin Ramaswami
15571557
Jeff Ramnani
15581558
Grant Ramsay
15591559
Bayard Randel
1560+
Eashwar Ranganathan
15601561
Varpu Rantala
15611562
Brodie Rao
15621563
Rémi Rampin
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
2+
GNU long name or link headers.

0 commit comments

Comments
 (0)