mirror of
https://github.com/python/cpython.git
synced 2026-05-06 04:37:33 -04:00
gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling
Co-authored-by: Eashwar Ranganathan <eashwar@eashwar.com>
This commit is contained in:
committed by
GitHub
parent
ce1abaf9b8
commit
42d754e34c
+25
-4
@@ -1276,6 +1276,20 @@ class TarInfo(object):
|
||||
@classmethod
|
||||
def frombuf(cls, buf, encoding, errors):
|
||||
"""Construct a TarInfo object from a 512 byte bytes object.
|
||||
|
||||
To support the old v7 tar format AREGTYPE headers are
|
||||
transformed to DIRTYPE headers if their name ends in '/'.
|
||||
"""
|
||||
return cls._frombuf(buf, encoding, errors)
|
||||
|
||||
@classmethod
|
||||
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
|
||||
"""Construct a TarInfo object from a 512 byte bytes object.
|
||||
|
||||
If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
|
||||
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
|
||||
``dircheck`` must be set to ``False`` if this function is called
|
||||
on the header that follows an extension header such as ``GNUTYPE_LONGNAME``.
|
||||
"""
|
||||
if len(buf) == 0:
|
||||
raise EmptyHeaderError("empty header")
|
||||
@@ -1306,7 +1320,7 @@ class TarInfo(object):
|
||||
|
||||
# Old V7 tar format represents a directory as a regular
|
||||
# file with a trailing slash.
|
||||
if obj.type == AREGTYPE and obj.name.endswith("/"):
|
||||
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
|
||||
obj.type = DIRTYPE
|
||||
|
||||
# The old GNU sparse format occupies some of the unused
|
||||
@@ -1341,8 +1355,15 @@ class TarInfo(object):
|
||||
"""Return the next TarInfo object from TarFile object
|
||||
tarfile.
|
||||
"""
|
||||
return cls._fromtarfile(tarfile)
|
||||
|
||||
@classmethod
|
||||
def _fromtarfile(cls, tarfile, *, dircheck=True):
|
||||
"""
|
||||
Return the next TarInfo object from TarFile object tarfile; see the ``dircheck`` documentation in _frombuf().
|
||||
"""
|
||||
buf = tarfile.fileobj.read(BLOCKSIZE)
|
||||
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
|
||||
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
|
||||
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
|
||||
return obj._proc_member(tarfile)
|
||||
|
||||
@@ -1400,7 +1421,7 @@ class TarInfo(object):
|
||||
|
||||
# Fetch the next header and process it.
|
||||
try:
|
||||
next = self.fromtarfile(tarfile)
|
||||
next = self._fromtarfile(tarfile, dircheck=False)
|
||||
except HeaderError as e:
|
||||
raise SubsequentHeaderError(str(e)) from None
|
||||
|
||||
@@ -1535,7 +1556,7 @@ class TarInfo(object):
|
||||
|
||||
# Fetch the next header.
|
||||
try:
|
||||
next = self.fromtarfile(tarfile)
|
||||
next = self._fromtarfile(tarfile, dircheck=False)
|
||||
except HeaderError as e:
|
||||
raise SubsequentHeaderError(str(e)) from None
|
||||
|
||||
|
||||
@@ -1234,6 +1234,25 @@ class LongnameTest:
|
||||
self.assertIsNotNone(tar.getmember(longdir))
|
||||
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
|
||||
|
||||
def test_longname_file_not_directory(self):
|
||||
# Test that a regular file with a long name whose 100th character is '/' is not read back as a directory
|
||||
# Issue #141707
|
||||
buf = io.BytesIO()
|
||||
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
|
||||
ti = tarfile.TarInfo()
|
||||
ti.type = tarfile.AREGTYPE
|
||||
ti.name = ('a' * 99) + '/' + ('b' * 3)
|
||||
tar.addfile(ti)
|
||||
|
||||
expected = {t.name: t.type for t in tar.getmembers()}
|
||||
|
||||
buf.seek(0)
|
||||
with tarfile.open(mode='r', fileobj=buf) as tar:
|
||||
actual = {t.name: t.type for t in tar.getmembers()}
|
||||
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
|
||||
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
|
||||
|
||||
subdir = "gnu"
|
||||
|
||||
@@ -1557,6 +1557,7 @@ Ashwin Ramaswami
|
||||
Jeff Ramnani
|
||||
Grant Ramsay
|
||||
Bayard Randel
|
||||
Eashwar Ranganathan
|
||||
Varpu Rantala
|
||||
Brodie Rao
|
||||
Rémi Rampin
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
|
||||
GNU long name or link headers.
|
||||
Reference in New Issue
Block a user