Skip to content

Commit fd604bd

Browse files
authored
Merge pull request #120 from jaraco/bugfix/119-malformed-paths
Sanitize malformed paths
2 parents 2d015c2 + c18417e commit fd604bd

File tree

3 files changed

+81
-1
lines changed

3 files changed

+81
-1
lines changed

newsfragments/119.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved handling of malformed zip files.

tests/test_path.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,3 +571,20 @@ def test_getinfo_missing(self, alpharep):
571571
zipfile.Path(alpharep)
572572
with self.assertRaises(KeyError):
573573
alpharep.getinfo('does-not-exist')
574+
575+
def test_malformed_paths(self):
    """
    Entries whose names carry leading slashes or parent references
    should be listed by Path under their cleaned-up names.
    """
    buffer = io.BytesIO()
    archive = zipfile.ZipFile(buffer, "w")
    for member in ("/one-slash.txt", "//two-slash.txt", "../parent.txt"):
        archive.writestr(member, b"content")
    # NOTE(review): presumably blanks the archive name so that str() of
    # each child is just the member path -- confirm against Path.__str__.
    archive.filename = ''
    children = [str(child) for child in zipfile.Path(archive).iterdir()]
    assert children == [
        'one-slash.txt',
        'two-slash.txt',
        'parent.txt',
    ]

zipp/__init__.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,69 @@ def __setstate__(self, state):
8686
super().__init__(*args, **kwargs)
8787

8888

89-
class CompleteDirs(InitializedState, zipfile.ZipFile):
89+
class SanitizedNames:
    """
    Mix-in for ZipFile classes that reports member names in sanitized form.
    """

    def namelist(self):
        """
        Return the names from the next class in the MRO, each passed
        through ``_sanitize``.
        """
        raw_names = super().namelist()
        return [self._sanitize(raw) for raw in raw_names]

    @staticmethod
    def _sanitize(name):
        r"""
        Convert *name* to a relative, posix-separated path that has no
        empty, ``.`` or ``..`` segments.

        Modeled after
        https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
        but provides consistent cross-platform behavior.

        >>> san = SanitizedNames._sanitize
        >>> san('/foo/bar')
        'foo/bar'
        >>> san('//foo.txt')
        'foo.txt'
        >>> san('foo/.././bar.txt')
        'foo/bar.txt'
        >>> san('foo../.bar.txt')
        'foo../.bar.txt'
        >>> san('\\foo\\bar.txt')
        'foo/bar.txt'
        >>> san('D:\\foo.txt')
        'D/foo.txt'
        >>> san('\\\\server\\share\\file.txt')
        'server/share/file.txt'
        >>> san('\\\\?\\GLOBALROOT\\Volume3')
        '?/GLOBALROOT/Volume3'
        >>> san('\\\\.\\PhysicalDrive1\\root')
        'PhysicalDrive1/root'

        A trailing forward slash on the input is kept.
        >>> san('abc/')
        'abc/'

        A ValueError is raised when no segments survive.
        >>> san('../..')
        Traceback (most recent call last):
        ...
        ValueError: Empty filename
        """
        # Drop the colon that follows a leading drive letter; the letter
        # itself stays behind as an ordinary path segment.
        # ntpath.splitdrive is avoided because it also strips UNC paths.
        without_colon = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
        segments = [
            segment
            for segment in without_colon.replace('\\', '/').split('/')
            if segment and segment not in {'..', '.'}
        ]
        if not segments:
            raise ValueError("Empty filename")
        sanitized = '/'.join(segments)
        # Only a literal '/' at the end of the original name is retained.
        return sanitized + '/' if name.endswith('/') else sanitized
149+
150+
151+
class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
90152
"""
91153
A ZipFile subclass that ensures that implied directories
92154
are always included in the namelist.

0 commit comments

Comments
 (0)