diff --git a/src/docx/opc/phys_pkg.py b/src/docx/opc/phys_pkg.py
index 5ec32237c..3514d3167 100644
--- a/src/docx/opc/phys_pkg.py
+++ b/src/docx/opc/phys_pkg.py
@@ -1,7 +1,8 @@
"""Provides a general interface to a `physical` OPC package, such as a zip file."""
import os
-from zipfile import ZIP_DEFLATED, ZipFile, is_zipfile
+import zlib
+from zipfile import ZIP_DEFLATED, BadZipFile, ZipFile, is_zipfile
from docx.opc.exceptions import PackageNotFoundError
from docx.opc.packuri import CONTENT_TYPES_URI
@@ -73,14 +74,24 @@ class _ZipPkgReader(PhysPkgReader):
def __init__(self, pkg_file):
super(_ZipPkgReader, self).__init__()
- self._zipf = ZipFile(pkg_file, "r")
+ try:
+ self._zipf = ZipFile(pkg_file, "r")
+ except BadZipFile as e:
+ raise PackageNotFoundError("Package is not a valid zip file: %s" % e) from e
def blob_for(self, pack_uri):
"""Return blob corresponding to `pack_uri`.
- Raises |ValueError| if no matching member is present in zip archive.
+ Raises |KeyError| if no matching member is present in zip archive.
+ Raises |PackageNotFoundError| if the zip entry cannot be read due to corruption,
+ truncation, or encryption.
"""
- return self._zipf.read(pack_uri.membername)
+ try:
+ return self._zipf.read(pack_uri.membername)
+ except (BadZipFile, zlib.error, EOFError, RuntimeError) as e:
+ raise PackageNotFoundError(
+ "Package member '%s' could not be read: %s" % (pack_uri.membername, e)
+ ) from e
def close(self):
"""Close the zip archive, releasing any resources it is using."""
diff --git a/tests/opc/test_phys_pkg.py b/tests/opc/test_phys_pkg.py
index 6de0d868b..3e8e97c55 100644
--- a/tests/opc/test_phys_pkg.py
+++ b/tests/opc/test_phys_pkg.py
@@ -2,7 +2,9 @@
import hashlib
import io
-from zipfile import ZIP_DEFLATED, ZipFile
+import struct
+import zlib
+from zipfile import ZIP_DEFLATED, BadZipFile, ZipFile
import pytest
@@ -70,6 +72,50 @@ def it_raises_when_pkg_path_is_not_a_package(self):
class DescribeZipPkgReader:
+ def it_raises_PackageNotFoundError_when_stream_is_not_a_zip(self):
+ with pytest.raises(PackageNotFoundError, match="not a valid zip file"):
+ _ZipPkgReader(io.BytesIO(b"not a zip file"))
+
+ def it_raises_PackageNotFoundError_when_blob_has_bad_crc(self):
+ """BadZipFile (CRC mismatch) from ZipFile.read() is wrapped."""
+ # Build a zip with a large enough payload, then flip a byte in the
+ # middle of the compressed data to cause a CRC mismatch at read time.
+ buf = io.BytesIO()
+ with ZipFile(buf, "w", compression=ZIP_DEFLATED) as zf:
+ zf.writestr("[Content_Types].xml", b"" * 50)
+ raw = bytearray(buf.getvalue())
+ sig_pos = raw.find(b"PK\x03\x04")
+ fname_len = struct.unpack_from("" * 1000)
+ raw = bytearray(buf.getvalue())
+ sig_pos = raw.find(b"PK\x03\x04")
+ fname_len = struct.unpack_from("