@@ -137,14 +137,14 @@ def test_iss1943():
137
137
def test_broken_meta_data(pdf_path):
    """
    Check ``PdfReader.metadata`` against a missing or broken /Info entry.

    Two situations are covered:

    * the file at ``pdf_path`` must report ``None`` as metadata;
    * a copy of ``crazyones.pdf`` whose trailer ``/Info 2 0 R`` reference
      has been damaged must raise ``PdfReadError``.

    Args:
        pdf_path: Path of the PDF to open, supplied by pytest (the fixture or
            parametrization providing it is not visible in this excerpt).
    """
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        # Read the metadata while the underlying file is still open.
        assert reader.metadata is None

    with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as f:
        b = f.read()
    # Damage the trailer so that /Info is no longer an indirect reference.
    reader = PdfReader(BytesIO(b.replace(b"/Info 2 0 R", b"/Info 2 ")))
    with pytest.raises(PdfReadError) as exc:
        reader.metadata
    # Inspect the exception itself (exc.value) rather than the repr of the
    # pytest ExceptionInfo wrapper, in line with the other tests of this
    # module that look at exc.value.
    assert "does not point to document information directory" in str(exc.value)
148
148
149
149
150
150
@pytest .mark .parametrize (
@@ -621,7 +621,7 @@ def test_read_unknown_zero_pages(caplog):
621
621
assert normalize_warnings (caplog .text ) == warnings
622
622
with pytest .raises (PdfReadError ) as exc :
623
623
len (reader .pages )
624
- assert exc .value .args [0 ] == 'Cannot find "/Root" key in trailer'
624
+ assert exc .value .args [0 ] == "Invalid object in /Pages"
625
625
626
626
627
627
def test_read_encrypted_without_decryption ():
@@ -1712,3 +1712,67 @@ def test_unbalanced_brackets_in_dictionary_object(caplog):
1712
1712
name = "iss2877.pdf" # reused
1713
1713
reader = PdfReader (BytesIO (get_data_from_url (url , name = name )))
1714
1714
assert len (reader .pages ) == 43 # note: /Count = 46 but 3 kids are None
1715
+
1716
+
1717
@pytest.mark.enable_socket()
def test_repair_root(caplog):
    """
    Read pages from a PDF whose trailer /Root entry is unusable.

    The downloaded file and three altered copies of it are opened in turn;
    each time the page count (or the raised ``PdfReadError``) and the
    messages collected by ``caplog`` are checked.

    Cf #2877
    """
    url = "https://github.com/user-attachments/files/17162216/crash-6620e8b1abfe3da639b654595da859b87f985748.pdf"
    name = "iss2875.pdf"
    search_msg = 'Searching object with "/Catalog" key'
    found_msg = "Root found at IndirectObject(2, 0,"

    pdf_bytes = get_data_from_url(url, name=name)

    # File as downloaded.
    reader = PdfReader(BytesIO(pdf_bytes))
    assert len(reader.pages) == 1
    for expected in ("Invalid Root object", search_msg, found_msg):
        assert expected in caplog.text

    # Trailer without any /Root key.
    reader = PdfReader(BytesIO(pdf_bytes.replace(b"/Root", b"/Roo ")))
    caplog.clear()
    assert len(reader.pages) == 1
    for expected in ('Cannot find "/Root" key in trailer', search_msg, found_msg):
        assert expected in caplog.text

    # /Root points to another object and the /Catalog name is removed.
    caplog.clear()
    without_catalog = pdf_bytes.replace(b"/Root 1 0 R", b"/Root 2 0 R")
    without_catalog = without_catalog.replace(b"/Catalog", b"/Catalo ")
    reader = PdfReader(BytesIO(without_catalog))
    with pytest.raises(PdfReadError):
        len(reader.pages)
    for expected in ("Invalid Root object in trailer", search_msg):
        assert expected in caplog.text

    # Same alteration, plus the byte at offset 5124 overwritten with "A"
    # (per the original note: to provoke an error in get_object).
    caplog.clear()
    corrupted = without_catalog[:5124] + b"A" + without_catalog[5125:]
    reader = PdfReader(BytesIO(corrupted))
    with pytest.raises(PdfReadError):
        len(reader.pages)
    for expected in ("Invalid Root object in trailer", search_msg):
        assert expected in caplog.text
0 commit comments