Skip to content

Commit

Permalink
feat: add support for file stream
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Apr 21, 2024
1 parent 46d2926 commit c7c9c71
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pdf2john"
version = "0.1.10"
version = "0.2.0"
description = "A modern refactoring of the legacy pdf2john library"
authors = ["Benjamin Dornel <[email protected]>"]
license = "MIT"
Expand Down
23 changes: 18 additions & 5 deletions src/pdf2john/pdf2john.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import argparse
import logging
import sys
from io import BytesIO
from typing import Optional

from pyhanko.pdf_utils.misc import PdfReadError
from pyhanko.pdf_utils.reader import PdfFileReader
Expand Down Expand Up @@ -50,11 +52,22 @@ class PdfHashExtractor:
- `revision`: Revision of the standard security handler
"""

def __init__(self, file_name: str, strict: bool = False):
self.file_name = file_name

with open(file_name, "rb") as doc:
self.pdf = PdfFileReader(doc, strict=strict)
def __init__(
self,
file_name: Optional[str] = None,
file_bytes: Optional[str] = None,
strict: bool = False,
):
if not any([file_name, file_bytes]):
raise RuntimeError("Either file name or file stream must be passed")

if file_bytes:
stream = BytesIO(file_bytes)
else:
stream = open(file_name, "rb")

with stream:
self.pdf = PdfFileReader(stream, strict=strict)
self.encrypt_dict = self.pdf.encrypt_dict

if not self.encrypt_dict:
Expand Down
14 changes: 12 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,18 @@ def test_main_unencrypted(unencrypted_pdf_path, caplog):


# Builds a PdfHashExtractor from `unencrypted_pdf_path` and asserts that its
# `encrypt_dict` attribute is falsy.
def test_parse_unencrypted_should_not_return_encrypt_dict(unencrypted_pdf_path):
# NOTE(review): the two statement pairs below are identical apart from the
# local name (`extractor` vs. `pdf`). This looks like the removed and added
# sides of a diff rendered together -- confirm which pair is current.
extractor = PdfHashExtractor(unencrypted_pdf_path)
assert not extractor.encrypt_dict
pdf = PdfHashExtractor(unencrypted_pdf_path)
assert not pdf.encrypt_dict


def test_can_read_from_byte_stream():
    """Construct the extractor from in-memory PDF bytes and check its parsed fields."""
    with open("tests/pdf/pypdf/r6-owner-password.pdf", "rb") as handle:
        payload = handle.read()

    extractor = PdfHashExtractor(file_bytes=payload)

    # Attributes are checked in the same order as the original assertions.
    expected_fields = (
        ("algorithm", 5),
        ("length", 256),
        ("permissions", -4),
        ("revision", 6),
    )
    for attribute, expected in expected_fields:
        assert getattr(extractor, attribute) == expected


def test_invalid_pdf():
Expand Down

0 comments on commit c7c9c71

Please sign in to comment.