Skip to content

Commit

Permalink
Add check and test for non-splitter input to split_pdf()
Browse files Browse the repository at this point in the history
  • Loading branch information
holtskinner committed Jul 11, 2024
1 parent 42f4b8c commit 10b4279
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
4 changes: 3 additions & 1 deletion google/cloud/documentai_toolbox/wrappers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ def entities_to_bigquery(
)

def split_pdf(self, pdf_path: str, output_path: str) -> List[str]:
r"""Splits local PDF file into multiple PDF files based on output from a Splitter/Classifier processor.
r"""Splits local PDF file into multiple PDF files based on output from a Splitter processor.
Args:
pdf_path (str):
Expand All @@ -776,6 +776,8 @@ def split_pdf(self, pdf_path: str, output_path: str) -> List[str]:
List[str]:
A list of output pdf files.
"""
if self.entities[0].start_page is None or self.entities[0].end_page is None:
raise ValueError("Entities do not contain start or end pages.")
output_files: List[str] = []
input_filename, input_extension = os.path.splitext(os.path.basename(pdf_path))
with Pdf.open(pdf_path) as pdf:
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,22 @@ def test_split_pdf(mock_Pdf, get_bytes_splitter_mock):
]


def test_split_pdf_with_non_splitter(get_bytes_classifier_mock):
    """Verify that split_pdf() rejects entities lacking start/end pages.

    The document is loaded through the ``get_bytes_classifier_mock`` fixture
    (presumably classifier output whose entities carry no page ranges --
    confirm against the fixture definition), so ``split_pdf()`` is expected
    to raise ``ValueError`` rather than attempt to split the PDF.
    """
    import re

    doc = document.Document.from_gcs(
        gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0"
    )

    # pytest.raises(match=...) applies re.search to the message, so the literal
    # text is escaped; otherwise the trailing "." would match any character.
    with pytest.raises(
        ValueError,
        match=re.escape("Entities do not contain start or end pages."),
    ):
        doc.split_pdf(
            pdf_path="procurement_multi_document.pdf", output_path="splitter/output/"
        )

    # The document bytes must have been fetched exactly once via the mock.
    get_bytes_classifier_mock.assert_called_once()


def test_convert_document_to_annotate_file_response():
doc = document.Document.from_document_path(
document_path="tests/unit/resources/0/toolbox_invoice_test-0.json"
Expand Down

0 comments on commit 10b4279

Please sign in to comment.