Skip to content

Commit

Permalink
Linting
Browse files (browse the repository at this point in the history)
  • Loading branch information
evekhm committed Jul 18, 2024
1 parent 053f52f commit 2d5d6d8
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions classify-split-extract-workflow/classify-job/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Utility functions for obtaining authentication tokens,
sending callback requests, splitting PDF files into multiple parts,
getting the current UTC timestamp, and deleting directories.
"""

import glob
import io
import shutil
Expand Down Expand Up @@ -73,11 +79,13 @@ def split_pages(file_pattern: str, bucket_name: str, output_dir: str) -> None:
num_pages = len(reader.pages)
num_shards = (num_pages + 14) // 15

pdf_writers = [PyPDF2.PdfWriter() for _ in range(num_shards)]
for page_index, page in enumerate(reader.pages):
pdf_writers[page_index // 15].add_page(page)
for shard_index in range(num_shards):
pdf_writer = PyPDF2.PdfWriter()
for page_index in range(15):
page_number = shard_index * 15 + page_index
if page_number < num_pages:
pdf_writer.add_page(reader.pages[page_number])

for shard_index, pdf_writer in enumerate(pdf_writers):
output_filename = f"{output_dir}/{file_path[3:-4]} - " \
f"part {shard_index + 1} of {num_shards}.pdf"
blob = bucket.blob(output_filename)
Expand Down

0 comments on commit 2d5d6d8

Please sign in to comment.