Skip to content

Commit

Permalink
separate temp files for each worker
Browse files Browse the repository at this point in the history
  • Loading branch information
scheidan committed Jan 16, 2024
1 parent febaa6a commit e730ed3
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ authors = ["Andreas Scheidegger <[email protected]> and contributors"
version = "0.3.1"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Poppler_jll = "9c32591e-4766-534b-9725-b71a8799265b"

[compat]
julia = "1.6"
Poppler_jll = "23"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,5 @@ be deleted.
All the heavy lifting is done by
[`Poppler`](https://poppler.freedesktop.org/). Thanks to the maintainers
of `Poppler` and [`Poppler_jll.jl`](https://github.com/JuliaBinaryWrappers/Poppler_jll.jl)!

Thanks to @zauster for implementing separate temp files for each worker.
20 changes: 12 additions & 8 deletions src/PDFmerger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module PDFmerger

import Base.Filesystem
using Poppler_jll: pdfunite, pdfinfo, pdfseparate
using Distributed: myid

export merge_pdfs, split_pdf, append_pdf!

Expand Down Expand Up @@ -33,16 +34,19 @@ function merge_pdfs(files::Vector{T}, destination::AbstractString="merged.pdf";
# See: https://gitlab.freedesktop.org/poppler/poppler/-/issues/334
filemax = 200

# Get the id of the current process/worker so that each worker creates its
# own temporary files and does not overwrite the files of other workers.
procid = myid()
k = 1
for files_part in Base.Iterators.partition(files, filemax)
if k == 1
outfile_tmp2 = "_temp_destination_$k"
outfile_tmp2 = "_temp_destination_$(procid)_$k"

run(`$(pdfunite()) $files_part $outfile_tmp2`)

else
outfile_tmp1 = "_temp_destination_$(k-1)"
outfile_tmp2 = "_temp_destination_$k"
outfile_tmp1 = "_temp_destination_$(procid)_$(k-1)"
outfile_tmp2 = "_temp_destination_$(procid)_$k"

run(`$(pdfunite()) $outfile_tmp1 $files_part $outfile_tmp2`)

Expand All @@ -51,12 +55,12 @@ function merge_pdfs(files::Vector{T}, destination::AbstractString="merged.pdf";
end

# rename last file
Filesystem.mv("_temp_destination_$(k-1)", destination, force=true)
Filesystem.mv("_temp_destination_$(procid)_$(k-1)", destination, force=true)

# remove temp files
Filesystem.rm(destination * "_x_", force=true)
Filesystem.rm.("_temp_destination_$(i)" for i in 1:(k-2))
if cleanup
# remove temp files
Filesystem.rm(destination * "_x_", force=true)
Filesystem.rm.("_temp_destination_$(procid)_$(i)" for i in 1:(k-2))
if cleanup
Filesystem.rm.(files, force=true)
end

Expand Down

0 comments on commit e730ed3

Please sign in to comment.