Skip to content

Commit

Permalink
contributes towards sajari#18
Browse files: browse the repository at this point in the history
  • Loading branch information
Barnaby committed Aug 9, 2017
1 parent 3dad0cb commit 30ff2ee
Showing 1 changed file with 75 additions and 1 deletion.
76 changes: 75 additions & 1 deletion pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"
)
Expand All @@ -18,7 +21,7 @@ func ConvertPDF(r io.Reader) (string, map[string]string, error) {
defer f.Done()

// Meta data
mc := make(chan map[string]string, 1)
mc := make(chan map[string]string)
go func() {
meta := make(map[string]string)
metaStr, err := exec.Command("pdfinfo", f.Name()).Output()
Expand Down Expand Up @@ -57,6 +60,77 @@ func ConvertPDF(r io.Reader) (string, map[string]string, error) {
// TODO: Remove this.
log.Println("pdftotext:", err)
}
if len(body) == 0 {

// Intercept the metadata from mc, then re-send it on mc so a later read of the channel still receives it.
meta := <-mc
go func() { mc <- meta }()

pages, err := strconv.Atoi(meta["Pages"])
if err != nil {
log.Printf("failed to get number of pages from '%s': %v", meta["pages"], err)
return
}

dir, err := filepath.Abs(filepath.Dir(os.Args[0]))
if err != nil {
log.Fatal(err)
}

for page := 0; page < pages; page++ {
tmpImageFileName := fmt.Sprintf("%s-%d.jpg", f.Name(), page)
args := []string{
fmt.Sprintf("-dFirstPage=%d", page+1),
fmt.Sprintf("-dLastPage=%d", page+1),
"-dNOGC",
// "-dQUIET",
// "-dBandHeight=100",
// "-dBandBufferSpace=500000000",
// "-dBufferSpace=1000000000",
// "-sBandListStorage=memory",
// "-dNumRenderingThreads=2",
"-sDEVICE=jpeg",
"-r300",
"-o", tmpImageFileName,
"-f", filepath.Join(dir, f.Name()),
}

_, err := exec.Command("gs", args...).Output()
if err != nil {
log.Printf("failed to execute gs: %v", err)
return
}

// stdout, err := cmd.StdoutPipe()
// if err != nil {
// log.Printf("failed to get stdout from gs call: %v", err)
// return
// }

tmpImageFile, err := os.Open(tmpImageFileName)
if err != nil {
log.Printf("failed to open temp image file: %v", err)
return
}
defer func() {
err := tmpImageFile.Close()
if err != nil {
log.Printf("failed to close tmpImageFile: %v", err)
}
err = os.Remove(tmpImageFileName)
if err != nil {
log.Printf("failed to remove tmpImageFileName: %v", err)
}
}()

result, _, err := ConvertImage(tmpImageFile)
if err != nil {
log.Printf("failed to run ConvertImage on gs output: %v", err)
return
}
bc <- result
}
}
bc <- string(body)
}()

Expand Down

0 comments on commit 30ff2ee

Please sign in to comment.