Skip to content

Commit

Permalink
Fixing bugs
Browse files Browse the repository at this point in the history
* Fixed invalid JSON output for pages that contain images
* Guarded the doc.select() method against invalid invocations (page
numbers out of range, non-PDF document type)
  • Loading branch information
JorjMcKie committed May 1, 2016
1 parent a95e492 commit b0fa9d9
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 18 deletions.
2 changes: 2 additions & 0 deletions examples/PDF2TextJS.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def SortBlocks(blocks):

sblocks = []
for b in blocks:
if b["type"] != "text": # only look at text blocks
continue
x0 = str(int(round(b["bbox"][0],0))).rjust(4,"0") # x coord in pixels
y0 = str(int(round(b["bbox"][1],0))).rjust(4,"0") # y coord in pixels
sortkey = y0 + x0 # = "yx"
Expand Down
22 changes: 18 additions & 4 deletions fitz/fitz.i
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,16 @@ struct fz_document_s {
PyObject *o = PySequence_GetItem($input,i);
if (PyInt_Check(o)) {
$1[i] = (int) PyInt_AsLong(o);
if ($1[i] < 0) {
PyErr_SetString(PyExc_ValueError,"sequence elements must be >= 0");
free($1);
return NULL;
}
if ($1[i] >= fz_count_pages(gctx, arg1)) {
PyErr_SetString(PyExc_ValueError,"sequence elements must be < pageCount");
free($1);
return NULL;
}
}
else {
PyErr_SetString(PyExc_ValueError,"sequence elements must be integers");
Expand All @@ -289,6 +299,7 @@ struct fz_document_s {
pdf_document *pdf = pdf_specifics(gctx, $self);
if (!pdf) {
PyErr_SetString(PyExc_ValueError,"not a valid pdf document");
free(liste);
return -2;
}
globals glo = { 0 };
Expand Down Expand Up @@ -1411,8 +1422,11 @@ fz_send_data_base64(fz_context *ctx, fz_output *out, fz_buffer *buffer)
int c = buffer->data[3*i];
int d = buffer->data[3*i+1];
int e = buffer->data[3*i+2];
if ((i & 15) == 0)
fz_printf(ctx, out, "\n");
/*************************************************/
/* JSON decoders do not like interspersed "\n" ! */
/*************************************************/
//if ((i & 15) == 0)
// fz_printf(ctx, out, "\n");
fz_printf(ctx, out, "%c%c%c%c", set[c>>2], set[((c&3)<<4)|(d>>4)], set[((d&15)<<2)|(e>>6)], set[e & 63]);
}
i *= 3;
Expand Down Expand Up @@ -1499,11 +1513,11 @@ fz_print_stext_page_json(fz_context *ctx, fz_output *out, fz_stext_page *page)
fz_image_block *image = page->blocks[block_n].u.image;

fz_print_rect_json(ctx, out, &(image->bbox));
fz_printf(ctx, out, "\"type\":%d,\"width\":%d,\"height\":%d",
fz_printf(ctx, out, "\"imgtype\":%d,\"width\":%d,\"height\":%d,",
image->image->buffer->params.type,
image->image->w,
image->image->h);
fz_printf(ctx, out, "\"image\":");
fz_printf(ctx, out, "\"image\":\n");
if (image->image->buffer == NULL) {
fz_printf(ctx, out, "null");
} else {
Expand Down
2 changes: 1 addition & 1 deletion fitz/fitz.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class _object:
import os
VersionFitz = "1.9"
VersionBind = "1.9.0"
VersionDate = "2016-04-30 9:07:17"
VersionDate = "2016-05-01 4:00:38"

class Document(_object):
"""Proxy of C fz_document_s struct."""
Expand Down
22 changes: 18 additions & 4 deletions fitz/fitz_wrap.c
Original file line number Diff line number Diff line change
Expand Up @@ -3769,6 +3769,7 @@ SWIGINTERN int fz_document_s__select(struct fz_document_s *self,int *liste,int a
pdf_document *pdf = pdf_specifics(gctx, self);
if (!pdf) {
PyErr_SetString(PyExc_ValueError,"not a valid pdf document");
free(liste);
return -2;
}
globals glo = { 0 };
Expand Down Expand Up @@ -4312,8 +4313,11 @@ fz_send_data_base64(fz_context *ctx, fz_output *out, fz_buffer *buffer)
int c = buffer->data[3*i];
int d = buffer->data[3*i+1];
int e = buffer->data[3*i+2];
if ((i & 15) == 0)
fz_printf(ctx, out, "\n");
/*************************************************/
/* JSON decoders do not like interspersed "\n" ! */
/*************************************************/
//if ((i & 15) == 0)
// fz_printf(ctx, out, "\n");
fz_printf(ctx, out, "%c%c%c%c", set[c>>2], set[((c&3)<<4)|(d>>4)], set[((d&15)<<2)|(e>>6)], set[e & 63]);
}
i *= 3;
Expand Down Expand Up @@ -4400,11 +4404,11 @@ fz_print_stext_page_json(fz_context *ctx, fz_output *out, fz_stext_page *page)
fz_image_block *image = page->blocks[block_n].u.image;

fz_print_rect_json(ctx, out, &(image->bbox));
fz_printf(ctx, out, "\"type\":%d,\"width\":%d,\"height\":%d",
fz_printf(ctx, out, "\"imgtype\":%d,\"width\":%d,\"height\":%d,",
image->image->buffer->params.type,
image->image->w,
image->image->h);
fz_printf(ctx, out, "\"image\":");
fz_printf(ctx, out, "\"image\":\n");
if (image->image->buffer == NULL) {
fz_printf(ctx, out, "null");
} else {
Expand Down Expand Up @@ -4958,6 +4962,16 @@ SWIGINTERN PyObject *_wrap_Document__select(PyObject *SWIGUNUSEDPARM(self), PyOb
PyObject *o = PySequence_GetItem(obj1,i);
if (PyInt_Check(o)) {
arg2[i] = (int) PyInt_AsLong(o);
if (arg2[i] < 0) {
PyErr_SetString(PyExc_ValueError,"sequence elements must be >= 0");
free(arg2);
return NULL;
}
if (arg2[i] >= fz_count_pages(gctx, arg1)) {
PyErr_SetString(PyExc_ValueError,"sequence elements must be < pageCount");
free(arg2);
return NULL;
}
}
else {
PyErr_SetString(PyExc_ValueError,"sequence elements must be integers");
Expand Down
9 changes: 1 addition & 8 deletions fitz/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,7 @@ def select(*arg):
raise ValueError("operation on closed document")
if not doc.name.lower().endswith(("/pdf", ".pdf")):
raise ValueError("only PDF documents supported")
if not isinstance(liste, types.ListType):
raise ValueError("must provide a list of pages")
for l in liste:
if not isinstance(l, numbers.Integral):
raise ValueError("must be sequence of integers")
if l < 0 or l >= doc.pageCount:
raise ValueError("some page numbers outside valid range")
doc._select(liste)
return doc._select(liste)

#==============================================================================
# A function for searching string occurrences on a page.
Expand Down
2 changes: 1 addition & 1 deletion fitz/version.i
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
import os
VersionFitz = "1.9"
VersionBind = "1.9.0"
VersionDate = "2016-04-30 9:07:17"
VersionDate = "2016-05-01 4:00:38"
%}

0 comments on commit b0fa9d9

Please sign in to comment.