Skip to content

Commit

Permalink
PDFBOX-5225: consider the field list parameter when there are widgets…
Browse files Browse the repository at this point in the history
… with missing page references; adjust / add test

git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922019 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
THausherr committed Nov 22, 2024
1 parent 02c4280 commit e49649a
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,13 @@ private Rectangle2D getTransformedAppearanceBBox(PDAppearanceStream appearanceSt
return transformedAppearanceBox.getBounds2D();
}

/**
* Build a map of pages => widgets
* @param fields a list of fields to be flattened
* @param pages the page tree
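* @return a map from each page dictionary to the set of widget dictionaries on that page
* @throws IOException if the annotations of a page cannot be read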
*/
private Map<COSDictionary,Set<COSDictionary>> buildPagesWidgetsMap(
List<PDField> fields, PDPageTree pages) throws IOException
{
Expand All @@ -777,6 +784,7 @@ private Map<COSDictionary,Set<COSDictionary>> buildPagesWidgetsMap(
}
else
{
LOG.warn("missing /P entry (page reference) in a widget for field: " + field);
hasMissingPageRef = true;
}
}
Expand All @@ -790,11 +798,21 @@ private Map<COSDictionary,Set<COSDictionary>> buildPagesWidgetsMap(
// If there is a widget with a missing page reference we need to build the map in reverse,
// i.e. from the page annotations to the widgets.
LOG.warn("There has been a widget with a missing page reference, will check all page annotations");
Set<COSDictionary> widgetDictionarySet = new HashSet<>();
for (PDField field : fields)
{
List<PDAnnotationWidget> widgets = field.getWidgets();
for (PDAnnotationWidget widget : widgets)
{
widgetDictionarySet.add(widget.getCOSObject());
}
}

for (PDPage page : pages)
{
for (PDAnnotation annotation : page.getAnnotations())
{
if (annotation instanceof PDAnnotationWidget)
if (widgetDictionarySet.contains(annotation.getCOSObject()))
{
fillPagesAnnotationMap(pagesAnnotationsMap, page, (PDAnnotationWidget) annotation);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Stream;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.rendering.TestPDFToImage;
import org.junit.jupiter.api.BeforeAll;
Expand Down Expand Up @@ -130,9 +130,6 @@ static void setUp()
// annotations.
"https://issues.apache.org/jira/secure/attachment/12994791/flatten.pdf,PDFBOX-4788.pdf",

// PDFBOX-4889: appearance streams with empty /BBox.
"https://issues.apache.org/jira/secure/attachment/13005793/f1040sb%20test.pdf,PDFBOX-4889.pdf",

// PDFBOX-4955: appearance streams with forms that are not used.
"https://issues.apache.org/jira/secure/attachment/13011410/PDFBOX-4955.pdf,PDFBOX-4955.pdf",

Expand Down Expand Up @@ -183,8 +180,9 @@ void flattenTestPDFBOX5254() throws IOException, URISyntaxException
{
testPdf.getDocumentCatalog().getAcroForm().flatten();
testPdf.setAllSecurityToBeRemoved(true);
assertTrue(testPdf.getDocumentCatalog().getAcroForm().getFields().isEmpty());
testPdf.save(outputFile, CompressParameters.NO_COMPRESSION);
testPdf.save(outputFile);
assertTrue(testPdf.getDocumentCatalog().getAcroForm(null).getFields().isEmpty());
assertEquals(72, testPdf.getPage(0).getAnnotations().size());
}

// compare rendering
Expand All @@ -204,6 +202,61 @@ void flattenTestPDFBOX5254() throws IOException, URISyntaxException
}
}

/**
 * Flattens only the VN_NAME field of the PDFBOX-5225 sample and checks that nothing else is
 * removed from the field tree or from the annotations list of the first page. That field has
 * an "orphan" widget that belongs to no page.
 *
 * @throws IOException if an I/O error occurs in the course of the test
 * @throws URISyntaxException presumably raised while resolving the sample URL in
 * generateSamples; confirm against that helper
 */
@Test
void flattenTestPDFBOX5225() throws IOException, URISyntaxException
{
    String targetFileName = "PDFBOX-5225.pdf";
    generateSamples(
            "https://issues.apache.org/jira/secure/attachment/13027311/SourceFailure.pdf",
            targetFileName);

    File inputFile = new File(IN_DIR, targetFileName);
    File outputFile = new File(OUT_DIR, targetFileName);

    try (PDDocument document = Loader.loadPDF(inputFile))
    {
        PDAcroForm form = document.getDocumentCatalog().getAcroForm();

        // flatten a single field only; all other fields must survive
        List<PDField> fieldsToFlatten = new ArrayList<>();
        fieldsToFlatten.add(form.getField("VN_NAME"));
        form.flatten(fieldsToFlatten, false);

        document.setAllSecurityToBeRemoved(true);
        document.save(outputFile);

        // walk the whole field tree to count the fields that are left
        int remainingFields = 0;
        for (Iterator<PDField> it = form.getFieldTree().iterator(); it.hasNext(); it.next())
        {
            ++remainingFields;
        }
        assertEquals(76, remainingFields);
        assertEquals(59, document.getPage(0).getAnnotations().size());
    }

    // compare rendering
    boolean renderingMatches = TestPDFToImage.doTestFile(outputFile,
            IN_DIR.getAbsolutePath(), OUT_DIR.getAbsolutePath());
    if (renderingMatches)
    {
        // cleanup input and output directory for matching files.
        removeAllRenditions(inputFile);
        inputFile.delete();
        outputFile.delete();
    }
    else
    {
        // check manually
        System.err.println("Rendering of " + outputFile
                + " failed or is not identical to expected rendering in " + IN_DIR
                + " directory");
    }
}

/*
* Flatten and compare with generated image samples.
*
Expand Down

0 comments on commit e49649a

Please sign in to comment.