From c3065629ca1a66d01a0a382793ade76991f25e0b Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Wed, 24 Jul 2024 18:55:31 +0200 Subject: [PATCH] [Editor] Correctly save a non-ascii alt text --- src/core/annotation.js | 33 ++++++++++------------------- src/core/core_utils.js | 5 +++++ src/core/struct_tree.js | 9 ++++---- test/unit/api_spec.js | 47 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 67 insertions(+), 27 deletions(-) diff --git a/src/core/annotation.js b/src/core/annotation.js index 7ac9a3eed0722..95028cb5e456b 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -42,12 +42,12 @@ import { escapeString, getInheritableProperty, getRotationMatrix, - isAscii, isNumberArray, lookupMatrix, lookupNormalRect, lookupRect, numberToString, + stringToAsciiOrUTF16BE, stringToUTF16String, } from "./core_utils.js"; import { @@ -2133,9 +2133,12 @@ class WidgetAnnotation extends Annotation { value, }; - const encoder = val => - isAscii(val) ? val : stringToUTF16String(val, /* bigEndian = */ true); - dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value)); + dict.set( + "V", + Array.isArray(value) + ? value.map(stringToAsciiOrUTF16BE) + : stringToAsciiOrUTF16BE(value) + ); this.amendSavedDict(annotationStorage, dict); const maybeMK = this._getMKDict(rotation); @@ -3852,21 +3855,13 @@ class FreeTextAnnotation extends MarkupAnnotation { freetext.set("Rect", rect); const da = `/Helv ${fontSize} Tf ${getPdfColor(color, /* isFill */ true)}`; freetext.set("DA", da); - freetext.set( - "Contents", - isAscii(value) - ? value - : stringToUTF16String(value, /* bigEndian = */ true) - ); + freetext.set("Contents", stringToAsciiOrUTF16BE(value)); freetext.set("F", 4); freetext.set("Border", [0, 0, 0]); freetext.set("Rotate", rotation); if (user) { - freetext.set( - "T", - isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true) - ); + freetext.set("T", stringToAsciiOrUTF16BE(user)); } if (apRef || ap) { @@ -4600,10 +4595,7 @@ class HighlightAnnotation extends MarkupAnnotation { highlight.set("CA", opacity); if (user) { - highlight.set( - "T", - isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true) - ); + highlight.set("T", stringToAsciiOrUTF16BE(user)); } if (apRef || ap) { @@ -4885,10 +4877,7 @@ class StampAnnotation extends MarkupAnnotation { stamp.set("Rotate", rotation); if (user) { - stamp.set( - "T", - isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true) - ); + stamp.set("T", stringToAsciiOrUTF16BE(user)); } if (apRef || ap) { diff --git a/src/core/core_utils.js b/src/core/core_utils.js index a84373d1614ac..c507dfdcc2ca4 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -613,6 +613,10 @@ function getNewAnnotationsMap(annotationStorage) { return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null; } +function stringToAsciiOrUTF16BE(str) { + return isAscii(str) ? str : stringToUTF16String(str, /* bigEndian = */ true); +} + function isAscii(str) { return /^[\x00-\x7F]*$/.test(str); } @@ -699,6 +703,7 @@ export { readUint16, readUint32, recoverJsURL, + stringToAsciiOrUTF16BE, stringToUTF16HexString, stringToUTF16String, toRomanNumerals, diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 9f4f552b0f3e6..8b1559c90fc10 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -16,6 +16,7 @@ import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js"; import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js"; import { NumberTree } from "./name_number_tree.js"; +import { stringToAsciiOrUTF16BE } from "./core_utils.js"; import { writeObject } from "./writer.js"; const MAX_DEPTH = 40; @@ -316,19 +317,19 @@ class StructTreeRoot { tagDict.set("S", Name.get(type)); if (title) { - tagDict.set("T", title); + tagDict.set("T", stringToAsciiOrUTF16BE(title)); } if (lang) { tagDict.set("Lang", lang); } if (alt) { - tagDict.set("Alt", alt); + tagDict.set("Alt", stringToAsciiOrUTF16BE(alt)); } if (expanded) { - tagDict.set("E", expanded); + tagDict.set("E", stringToAsciiOrUTF16BE(expanded)); } if (actualText) { - tagDict.set("ActualText", actualText); + tagDict.set("ActualText", stringToAsciiOrUTF16BE(actualText)); } await this.#updateParentTag({ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 680ec6a9eb080..6d6a2b0bdf5bf 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2524,6 +2524,21 @@ describe("api", function () { alt: "Hello World", }, }); + // Test if an alt-text using utf-16 is correctly handled. + // The Mahjong tile code is 0x1F000. + pdfDoc.annotationStorage.setValue("pdfjs_internal_editor_1", { + annotationType: AnnotationEditorType.STAMP, + rect: [128, 400, 148, 420], + rotation: 0, + bitmap: structuredClone(bitmap), + bitmapId: "im2", + pageIndex: 0, + structTreeParentId: "p3R_mc14", + accessibilityData: { + type: "Figure", + alt: "Γειά σου with a Mahjong tile 🀀", + }, + }); const data = await pdfDoc.saveDocument(); await loadingTask.destroy(); @@ -2532,7 +2547,7 @@ describe("api", function () { pdfDoc = await loadingTask.promise; const page = await pdfDoc.getPage(1); const tree = await page.getStructTree(); - const [predecessor, leaf] = findNode( + let [predecessor, leaf] = findNode( null, tree, 0, @@ -2560,6 +2575,36 @@ describe("api", function () { alt: "Hello World", }); + let count = 0; + [predecessor, leaf] = findNode(null, tree, 0, node => { + if (node.role === "Figure") { + count += 1; + return count === 2; + } + return false; + }); + + expect(predecessor).toEqual({ + role: "Span", + children: [ + { + type: "content", + id: "p3R_mc14", + }, + ], + }); + + expect(leaf).toEqual({ + role: "Figure", + children: [ + { + type: "annotation", + id: "pdfjs_internal_id_481R", + }, + ], + alt: "Γειά σου with a Mahjong tile 🀀", + }); + await loadingTask.destroy(); });