From 93e5417a4e1dfcdbe7a2a9adab9ae7d917445681 Mon Sep 17 00:00:00 2001 From: Ujjwal Sharma Date: Wed, 11 Dec 2024 00:09:32 +0100 Subject: [PATCH] make processLinks sync by using textContent from textLayer --- web/autolinker.js | 41 +++++++++++++++++--------------------- web/pdf_find_controller.js | 31 ++++++++++------------------ web/pdf_page_view.js | 11 ++++++---- web/text_layer_builder.js | 4 +++- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/web/autolinker.js b/web/autolinker.js index 9932fb2db173e1..80d94fc555cf7a 100644 --- a/web/autolinker.js +++ b/web/autolinker.js @@ -1,8 +1,5 @@ import { createValidAbsoluteUrl, Util } from "../src/shared/util.js"; -import { - getOriginalIndex, - normalizedTextContent, -} from "./pdf_find_controller.js"; +import { getOriginalIndex, normalize } from "./pdf_find_controller.js"; class Autolinker { static #urlRegex = @@ -71,25 +68,23 @@ class Autolinker { return linkAnnotations; } - static processLinks(pdfPageView) { - return pdfPageView.pdfPage.getTextContent().then(content => { - const [text, diffs] = normalizedTextContent(content); - const matches = text.matchAll(Autolinker.#urlRegex); - return Array.from(matches, match => { - const url = createValidAbsoluteUrl(match[0]); - if (url) { - const [index, length] = getOriginalIndex( - diffs, - match.index, - match[0].length - ); - return this.#addLinkAnnotations(url.href, index, length, pdfPageView); - } - return url; - }) - .filter(annotation => annotation !== null) - .flat(); - }); + static processLinks(pdfPageView, textContent) { + const [text, diffs] = normalize(textContent.join("")); + const matches = text.matchAll(Autolinker.#urlRegex); + return Array.from(matches, match => { + const url = createValidAbsoluteUrl(match[0]); + if (url) { + const [index, length] = getOriginalIndex( + diffs, + match.index, + match[0].length + ); + return this.#addLinkAnnotations(url.href, index, length, pdfPageView); + } + return url; + }) + .filter(annotation => annotation !== null) + .flat(); } } diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 56692bce9245de..3cec7702f2a398 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -384,19 +384,6 @@ function getOriginalIndex(diffs, pos, len) { return [oldStart, oldLen]; } -function normalizedTextContent(textContent) { - const strBuf = []; - - for (const textItem of textContent.items) { - strBuf.push(textItem.str); - if (textItem.hasEOL) { - strBuf.push("\n"); - } - } - - return normalize(strBuf.join("")); -} - /** * @typedef {Object} PDFFindControllerOptions * @property {IPDFLinkService} linkService - The navigation/linking service. @@ -892,12 +879,21 @@ class PDFFindController { .then(pdfPage => pdfPage.getTextContent(textOptions)) .then( textContent => { + const strBuf = []; + + for (const textItem of textContent.items) { + strBuf.push(textItem.str); + if (textItem.hasEOL) { + strBuf.push("\n"); + } + } + // Store the normalized page content (text items) as one string. [ this._pageContents[i], this._pageDiffs[i], this._hasDiacritics[i], - ] = normalizedTextContent(textContent); + ] = normalize(strBuf.join("")); resolve(); }, reason => { @@ -1175,9 +1171,4 @@ class PDFFindController { } } -export { - FindState, - getOriginalIndex, - normalizedTextContent, - PDFFindController, -}; +export { FindState, getOriginalIndex, normalize, PDFFindController }; diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js index e92b434a17b77d..8ac0d14aea4c06 100644 --- a/web/pdf_page_view.js +++ b/web/pdf_page_view.js @@ -463,15 +463,16 @@ class PDFPageView { async #renderTextLayer() { if (!this.textLayer) { - return; + return []; } let error = null; + let textContent; try { - await this.textLayer.render(this.viewport); + textContent = await this.textLayer.render(this.viewport); } catch (ex) { if (ex instanceof AbortException) { - return; + return []; } console.error("#renderTextLayer:", ex); error = ex; @@ -479,6 +480,7 @@ class PDFPageView { this.#dispatchLayerRendered("textlayerrendered", error); this.#renderStructTreeLayer(); + return textContent; } /** @@ -1098,7 +1100,8 @@ class PDFPageView { if (this.annotationLayer) { await textLayerP; if (this.#enableAutolinking) { - this.#linkAnnotations = await Autolinker.processLinks(this); + const textContent = await textLayerP; + this.#linkAnnotations = Autolinker.processLinks(this, textContent); } await this.#renderAnnotationLayer(); } diff --git a/web/text_layer_builder.js b/web/text_layer_builder.js index f642382b606431..89adcd967c9841 100644 --- a/web/text_layer_builder.js +++ b/web/text_layer_builder.js @@ -72,6 +72,7 @@ class TextLayerBuilder { * Renders the text layer. * @param {PageViewport} viewport * @param {Object} [textContentParams] + * @returns {Array} */ async render(viewport, textContentParams = null) { if (this.#renderingDone && this.#textLayer) { @@ -80,7 +81,7 @@ class TextLayerBuilder { onBefore: this.hide.bind(this), }); this.show(); - return; + return []; } this.cancel(); @@ -112,6 +113,7 @@ class TextLayerBuilder { this.#onAppend?.(this.div); this.highlighter?.enable(); this.accessibilityManager?.enable(); + return textContentItemsStr; } hide() {