Skip to content

Commit

Permalink
move linking logic to 'static' class
Browse files Browse the repository at this point in the history
  • Loading branch information
ryzokuken committed Dec 10, 2024
1 parent d164f05 commit 8ee17f7
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 91 deletions.
96 changes: 96 additions & 0 deletions web/autolinker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { createValidAbsoluteUrl, Util } from "../src/shared/util.js";
import {
getOriginalIndex,
normalizedTextContent,
} from "./pdf_find_controller.js";

class Autolinker {
static #urlRegex =
/\b(?:https?:\/\/|mailto:|www.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+/gmv;

static #addLinkAnnotations(url, index, length, pdfPageView) {
// TODO refactor out the logic for a single match from this function
const convertedMatch = pdfPageView._textHighlighter._convertMatches(
[index],
[length]
)[0];

const range = new Range();
range.setStart(
pdfPageView._textHighlighter.textDivs[convertedMatch.begin.divIdx]
.firstChild,
convertedMatch.begin.offset
);
range.setEnd(
pdfPageView._textHighlighter.textDivs[convertedMatch.end.divIdx]
.firstChild,
convertedMatch.end.offset
);

const pageBox = pdfPageView.textLayer.div.getBoundingClientRect();
const linkAnnotations = [];
for (const linkBox of range.getClientRects()) {
if (linkBox.width === 0 || linkBox.height === 0) {
continue;
}

const bottomLeft = pdfPageView.getPagePoint(
linkBox.left - pageBox.left,
linkBox.top - pageBox.top
);
const topRight = pdfPageView.getPagePoint(
linkBox.left - pageBox.left + linkBox.width,
linkBox.top - pageBox.top + linkBox.height
);

const rect = Util.normalizeRect([
bottomLeft[0],
bottomLeft[1],
topRight[0],
topRight[1],
]);

linkAnnotations.push({
unsafeUrl: url,
url,
rect,
annotationType: 2,
rotation: 0,
// This is just the default for AnnotationBorderStyle. At some point we
// should switch to something better like `new LinkAnnotation` here.
borderStyle: {
width: 1,
rawWidth: 1,
style: 1, // SOLID
dashArray: [3],
horizontalCornerRadius: 0,
verticalCornerRadius: 0,
},
});
}
return linkAnnotations;
}

static processLinks(pdfPageView) {
return pdfPageView.pdfPage.getTextContent().then(content => {
const [text, diffs] = normalizedTextContent(content);
const matches = text.matchAll(Autolinker.#urlRegex);
return Array.from(matches, match => {
const url = createValidAbsoluteUrl(match[0]);
if (url) {
const [index, length] = getOriginalIndex(
diffs,
match.index,
match[0].length
);
return this.#addLinkAnnotations(url.href, index, length, pdfPageView);
}
return url;
})
.filter(annotation => annotation !== null)
.flat();
});
}
}

export { Autolinker };
93 changes: 2 additions & 91 deletions web/pdf_page_view.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,11 @@
import {
AbortException,
AnnotationMode,
createValidAbsoluteUrl,
OutputScale,
PixelsPerInch,
RenderingCancelledException,
setLayerDimensions,
shadow,
Util,
} from "pdfjs-lib";
import {
approximateFraction,
Expand All @@ -42,13 +40,10 @@ import {
RenderingStates,
TextLayerMode,
} from "./ui_utils.js";
import {
getOriginalIndex,
normalizedTextContent,
} from "./pdf_find_controller.js";
import { AnnotationEditorLayerBuilder } from "./annotation_editor_layer_builder.js";
import { AnnotationLayerBuilder } from "./annotation_layer_builder.js";
import { AppOptions } from "./app_options.js";
import { Autolinker } from "./autolinker.js";
import { DrawLayerBuilder } from "./draw_layer_builder.js";
import { GenericL10n } from "web-null_l10n";
import { SimpleLinkService } from "./pdf_link_service.js";
Expand Down Expand Up @@ -1120,7 +1115,7 @@ class PDFPageView {
if (this.annotationLayer) {
await textLayerP;
if (this.#enableAutolinking) {
await this.#processLinks();
this.#linkAnnotations = await Autolinker.processLinks(this);
}
await this.#renderAnnotationLayer();
}
Expand Down Expand Up @@ -1213,90 +1208,6 @@ class PDFPageView {
? this.canvas
: null;
}

#addLinkAnnotations(url, index, length) {
// TODO refactor out the logic for a single match from this function
const convertedMatch = this._textHighlighter._convertMatches(
[index],
[length]
)[0];

const range = new Range();
range.setStart(
this._textHighlighter.textDivs[convertedMatch.begin.divIdx].firstChild,
convertedMatch.begin.offset
);
range.setEnd(
this._textHighlighter.textDivs[convertedMatch.end.divIdx].firstChild,
convertedMatch.end.offset
);

const pageBox = this.textLayer.div.getBoundingClientRect();
const linkAnnotations = [];
for (const linkBox of range.getClientRects()) {
if (linkBox.width === 0 || linkBox.height === 0) {
continue;
}

const bottomLeft = this.getPagePoint(
linkBox.left - pageBox.left,
linkBox.top - pageBox.top
);
const topRight = this.getPagePoint(
linkBox.left - pageBox.left + linkBox.width,
linkBox.top - pageBox.top + linkBox.height
);

const rect = Util.normalizeRect([
bottomLeft[0],
bottomLeft[1],
topRight[0],
topRight[1],
]);

linkAnnotations.push({
unsafeUrl: url,
url,
rect,
annotationType: 2,
rotation: 0,
// This is just the default for AnnotationBorderStyle. At some point we
// should switch to something better like `new LinkAnnotation` here.
borderStyle: {
width: 1,
rawWidth: 1,
style: 1, // SOLID
dashArray: [3],
horizontalCornerRadius: 0,
verticalCornerRadius: 0,
},
});
}
return linkAnnotations;
}

#processLinks() {
return this.pdfPage.getTextContent().then(content => {
const [text, diffs] = normalizedTextContent(content);
const urlRegex =
/\b(?:https?:\/\/|mailto:|www.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+/gmv;
const matches = text.matchAll(urlRegex);
this.#linkAnnotations = Array.from(matches, match => {
const url = createValidAbsoluteUrl(match[0]);
if (url) {
const [index, length] = getOriginalIndex(
diffs,
match.index,
match[0].length
);
return this.#addLinkAnnotations(url.href, index, length);
}
return url;
})
.filter(annotation => annotation !== null)
.flat();
});
}
}

export { PDFPageView };

0 comments on commit 8ee17f7

Please sign in to comment.