1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Enable automatic URL linking

Detect links in the text content of a file and generate link annotations
at the appropriate locations, so that plain-text URLs become clickable
hyperlinks.
This commit is contained in:
Ujjwal Sharma 2024-11-12 11:26:32 +05:30
parent 90a5c37cb0
commit 61ba1ea48c
20 changed files with 656 additions and 14 deletions

View file

@ -27,7 +27,12 @@
// eslint-disable-next-line max-len
/** @typedef {import("../src/display/editor/tools.js").AnnotationEditorUIManager} AnnotationEditorUIManager */
import { AnnotationLayer } from "pdfjs-lib";
import {
AnnotationLayer,
AnnotationType,
setLayerDimensions,
Util,
} from "pdfjs-lib";
import { PresentationModeState } from "./ui_utils.js";
/**
@ -56,7 +61,18 @@ import { PresentationModeState } from "./ui_utils.js";
* @property {StructTreeLayerBuilder} [structTreeLayer]
*/
/**
* @typedef {Object} InjectLinkAnnotationsOptions
* @property {Array<Object>} inferredLinks
* @property {PageViewport} viewport
* @property {StructTreeLayerBuilder} [structTreeLayer]
*/
class AnnotationLayerBuilder {
#annotations = null;
#externalHide = false;
#onAppend = null;
#eventAbortController = null;
@ -133,19 +149,13 @@ class AnnotationLayerBuilder {
this.#onAppend?.(div);
if (annotations.length === 0) {
this.hide();
this.#annotations = annotations;
this.hide(/* internal = */ true);
return;
}
this.annotationLayer = new AnnotationLayer({
div,
accessibilityManager: this._accessibilityManager,
annotationCanvasMap: this._annotationCanvasMap,
annotationEditorUIManager: this._annotationEditorUIManager,
page: this.pdfPage,
viewport: viewport.clone({ dontFlip: true }),
structTreeLayer,
});
this.#initAnnotationLayer(viewport, structTreeLayer);
await this.annotationLayer.render({
annotations,
@ -159,6 +169,8 @@ class AnnotationLayerBuilder {
fieldObjects,
});
this.#annotations = annotations;
// Ensure that interactive form elements in the annotationLayer are
// disabled while PresentationMode is active (see issue 12232).
if (this.linkService.isInPresentationMode) {
@ -177,6 +189,18 @@ class AnnotationLayerBuilder {
}
}
#initAnnotationLayer(viewport, structTreeLayer) {
this.annotationLayer = new AnnotationLayer({
div: this.div,
accessibilityManager: this._accessibilityManager,
annotationCanvasMap: this._annotationCanvasMap,
annotationEditorUIManager: this._annotationEditorUIManager,
page: this.pdfPage,
viewport: viewport.clone({ dontFlip: true }),
structTreeLayer,
});
}
cancel() {
this._cancelled = true;
@ -184,7 +208,8 @@ class AnnotationLayerBuilder {
this.#eventAbortController = null;
}
hide() {
hide(internal = false) {
this.#externalHide = !internal;
if (!this.div) {
return;
}
@ -195,6 +220,45 @@ class AnnotationLayerBuilder {
return !!this.annotationLayer?.hasEditableAnnotations();
}
/**
* @param {InjectLinkAnnotationsOptions} options
* @returns {Promise<void>} A promise that is resolved when the inferred links
* are added to the annotation layer.
*/
async injectLinkAnnotations({
inferredLinks,
viewport,
structTreeLayer = null,
}) {
if (this.#annotations === null) {
throw new Error(
"`render` method must be called before `injectLinkAnnotations`."
);
}
if (this._cancelled) {
return;
}
const newLinks = this.#annotations.length
? this.#checkInferredLinks(inferredLinks)
: inferredLinks;
if (!newLinks.length) {
return;
}
if (!this.annotationLayer) {
this.#initAnnotationLayer(viewport, structTreeLayer);
setLayerDimensions(this.div, viewport);
}
await this.annotationLayer.addLinkAnnotations(newLinks, this.linkService);
// Don't show the annotation layer if it was explicitly hidden previously.
if (!this.#externalHide) {
this.div.hidden = false;
}
}
#updatePresentationModeState(state) {
if (!this.div) {
return;
@ -217,6 +281,75 @@ class AnnotationLayerBuilder {
section.inert = disableFormElements;
}
}
#checkInferredLinks(inferredLinks) {
function annotationRects(annot) {
if (!annot.quadPoints) {
return [annot.rect];
}
const rects = [];
for (let i = 2, ii = annot.quadPoints.length; i < ii; i += 8) {
const trX = annot.quadPoints[i];
const trY = annot.quadPoints[i + 1];
const blX = annot.quadPoints[i + 2];
const blY = annot.quadPoints[i + 3];
rects.push([blX, blY, trX, trY]);
}
return rects;
}
function intersectAnnotations(annot1, annot2) {
const intersections = [];
const annot1Rects = annotationRects(annot1);
const annot2Rects = annotationRects(annot2);
for (const rect1 of annot1Rects) {
for (const rect2 of annot2Rects) {
const intersection = Util.intersect(rect1, rect2);
if (intersection) {
intersections.push(intersection);
}
}
}
return intersections;
}
function areaRects(rects) {
let totalArea = 0;
for (const rect of rects) {
totalArea += Math.abs((rect[2] - rect[0]) * (rect[3] - rect[1]));
}
return totalArea;
}
return inferredLinks.filter(link => {
let linkAreaRects;
for (const annotation of this.#annotations) {
if (
annotation.annotationType !== AnnotationType.LINK ||
annotation.url !== link.url
) {
continue;
}
// TODO: Add a test case to verify that we can find the intersection
// between two annotations with quadPoints properly.
const intersections = intersectAnnotations(annotation, link);
if (intersections.length === 0) {
continue;
}
linkAreaRects ??= areaRects(annotationRects(link));
if (
areaRects(intersections) / linkAreaRects >
0.5 /* If the overlap is more than 50%. */
) {
return false;
}
}
return true;
});
}
}
export { AnnotationLayerBuilder };

View file

@ -353,6 +353,7 @@ const PDFViewerApplication = {
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("TESTING")) {
Object.assign(opts, {
enableAltText: x => x === "true",
enableAutoLinking: x => x === "true",
enableFakeMLManager: x => x === "true",
enableGuessAltText: x => x === "true",
enableUpdatedAddImage: x => x === "true",
@ -492,6 +493,7 @@ const PDFViewerApplication = {
abortSignal: this._globalAbortController.signal,
enableHWA,
supportsPinchToZoom: this.supportsPinchToZoom,
enableAutoLinking: AppOptions.get("enableAutoLinking"),
});
this.pdfViewer = pdfViewer;

View file

@ -195,6 +195,11 @@ const defaultOptions = {
value: true,
kind: OptionKind.VIEWER + OptionKind.PREFERENCE + OptionKind.EVENT_DISPATCH,
},
enableAutoLinking: {
/** @type {boolean} */
value: false,
kind: OptionKind.VIEWER + OptionKind.PREFERENCE,
},
enableGuessAltText: {
/** @type {boolean} */
value: true,

147
web/autolinker.js Normal file
View file

@ -0,0 +1,147 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
AnnotationBorderStyleType,
AnnotationType,
createValidAbsoluteUrl,
Util,
} from "pdfjs-lib";
import { getOriginalIndex, normalize } from "./pdf_find_controller.js";
/**
 * Convert a DOM rectangle (client coordinates) into a normalized rectangle in
 * page coordinates.
 *
 * @param {{width: number, height: number, left: number, top: number}} domRect
 * @param {Object} pdfPageView - Supplies the text-layer `div` used as origin
 *   and the `getPagePoint` conversion.
 * @returns {Array<number>|null} The normalized rect, or `null` when the DOM
 *   rectangle has a zero width or height.
 */
function DOMRectToPDF({ width, height, left, top }, pdfPageView) {
  const isEmpty = width === 0 || height === 0;
  if (isEmpty) {
    return null;
  }

  // Make the coordinates relative to the text layer before converting them.
  const { left: pageLeft, top: pageTop } =
    pdfPageView.textLayer.div.getBoundingClientRect();
  const offsetX = left - pageLeft;
  const offsetY = top - pageTop;

  const [x0, y0] = pdfPageView.getPagePoint(offsetX, offsetY);
  const [x1, y1] = pdfPageView.getPagePoint(offsetX + width, offsetY + height);

  return Util.normalizeRect([x0, y0, x1, y1]);
}
/**
 * Compute where a link annotation covering `range` should be placed.
 *
 * @param {Range} range
 * @param {Object} pdfPageView
 * @returns {Object} `{ rect }` when the range has exactly one client rect;
 *   otherwise `{ quadPoints, rect }`, with eight values per non-empty client
 *   rect and `rect` being their bounding box.
 */
function calculateLinkPosition(range, pdfPageView) {
  const clientRects = range.getClientRects();
  if (clientRects.length === 1) {
    const rect = DOMRectToPDF(clientRects[0], pdfPageView);
    return { rect };
  }

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  const quadPoints = [];

  for (const clientRect of clientRects) {
    const pdfRect = DOMRectToPDF(clientRect, pdfPageView);
    if (pdfRect === null) {
      // Empty client rects contribute nothing.
      continue;
    }
    const [x0, y0, x1, y1] = pdfRect;
    // Corner order: (x0, y1), (x1, y1), (x0, y0), (x1, y0).
    quadPoints.push(x0, y1, x1, y1, x0, y0, x1, y0);

    minX = Math.min(minX, x0);
    minY = Math.min(minY, y0);
    maxX = Math.max(maxX, x1);
    maxY = Math.max(maxY, y1);
  }

  return { quadPoints, rect: [minX, minY, maxX, maxY] };
}
/**
 * Build the data object for a link annotation covering a text match.
 *
 * @param {{url: string, index: number, length: number}} link - The URL plus
 *   the position and length of the match in the page text.
 * @param {Object} pdfPageView - Its `_textHighlighter` maps the match onto
 *   text divs.
 * @param {number} id - Used to build the `inferred_link_${id}` identifier.
 * @returns {Object} The annotation data, including the position returned by
 *   `calculateLinkPosition`.
 */
function createLinkAnnotation({ url, index, length }, pdfPageView, id) {
  const textHighlighter = pdfPageView._textHighlighter;
  const { begin, end } = textHighlighter._convertMatches([index], [length])[0];

  // Select the matched text so that its on-screen rectangles can be measured.
  const linkRange = new Range();
  const startNode = textHighlighter.textDivs[begin.divIdx].firstChild;
  linkRange.setStart(startNode, begin.offset);
  const endNode = textHighlighter.textDivs[end.divIdx].firstChild;
  linkRange.setEnd(endNode, end.offset);

  const position = calculateLinkPosition(linkRange, pdfPageView);

  // This is just the default for AnnotationBorderStyle.
  const borderStyle = {
    width: 1,
    rawWidth: 1,
    style: AnnotationBorderStyleType.SOLID,
    dashArray: [3],
    horizontalCornerRadius: 0,
    verticalCornerRadius: 0,
  };

  return {
    id: `inferred_link_${id}`,
    unsafeUrl: url,
    url,
    annotationType: AnnotationType.LINK,
    rotation: 0,
    ...position,
    borderStyle,
  };
}
class Autolinker {
static #index = 0;
static #regex;
static findLinks(text) {
// Regex can be tested and verified at https://regex101.com/r/zgDwPE/1.
this.#regex ??=
/\b(?:https?:\/\/|mailto:|www\.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+|\b[[\S--@]--\{]+@[\S--.]+\.[[\S--\[]--\p{P}]{2,}/gmv;
const [normalizedText, diffs] = normalize(text);
const matches = normalizedText.matchAll(this.#regex);
const links = [];
for (const match of matches) {
const raw =
match[0].startsWith("www.") ||
match[0].startsWith("mailto:") ||
match[0].startsWith("http://") ||
match[0].startsWith("https://")
? match[0]
: `mailto:${match[0]}`;
const url = createValidAbsoluteUrl(raw, null, {
addDefaultProtocol: true,
});
if (url) {
const [index, length] = getOriginalIndex(
diffs,
match.index,
match[0].length
);
links.push({ url: url.href, index, length });
}
}
return links;
}
static processLinks(pdfPageView) {
return this.findLinks(
pdfPageView._textHighlighter.textContentItemsStr.join("\n")
).map(link => createLinkAnnotation(link, pdfPageView, this.#index++));
}
}
export { Autolinker };

View file

@ -1185,4 +1185,4 @@ class PDFFindController {
}
}
export { FindState, PDFFindController };
export { FindState, getOriginalIndex, normalize, PDFFindController };

View file

@ -43,6 +43,7 @@ import {
import { AnnotationEditorLayerBuilder } from "./annotation_editor_layer_builder.js";
import { AnnotationLayerBuilder } from "./annotation_layer_builder.js";
import { AppOptions } from "./app_options.js";
import { Autolinker } from "./autolinker.js";
import { DrawLayerBuilder } from "./draw_layer_builder.js";
import { GenericL10n } from "web-null_l10n";
import { SimpleLinkService } from "./pdf_link_service.js";
@ -84,6 +85,8 @@ import { XfaLayerBuilder } from "./xfa_layer_builder.js";
* the necessary layer-properties.
* @property {boolean} [enableHWA] - Enables hardware acceleration for
* rendering. The default value is `false`.
* @property {boolean} [enableAutoLinking] - Enable creation of hyperlinks from
* text that looks like a URL. The default value is `false`.
*/
const DEFAULT_LAYER_PROPERTIES =
@ -120,6 +123,8 @@ class PDFPageView {
#enableHWA = false;
#enableAutoLinking = false;
#hasRestrictedScaling = false;
#isEditing = false;
@ -177,6 +182,7 @@ class PDFPageView {
options.maxCanvasPixels ?? AppOptions.get("maxCanvasPixels");
this.pageColors = options.pageColors || null;
this.#enableHWA = options.enableHWA || false;
this.#enableAutoLinking = options.enableAutoLinking || false;
this.eventBus = options.eventBus;
this.renderingQueue = options.renderingQueue;
@ -1100,10 +1106,19 @@ class PDFPageView {
viewport.rawDims
);
this.#renderTextLayer();
const textLayerPromise = this.#renderTextLayer();
if (this.annotationLayer) {
await this.#renderAnnotationLayer();
if (this.#enableAutoLinking) {
await textLayerPromise;
this.annotationLayer.injectLinkAnnotations({
inferredLinks: Autolinker.processLinks(this),
viewport: this.viewport,
structTreeLayer: this.structTreeLayer,
});
}
}
const { annotationEditorUIManager } = this.#layerProperties;

View file

@ -128,6 +128,8 @@ function isValidAnnotationEditorMode(mode) {
* rendering. The default value is `false`.
* @property {boolean} [supportsPinchToZoom] - Enable zooming on pinch gesture.
* The default value is `true`.
* @property {boolean} [enableAutoLinking] - Enable creation of hyperlinks from
* text that looks like a URL. The default value is `false`.
*/
class PDFPageViewBuffer {
@ -228,6 +230,8 @@ class PDFViewer {
#enableNewAltTextWhenAddingImage = false;
#enableAutoLinking = false;
#eventAbortController = null;
#mlManager = null;
@ -321,6 +325,7 @@ class PDFViewer {
this.#mlManager = options.mlManager || null;
this.#enableHWA = options.enableHWA || false;
this.#supportsPinchToZoom = options.supportsPinchToZoom !== false;
this.#enableAutoLinking = options.enableAutoLinking || false;
this.defaultRenderingQueue = !options.renderingQueue;
if (
@ -990,6 +995,7 @@ class PDFViewer {
l10n: this.l10n,
layerProperties: this._layerProperties,
enableHWA: this.#enableHWA,
enableAutoLinking: this.#enableAutoLinking,
});
this._pages.push(pageView);
}

View file

@ -15,12 +15,14 @@
const {
AbortException,
AnnotationBorderStyleType,
AnnotationEditorLayer,
AnnotationEditorParamsType,
AnnotationEditorType,
AnnotationEditorUIManager,
AnnotationLayer,
AnnotationMode,
AnnotationType,
build,
ColorPicker,
createValidAbsoluteUrl,
@ -63,12 +65,14 @@ const {
export {
AbortException,
AnnotationBorderStyleType,
AnnotationEditorLayer,
AnnotationEditorParamsType,
AnnotationEditorType,
AnnotationEditorUIManager,
AnnotationLayer,
AnnotationMode,
AnnotationType,
build,
ColorPicker,
createValidAbsoluteUrl,