2024-11-12 11:26:32 +05:30
|
|
|
/* Copyright 2025 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2025-02-10 10:03:28 +01:00
|
|
|
import { AnnotationType, createValidAbsoluteUrl, Util } from "pdfjs-lib";
|
2024-11-12 11:26:32 +05:30
|
|
|
import { getOriginalIndex, normalize } from "./pdf_find_controller.js";
|
|
|
|
|
|
|
|
function DOMRectToPDF({ width, height, left, top }, pdfPageView) {
|
|
|
|
if (width === 0 || height === 0) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
const pageBox = pdfPageView.textLayer.div.getBoundingClientRect();
|
|
|
|
const bottomLeft = pdfPageView.getPagePoint(
|
|
|
|
left - pageBox.left,
|
|
|
|
top - pageBox.top
|
|
|
|
);
|
|
|
|
const topRight = pdfPageView.getPagePoint(
|
|
|
|
left - pageBox.left + width,
|
|
|
|
top - pageBox.top + height
|
|
|
|
);
|
|
|
|
|
|
|
|
return Util.normalizeRect([
|
|
|
|
bottomLeft[0],
|
|
|
|
bottomLeft[1],
|
|
|
|
topRight[0],
|
|
|
|
topRight[1],
|
|
|
|
]);
|
|
|
|
}
|
|
|
|
|
|
|
|
function calculateLinkPosition(range, pdfPageView) {
|
|
|
|
const rangeRects = range.getClientRects();
|
|
|
|
if (rangeRects.length === 1) {
|
|
|
|
return { rect: DOMRectToPDF(rangeRects[0], pdfPageView) };
|
|
|
|
}
|
|
|
|
|
|
|
|
const rect = [Infinity, Infinity, -Infinity, -Infinity];
|
|
|
|
const quadPoints = [];
|
|
|
|
let i = 0;
|
|
|
|
for (const domRect of rangeRects) {
|
|
|
|
const normalized = DOMRectToPDF(domRect, pdfPageView);
|
|
|
|
if (normalized === null) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
quadPoints[i] = quadPoints[i + 4] = normalized[0];
|
|
|
|
quadPoints[i + 1] = quadPoints[i + 3] = normalized[3];
|
|
|
|
quadPoints[i + 2] = quadPoints[i + 6] = normalized[2];
|
|
|
|
quadPoints[i + 5] = quadPoints[i + 7] = normalized[1];
|
|
|
|
|
2025-03-23 12:09:51 +01:00
|
|
|
Util.rectBoundingBox(...normalized, rect);
|
2024-11-12 11:26:32 +05:30
|
|
|
i += 8;
|
|
|
|
}
|
|
|
|
return { quadPoints, rect };
|
|
|
|
}
|
|
|
|
|
2025-02-28 14:20:31 +01:00
|
|
|
/**
|
|
|
|
* Given a DOM node `container` and an index into its text contents `offset`,
|
|
|
|
* returns a pair consisting of text node that the `offset` actually points
|
|
|
|
* to, together with the offset relative to that text node.
|
|
|
|
* When the offset points at the boundary between two node, the result will
|
|
|
|
* point to the first text node in depth-first traversal order.
|
|
|
|
*
|
|
|
|
* For example, given this DOM:
|
|
|
|
* <p>abc<span>def</span>ghi</p>
|
|
|
|
*
|
|
|
|
* textPosition(p, 0) -> [#text "abc", 0] (before `a`)
|
|
|
|
* textPosition(p, 2) -> [#text "abc", 2] (between `b` and `c`)
|
|
|
|
* textPosition(p, 3) -> [#text "abc", 3] (after `c`)
|
|
|
|
* textPosition(p, 5) -> [#text "def", 2] (between `e` and `f`)
|
|
|
|
* textPosition(p, 6) -> [#text "def", 3] (after `f`)
|
|
|
|
*/
|
|
|
|
function textPosition(container, offset) {
|
|
|
|
let currentContainer = container;
|
|
|
|
do {
|
|
|
|
if (currentContainer.nodeType === Node.TEXT_NODE) {
|
|
|
|
const currentLength = currentContainer.textContent.length;
|
|
|
|
if (offset <= currentLength) {
|
|
|
|
return [currentContainer, offset];
|
|
|
|
}
|
|
|
|
offset -= currentLength;
|
|
|
|
} else if (currentContainer.firstChild) {
|
|
|
|
currentContainer = currentContainer.firstChild;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (!currentContainer.nextSibling && currentContainer !== container) {
|
|
|
|
currentContainer = currentContainer.parentNode;
|
|
|
|
}
|
|
|
|
if (currentContainer !== container) {
|
|
|
|
currentContainer = currentContainer.nextSibling;
|
|
|
|
}
|
|
|
|
} while (currentContainer !== container);
|
|
|
|
throw new Error("Offset is bigger than container's contents length.");
|
|
|
|
}
|
|
|
|
|
2024-11-12 11:26:32 +05:30
|
|
|
function createLinkAnnotation({ url, index, length }, pdfPageView, id) {
|
|
|
|
const highlighter = pdfPageView._textHighlighter;
|
|
|
|
const [{ begin, end }] = highlighter._convertMatches([index], [length]);
|
|
|
|
|
|
|
|
const range = new Range();
|
2025-02-28 14:20:31 +01:00
|
|
|
range.setStart(
|
|
|
|
...textPosition(highlighter.textDivs[begin.divIdx], begin.offset)
|
|
|
|
);
|
|
|
|
range.setEnd(...textPosition(highlighter.textDivs[end.divIdx], end.offset));
|
2024-11-12 11:26:32 +05:30
|
|
|
|
|
|
|
return {
|
|
|
|
id: `inferred_link_${id}`,
|
|
|
|
unsafeUrl: url,
|
|
|
|
url,
|
|
|
|
annotationType: AnnotationType.LINK,
|
|
|
|
rotation: 0,
|
|
|
|
...calculateLinkPosition(range, pdfPageView),
|
2025-02-10 10:03:28 +01:00
|
|
|
// Populated in the annotationLayer to avoid unnecessary object creation,
|
|
|
|
// since most inferred links overlap existing LinkAnnotations:
|
|
|
|
borderStyle: null,
|
2024-11-12 11:26:32 +05:30
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
class Autolinker {
|
|
|
|
static #index = 0;
|
|
|
|
|
|
|
|
static #regex;
|
|
|
|
|
|
|
|
static findLinks(text) {
|
2025-02-10 16:29:43 +01:00
|
|
|
// Regex can be tested and verified at https://regex101.com/r/rXoLiT/2.
|
2024-11-12 11:26:32 +05:30
|
|
|
this.#regex ??=
|
2025-02-10 16:29:43 +01:00
|
|
|
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
|
2024-11-12 11:26:32 +05:30
|
|
|
|
|
|
|
const [normalizedText, diffs] = normalize(text);
|
|
|
|
const matches = normalizedText.matchAll(this.#regex);
|
|
|
|
const links = [];
|
|
|
|
for (const match of matches) {
|
2025-02-10 16:29:43 +01:00
|
|
|
const [url, emailDomain] = match;
|
|
|
|
let raw;
|
|
|
|
if (
|
|
|
|
url.startsWith("www.") ||
|
|
|
|
url.startsWith("http://") ||
|
|
|
|
url.startsWith("https://")
|
|
|
|
) {
|
|
|
|
raw = url;
|
|
|
|
} else if (URL.canParse(`http://${emailDomain}`)) {
|
|
|
|
raw = url.startsWith("mailto:") ? url : `mailto:${url}`;
|
|
|
|
} else {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
const absoluteURL = createValidAbsoluteUrl(raw, null, {
|
2024-11-12 11:26:32 +05:30
|
|
|
addDefaultProtocol: true,
|
|
|
|
});
|
2025-02-10 16:29:43 +01:00
|
|
|
if (absoluteURL) {
|
2024-11-12 11:26:32 +05:30
|
|
|
const [index, length] = getOriginalIndex(
|
|
|
|
diffs,
|
|
|
|
match.index,
|
2025-02-10 16:29:43 +01:00
|
|
|
url.length
|
2024-11-12 11:26:32 +05:30
|
|
|
);
|
2025-02-10 16:29:43 +01:00
|
|
|
links.push({ url: absoluteURL.href, index, length });
|
2024-11-12 11:26:32 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
return links;
|
|
|
|
}
|
|
|
|
|
|
|
|
static processLinks(pdfPageView) {
|
|
|
|
return this.findLinks(
|
|
|
|
pdfPageView._textHighlighter.textContentItemsStr.join("\n")
|
|
|
|
).map(link => createLinkAnnotation(link, pdfPageView, this.#index++));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
export { Autolinker };
|