/* Copyright 2025 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import { AnnotationType, createValidAbsoluteUrl, Util } from "pdfjs-lib"; import { getOriginalIndex, normalize } from "./pdf_find_controller.js"; function DOMRectToPDF({ width, height, left, top }, pdfPageView) { if (width === 0 || height === 0) { return null; } const pageBox = pdfPageView.textLayer.div.getBoundingClientRect(); const bottomLeft = pdfPageView.getPagePoint( left - pageBox.left, top - pageBox.top ); const topRight = pdfPageView.getPagePoint( left - pageBox.left + width, top - pageBox.top + height ); return Util.normalizeRect([ bottomLeft[0], bottomLeft[1], topRight[0], topRight[1], ]); } function calculateLinkPosition(range, pdfPageView) { const rangeRects = range.getClientRects(); if (rangeRects.length === 1) { return { rect: DOMRectToPDF(rangeRects[0], pdfPageView) }; } const rect = [Infinity, Infinity, -Infinity, -Infinity]; const quadPoints = []; let i = 0; for (const domRect of rangeRects) { const normalized = DOMRectToPDF(domRect, pdfPageView); if (normalized === null) { continue; } quadPoints[i] = quadPoints[i + 4] = normalized[0]; quadPoints[i + 1] = quadPoints[i + 3] = normalized[3]; quadPoints[i + 2] = quadPoints[i + 6] = normalized[2]; quadPoints[i + 5] = quadPoints[i + 7] = normalized[1]; Util.rectBoundingBox(...normalized, rect); i += 8; } return { quadPoints, rect }; } /** * Given a DOM node `container` and an index into its text contents `offset`, * returns a pair consisting of text node that the `offset` actually points * to, together with the offset relative to that text node. * When the offset points at the boundary between two node, the result will * point to the first text node in depth-first traversal order. * * For example, given this DOM: *

abcdefghi

* * textPosition(p, 0) -> [#text "abc", 0] (before `a`) * textPosition(p, 2) -> [#text "abc", 2] (between `b` and `c`) * textPosition(p, 3) -> [#text "abc", 3] (after `c`) * textPosition(p, 5) -> [#text "def", 2] (between `e` and `f`) * textPosition(p, 6) -> [#text "def", 3] (after `f`) */ function textPosition(container, offset) { let currentContainer = container; do { if (currentContainer.nodeType === Node.TEXT_NODE) { const currentLength = currentContainer.textContent.length; if (offset <= currentLength) { return [currentContainer, offset]; } offset -= currentLength; } else if (currentContainer.firstChild) { currentContainer = currentContainer.firstChild; continue; } while (!currentContainer.nextSibling && currentContainer !== container) { currentContainer = currentContainer.parentNode; } if (currentContainer !== container) { currentContainer = currentContainer.nextSibling; } } while (currentContainer !== container); throw new Error("Offset is bigger than container's contents length."); } function createLinkAnnotation({ url, index, length }, pdfPageView, id) { const highlighter = pdfPageView._textHighlighter; const [{ begin, end }] = highlighter._convertMatches([index], [length]); const range = new Range(); range.setStart( ...textPosition(highlighter.textDivs[begin.divIdx], begin.offset) ); range.setEnd(...textPosition(highlighter.textDivs[end.divIdx], end.offset)); return { id: `inferred_link_${id}`, unsafeUrl: url, url, annotationType: AnnotationType.LINK, rotation: 0, ...calculateLinkPosition(range, pdfPageView), // Populated in the annotationLayer to avoid unnecessary object creation, // since most inferred links overlap existing LinkAnnotations: borderStyle: null, }; } class Autolinker { static #index = 0; static #regex; static findLinks(text) { // Regex can be tested and verified at https://regex101.com/r/rXoLiT/2. this.#regex ??= /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv; const [normalizedText, diffs] = normalize(text); const matches = normalizedText.matchAll(this.#regex); const links = []; for (const match of matches) { const [url, emailDomain] = match; let raw; if ( url.startsWith("www.") || url.startsWith("http://") || url.startsWith("https://") ) { raw = url; } else if (URL.canParse(`http://${emailDomain}`)) { raw = url.startsWith("mailto:") ? url : `mailto:${url}`; } else { continue; } const absoluteURL = createValidAbsoluteUrl(raw, null, { addDefaultProtocol: true, }); if (absoluteURL) { const [index, length] = getOriginalIndex( diffs, match.index, url.length ); links.push({ url: absoluteURL.href, index, length }); } } return links; } static processLinks(pdfPageView) { return this.findLinks( pdfPageView._textHighlighter.textContentItemsStr.join("\n") ).map(link => createLinkAnnotation(link, pdfPageView, this.#index++)); } } export { Autolinker };