From 70e287343022205eb6bc61be42f9e866f7b3ac1b Mon Sep 17 00:00:00 2001 From: Ujjwal Sharma Date: Mon, 10 Feb 2025 16:29:43 +0100 Subject: [PATCH 1/2] Fix autolinking errors due to regex and email validation Fix some edge cases in the autolinking logic with the regex as well as validating email domains and add unit tests for them. Fixes: https://github.com/mozilla/pdf.js/issues/19462 --- test/unit/autolinker_spec.js | 4 ++++ web/autolinker.js | 32 +++++++++++++++++++------------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/test/unit/autolinker_spec.js b/test/unit/autolinker_spec.js index 23a30446c..2d2e4d673 100644 --- a/test/unit/autolinker_spec.js +++ b/test/unit/autolinker_spec.js @@ -87,6 +87,9 @@ describe("autolinker", function () { "CAP.cap@Gmail.Com", // Keep the original case. "mailto:CAP.cap@Gmail.Com", ], + ["partl@mail.boku.ac.at", "mailto:partl@mail.boku.ac.at"], + ["Irene.Hyna@bmwf.ac.at", "mailto:Irene.Hyna@bmwf.ac.at"], + ["", "mailto:hi@foo.bar.baz"], ]); }); @@ -140,6 +143,7 @@ describe("autolinker", function () { "http//[00:00:00:00:00:00", // Invalid IPv6 address. "http//[]", // Empty IPv6 address. "abc.example.com", // URL without scheme. + "JD?M$0QP)lKn06l1apKDC@\\qJ4B!!(5m+j.7F790m", // Not a valid email. ].join("\n") ); expect(matches.length).toEqual(0); diff --git a/web/autolinker.js b/web/autolinker.js index 098acb273..d3ee14c28 100644 --- a/web/autolinker.js +++ b/web/autolinker.js @@ -96,31 +96,37 @@ class Autolinker { static #regex; static findLinks(text) { - // Regex can be tested and verified at https://regex101.com/r/zgDwPE/1. + // Regex can be tested and verified at https://regex101.com/r/rXoLiT/2. this.#regex ??= - /\b(?:https?:\/\/|mailto:|www\.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+|\b[[\S--@]--\{]+@[\S--.]+\.[[\S--\[]--\p{P}]{2,}/gmv; + /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv; const [normalizedText, diffs] = normalize(text); const matches = normalizedText.matchAll(this.#regex); const links = []; for (const match of matches) { - const raw = - match[0].startsWith("www.") || - match[0].startsWith("mailto:") || - match[0].startsWith("http://") || - match[0].startsWith("https://") - ? match[0] - : `mailto:${match[0]}`; - const url = createValidAbsoluteUrl(raw, null, { + const [url, emailDomain] = match; + let raw; + if ( + url.startsWith("www.") || + url.startsWith("http://") || + url.startsWith("https://") + ) { + raw = url; + } else if (URL.canParse(`http://${emailDomain}`)) { + raw = url.startsWith("mailto:") ? url : `mailto:${url}`; + } else { + continue; + } + const absoluteURL = createValidAbsoluteUrl(raw, null, { addDefaultProtocol: true, }); - if (url) { + if (absoluteURL) { const [index, length] = getOriginalIndex( diffs, match.index, - match[0].length + url.length ); - links.push({ url: url.href, index, length }); + links.push({ url: absoluteURL.href, index, length }); } } return links; From 38ab358fb1162ad1f2fc0921a0b218ae481bf371 Mon Sep 17 00:00:00 2001 From: Ujjwal Sharma Date: Wed, 12 Feb 2025 01:30:38 +0100 Subject: [PATCH 2/2] Fix autolinking error due to redundant annotations on zooming Fix an issue where redundant links were being added to the annotation layer on zooming on the page with the links. --- test/integration/autolinker_spec.mjs | 59 +++++++++++++++++++++++++++- web/annotation_layer_builder.js | 5 ++- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/test/integration/autolinker_spec.mjs b/test/integration/autolinker_spec.mjs index 4492fa556..5a8321924 100644 --- a/test/integration/autolinker_spec.mjs +++ b/test/integration/autolinker_spec.mjs @@ -13,7 +13,15 @@ * limitations under the License. */ -import { closePages, loadAndWait } from "./test_utils.mjs"; +import { closePages, createPromise, loadAndWait } from "./test_utils.mjs"; + +function waitForLinkAnnotations(page) { + return createPromise(page, resolve => { + window.PDFViewerApplication.eventBus.on("linkannotationsadded", resolve, { + once: true, + }); + }); +} describe("autolinker", function () { describe("bug1019475_2.pdf", function () { @@ -38,6 +46,7 @@ describe("autolinker", function () { it("must appropriately add link annotations when relevant", async () => { await Promise.all( pages.map(async ([browserName, page]) => { + await waitForLinkAnnotations(page); const url = await page.$$eval( ".annotationLayer > .linkAnnotation > a", annotations => annotations.map(a => a.href) @@ -73,6 +82,7 @@ describe("autolinker", function () { it("must not add links when unnecessary", async () => { await Promise.all( pages.map(async ([browserName, page]) => { + await waitForLinkAnnotations(page); const linkIds = await page.$$eval( ".annotationLayer > .linkAnnotation > a", annotations => @@ -106,6 +116,7 @@ describe("autolinker", function () { it("must not add links that overlap even if the URLs are different", async () => { await Promise.all( pages.map(async ([browserName, page]) => { + await waitForLinkAnnotations(page); const linkIds = await page.$$eval( ".annotationLayer > .linkAnnotation > a", annotations => @@ -121,4 +132,50 @@ describe("autolinker", function () { ); }); }); + + describe("PR 19470", function () { + let pages; + + beforeAll(async () => { + pages = await loadAndWait( + "bug1019475_2.pdf", + ".annotationLayer", + null, + null, + { + enableAutoLinking: true, + } + ); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must not repeatedly add link annotations redundantly", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + await waitForLinkAnnotations(page); + let url = await page.$$eval( + ".annotationLayer > .linkAnnotation > a", + annotations => annotations.map(a => a.href) + ); + expect(url.length).withContext(`In ${browserName}`).toEqual(1); + + await page.evaluate(() => + window.PDFViewerApplication.pdfViewer.updateScale({ + drawingDelay: -1, + scaleFactor: 2, + }) + ); + await waitForLinkAnnotations(page); + url = await page.$$eval( + ".annotationLayer > .linkAnnotation > a", + annotations => annotations.map(a => a.href) + ); + expect(url.length).withContext(`In ${browserName}`).toEqual(1); + }) + ); + }); + }); }); diff --git a/web/annotation_layer_builder.js b/web/annotation_layer_builder.js index 24c4668c2..ba6ad0f9b 100644 --- a/web/annotation_layer_builder.js +++ b/web/annotation_layer_builder.js @@ -77,6 +77,8 @@ class AnnotationLayerBuilder { #eventAbortController = null; + #linksInjected = false; + /** * @param {AnnotationLayerBuilderOptions} options */ @@ -235,9 +237,10 @@ class AnnotationLayerBuilder { "`render` method must be called before `injectLinkAnnotations`." ); } - if (this._cancelled) { + if (this._cancelled || this.#linksInjected) { return; } + this.#linksInjected = true; const newLinks = this.#annotations.length ? this.#checkInferredLinks(inferredLinks)