1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Merge pull request #19470 from ryzokuken/fix-autolinking-edge-cases

Fix autolinking errors
This commit is contained in:
Jonas Jenwald 2025-02-19 14:43:14 +01:00 committed by GitHub
commit d10cea5dd0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 85 additions and 15 deletions

View file

@ -13,7 +13,15 @@
* limitations under the License.
*/
import { closePages, loadAndWait } from "./test_utils.mjs";
import { closePages, createPromise, loadAndWait } from "./test_utils.mjs";
/**
 * Waits until the viewer signals that link annotations have been added.
 *
 * A one-time "linkannotationsadded" listener is registered on the viewer's
 * event bus, with the promise's `resolve` function used as the listener.
 *
 * NOTE(review): `createPromise` is imported from test_utils.mjs; presumably it
 * runs the callback inside the page rather than in the test process, so the
 * callback deliberately references nothing from the enclosing scope — confirm.
 *
 * @param {Object} page - The page to observe; handed to `createPromise` as-is.
 * @returns {*} Whatever `createPromise` returns for this page and callback.
 */
function waitForLinkAnnotations(page) {
  return createPromise(page, resolve => {
    const { eventBus } = window.PDFViewerApplication;
    eventBus.on("linkannotationsadded", resolve, { once: true });
  });
}
describe("autolinker", function () {
describe("bug1019475_2.pdf", function () {
@ -38,6 +46,7 @@ describe("autolinker", function () {
it("must appropriately add link annotations when relevant", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
await waitForLinkAnnotations(page);
const url = await page.$$eval(
".annotationLayer > .linkAnnotation > a",
annotations => annotations.map(a => a.href)
@ -73,6 +82,7 @@ describe("autolinker", function () {
it("must not add links when unnecessary", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
await waitForLinkAnnotations(page);
const linkIds = await page.$$eval(
".annotationLayer > .linkAnnotation > a",
annotations =>
@ -106,6 +116,7 @@ describe("autolinker", function () {
it("must not add links that overlap even if the URLs are different", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
await waitForLinkAnnotations(page);
const linkIds = await page.$$eval(
".annotationLayer > .linkAnnotation > a",
annotations =>
@ -121,4 +132,50 @@ describe("autolinker", function () {
);
});
});
describe("PR 19470", function () {
  // Selector for the anchor element of every link annotation in the layer.
  const LINK_SELECTOR = ".annotationLayer > .linkAnnotation > a";

  let pages;

  // Collects the `href` of every matching link anchor on the page and
  // returns how many there are. The `$$eval` callback is kept free of
  // outer-scope references.
  const countLinks = async page => {
    const hrefs = await page.$$eval(LINK_SELECTOR, annotations =>
      annotations.map(a => a.href)
    );
    return hrefs.length;
  };

  beforeAll(async () => {
    const viewerOptions = { enableAutoLinking: true };
    pages = await loadAndWait(
      "bug1019475_2.pdf",
      ".annotationLayer",
      null,
      null,
      viewerOptions
    );
  });

  afterAll(async () => {
    await closePages(pages);
  });

  it("must not repeatedly add link annotations redundantly", async () => {
    const checkPage = async ([browserName, page]) => {
      // Initial state: exactly one link once the annotations were added.
      await waitForLinkAnnotations(page);
      expect(await countLinks(page))
        .withContext(`In ${browserName}`)
        .toEqual(1);

      // Change the scale, then wait for the link annotations event again.
      await page.evaluate(() =>
        window.PDFViewerApplication.pdfViewer.updateScale({
          drawingDelay: -1,
          scaleFactor: 2,
        })
      );
      await waitForLinkAnnotations(page);

      // The link count must still be one after the scale change.
      expect(await countLinks(page))
        .withContext(`In ${browserName}`)
        .toEqual(1);
    };

    await Promise.all(pages.map(checkPage));
  });
});
});

View file

@ -87,6 +87,9 @@ describe("autolinker", function () {
"CAP.cap@Gmail.Com", // Keep the original case.
"mailto:CAP.cap@Gmail.Com",
],
["partl@mail.boku.ac.at", "mailto:partl@mail.boku.ac.at"],
["Irene.Hyna@bmwf.ac.at", "mailto:Irene.Hyna@bmwf.ac.at"],
["<hi@foo.bar.baz>", "mailto:hi@foo.bar.baz"],
]);
});
@ -140,6 +143,7 @@ describe("autolinker", function () {
"http//[00:00:00:00:00:00", // Invalid IPv6 address.
"http//[]", // Empty IPv6 address.
"abc.example.com", // URL without scheme.
"JD?M$0QP)lKn06l1apKDC@\\qJ4B!!(5m+j.7F790m", // Not a valid email.
].join("\n")
);
expect(matches.length).toEqual(0);

View file

@ -77,6 +77,8 @@ class AnnotationLayerBuilder {
#eventAbortController = null;
#linksInjected = false;
/**
* @param {AnnotationLayerBuilderOptions} options
*/
@ -235,9 +237,10 @@ class AnnotationLayerBuilder {
"`render` method must be called before `injectLinkAnnotations`."
);
}
if (this._cancelled) {
if (this._cancelled || this.#linksInjected) {
return;
}
this.#linksInjected = true;
const newLinks = this.#annotations.length
? this.#checkInferredLinks(inferredLinks)

View file

@ -96,31 +96,37 @@ class Autolinker {
static #regex;
static findLinks(text) {
// Regex can be tested and verified at https://regex101.com/r/zgDwPE/1.
// Regex can be tested and verified at https://regex101.com/r/rXoLiT/2.
this.#regex ??=
/\b(?:https?:\/\/|mailto:|www\.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+|\b[[\S--@]--\{]+@[\S--.]+\.[[\S--\[]--\p{P}]{2,}/gmv;
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
const [normalizedText, diffs] = normalize(text);
const matches = normalizedText.matchAll(this.#regex);
const links = [];
for (const match of matches) {
const raw =
match[0].startsWith("www.") ||
match[0].startsWith("mailto:") ||
match[0].startsWith("http://") ||
match[0].startsWith("https://")
? match[0]
: `mailto:${match[0]}`;
const url = createValidAbsoluteUrl(raw, null, {
const [url, emailDomain] = match;
let raw;
if (
url.startsWith("www.") ||
url.startsWith("http://") ||
url.startsWith("https://")
) {
raw = url;
} else if (URL.canParse(`http://${emailDomain}`)) {
raw = url.startsWith("mailto:") ? url : `mailto:${url}`;
} else {
continue;
}
const absoluteURL = createValidAbsoluteUrl(raw, null, {
addDefaultProtocol: true,
});
if (url) {
if (absoluteURL) {
const [index, length] = getOriginalIndex(
diffs,
match.index,
match[0].length
url.length
);
links.push({ url: url.href, index, length });
links.push({ url: absoluteURL.href, index, length });
}
}
return links;