mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 22:58:07 +02:00
Fix autolinking errors due to regex and email validation
Fix some edge cases in the autolinking regex, validate email domains before linking them, and add unit tests for these cases. Fixes: https://github.com/mozilla/pdf.js/issues/19462
This commit is contained in:
parent
a857ca3261
commit
70e2873430
2 changed files with 23 additions and 13 deletions
|
@ -96,31 +96,37 @@ class Autolinker {
|
|||
static #regex;
|
||||
|
||||
static findLinks(text) {
|
||||
// Regex can be tested and verified at https://regex101.com/r/zgDwPE/1.
|
||||
// Regex can be tested and verified at https://regex101.com/r/rXoLiT/2.
|
||||
this.#regex ??=
|
||||
/\b(?:https?:\/\/|mailto:|www\.)(?:[[\S--\[]--\p{P}]|\/|[\p{P}--\[]+[[\S--\[]--\p{P}])+|\b[[\S--@]--\{]+@[\S--.]+\.[[\S--\[]--\p{P}]{2,}/gmv;
|
||||
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
|
||||
|
||||
const [normalizedText, diffs] = normalize(text);
|
||||
const matches = normalizedText.matchAll(this.#regex);
|
||||
const links = [];
|
||||
for (const match of matches) {
|
||||
const raw =
|
||||
match[0].startsWith("www.") ||
|
||||
match[0].startsWith("mailto:") ||
|
||||
match[0].startsWith("http://") ||
|
||||
match[0].startsWith("https://")
|
||||
? match[0]
|
||||
: `mailto:${match[0]}`;
|
||||
const url = createValidAbsoluteUrl(raw, null, {
|
||||
const [url, emailDomain] = match;
|
||||
let raw;
|
||||
if (
|
||||
url.startsWith("www.") ||
|
||||
url.startsWith("http://") ||
|
||||
url.startsWith("https://")
|
||||
) {
|
||||
raw = url;
|
||||
} else if (URL.canParse(`http://${emailDomain}`)) {
|
||||
raw = url.startsWith("mailto:") ? url : `mailto:${url}`;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
const absoluteURL = createValidAbsoluteUrl(raw, null, {
|
||||
addDefaultProtocol: true,
|
||||
});
|
||||
if (url) {
|
||||
if (absoluteURL) {
|
||||
const [index, length] = getOriginalIndex(
|
||||
diffs,
|
||||
match.index,
|
||||
match[0].length
|
||||
url.length
|
||||
);
|
||||
links.push({ url: url.href, index, length });
|
||||
links.push({ url: absoluteURL.href, index, length });
|
||||
}
|
||||
}
|
||||
return links;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue