1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Correctly validate URLs in XFA documents (bug 1731240)

With this patch we'll ensure that only valid absolute URLs can be used in XFA documents, similar to the existing validation done for "regular" PDF documents.
Furthermore, we'll also attempt to add a default protocol (i.e. `http`) to URLs beginning with "www." in XFA documents; this on its own is enough to fix bug 1731240, see https://bugzilla.mozilla.org/show_bug.cgi?id=1731240
This commit is contained in:
Jonas Jenwald 2021-09-20 14:36:43 +02:00
parent 580bfad628
commit 81a1c1cef7
4 changed files with 106 additions and 21 deletions

View file

@ -13,6 +13,13 @@
* limitations under the License.
*/
import {
addDefaultProtocolToUrl,
collectActions,
MissingDataException,
toRomanNumerals,
tryConvertUrlEncoding,
} from "./core_utils.js";
import {
clearPrimitiveCaches,
Dict,
@ -24,11 +31,6 @@ import {
RefSet,
RefSetCache,
} from "./primitives.js";
import {
collectActions,
MissingDataException,
toRomanNumerals,
} from "./core_utils.js";
import {
createPromiseCapability,
createValidAbsoluteUrl,
@ -1283,21 +1285,6 @@ class Catalog {
* @param {ParseDestDictionaryParameters} params
*/
static parseDestDictionary(params) {
/**
 * Let URLs beginning with 'www.' default to using the 'http://' protocol.
 *
 * @param {string} url - The URL string to inspect.
 * @returns {string} `url` prefixed with "http://" when it looks like a
 *   protocol-less host name, otherwise `url` unchanged.
 */
function addDefaultProtocolToUrl(url) {
  if (!url.startsWith("www.")) {
    return url;
  }
  // Require a second dot after the "www." prefix, to avoid accidentally
  // treating a *relative* URL that points at a file named e.g. "www.pdf"
  // as if it were a host name.
  const hasSecondDot = url.includes(".", "www.".length);
  return hasSecondDot ? `http://${url}` : url;
}
/**
 * Best-effort re-decoding of a URL string as UTF-8.
 *
 * ISO 32000-1:2008, section 12.6.4.7, says that URIs should be encoded in
 * 7-bit ASCII; however some bad PDFs use UTF-8 encoding instead, see
 * Bugzilla 1122280.
 *
 * @param {string} url - The raw URL string.
 * @returns {string} The result of `stringToUTF8String(url)`, or `url` itself
 *   if that conversion throws.
 */
function tryConvertUrlEncoding(url) {
  let converted = url;
  try {
    converted = stringToUTF8String(url);
  } catch (ex) {
    // Conversion failed; fall through and hand back the input untouched.
  }
  return converted;
}
const destDict = params.destDict;
if (!isDict(destDict)) {
warn("parseDestDictionary: `destDict` must be a dictionary.");

View file

@ -18,6 +18,7 @@ import {
BaseException,
objectSize,
stringToPDFString,
stringToUTF8String,
warn,
} from "../shared/util.js";
import { Dict, isName, isRef, isStream, RefSet } from "./primitives.js";
@ -451,7 +452,23 @@ function validateCSSFont(cssFontInfo) {
return true;
}
/**
 * Let URLs beginning with 'www.' default to using the 'http://' protocol.
 *
 * @param {string} url - The URL string to inspect.
 * @returns {string} `url` prefixed with "http://" when it looks like a
 *   protocol-less host name, otherwise `url` unchanged.
 */
function addDefaultProtocolToUrl(url) {
  if (!url.startsWith("www.")) {
    return url;
  }
  // Require a second dot after the "www." prefix, to avoid accidentally
  // treating a *relative* URL that points at a file named e.g. "www.pdf"
  // as if it were a host name.
  const hasSecondDot = url.includes(".", "www.".length);
  return hasSecondDot ? `http://${url}` : url;
}
/**
 * Best-effort re-decoding of a URL string as UTF-8.
 *
 * ISO 32000-1:2008, section 12.6.4.7, says that URIs should be encoded in
 * 7-bit ASCII; however some bad PDFs use UTF-8 encoding instead, see
 * Bugzilla 1122280.
 *
 * @param {string} url - The raw URL string.
 * @returns {string} The result of `stringToUTF8String(url)`, or `url` itself
 *   if that conversion throws.
 */
function tryConvertUrlEncoding(url) {
  let converted = url;
  try {
    converted = stringToUTF8String(url);
  } catch (ex) {
    // Conversion failed; fall through and hand back the input untouched.
  }
  return converted;
}
export {
addDefaultProtocolToUrl,
collectActions,
encodeToXmlString,
escapePDFName,
@ -467,6 +484,7 @@ export {
readUint16,
readUint32,
toRomanNumerals,
tryConvertUrlEncoding,
validateCSSFont,
XRefEntryException,
XRefParseException,

View file

@ -29,8 +29,13 @@ import {
XmlObject,
} from "./xfa_object.js";
import { $buildXFAObject, NamespaceIds } from "./namespaces.js";
import {
addDefaultProtocolToUrl,
tryConvertUrlEncoding,
} from "../core_utils.js";
import { fixTextIndent, measureToString, setFontFamily } from "./html_utils.js";
import { getMeasurement, HTMLResult, stripQuotes } from "./utils.js";
import { createValidAbsoluteUrl } from "../../shared/util.js";
const XHTML_NS_ID = NamespaceIds.xhtml.id;
@ -321,7 +326,16 @@ class XhtmlObject extends XmlObject {
/**
 * XHTML `a` (anchor) element.
 *
 * The `href` property ends up holding either the `href` of the object
 * returned by `createValidAbsoluteUrl`, or an empty string when the attribute
 * is not a string or no such object is returned.
 */
class A extends XhtmlObject {
/**
 * @param attributes - Forwarded to the parent constructor; its `href`
 *   property is additionally read here.
 */
constructor(attributes) {
super(attributes, "a");
// NOTE(review): this looks like the pre-patch assignment kept by the diff
// view; as written, its value is overwritten by `this.href = href` below.
this.href = attributes.href || "";
// Start from an empty `href`; it is only replaced if validation succeeds.
let href = "";
if (typeof attributes.href === "string") {
// Prefix "www." URLs with a default protocol, then try to re-decode the
// string as UTF-8 before validating it.
let url = addDefaultProtocolToUrl(attributes.href);
url = tryConvertUrlEncoding(url);
// No base URL is passed here, so presumably only absolute URLs are
// accepted -- TODO confirm against `createValidAbsoluteUrl`.
const absoluteUrl = createValidAbsoluteUrl(url);
if (absoluteUrl) {
href = absoluteUrl.href;
}
}
this.href = href;
}
}