From 3c0b8073be4880d50cca05bdd4776157083246ac Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Wed, 22 Jan 2014 12:27:44 +0100 Subject: [PATCH 1/2] Try to recover from bad URI value See https://github.com/mozilla/pdf.js/issues/4159 BAD (http://cms.di.unipi.it/files/bbec7791fac20e98127c77531e4031912392156c/testo.pdf): << /S /URI /URI /v#2findex.php#2fFile:Logo.png >> GOOD (http://www.ioi2012.org/wp-content/uploads/2011/12/practice.pdf): << /S /URI /URI (http://127.0.0.1/v/index.php/File:Logo.png) >> The URL should be wrapped in parentheses, but sometimes it isn't. Consequently, the value is interpreted as a Name (because of the leading "/"), and the resulting object is `{name: "v/index.php/File:Logo.png" }`. Obviously, this is not a string, so `url.indexOf` throws an error here. --- src/shared/annotation.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/shared/annotation.js b/src/shared/annotation.js index ea504e8aa..f54732ea0 100644 --- a/src/shared/annotation.js +++ b/src/shared/annotation.js @@ -640,7 +640,13 @@ var LinkAnnotation = (function LinkAnnotationClosure() { if (action) { var linkType = action.get('S').name; if (linkType === 'URI') { - var url = addDefaultProtocolToUrl(action.get('URI')); + var url = action.get('URI'); + if (isName(url)) { + // Some bad PDFs do not put parentheses around relative URLs. + url = '/' + url.name; + } else { + url = addDefaultProtocolToUrl(url); + } // TODO: pdf spec mentions urls can be relative to a Base // entry in the dictionary. if (!isValidUrl(url, false)) { From b35ced8c9e8ca8c0004ab8187e0c1ab771b08be7 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Wed, 22 Jan 2014 12:39:42 +0100 Subject: [PATCH 2/2] RFC 3986-compliant isValidUrl (protocol parsing) To avoid misinterpreting URLs like "/File:wikipedia.png" as non-relative URLs. 
--- src/shared/util.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/shared/util.js b/src/shared/util.js index 51fd42bee..312445565 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -223,7 +223,7 @@ var UnsupportedManager = PDFJS.UnsupportedManager = function combineUrl(baseUrl, url) { if (!url) return baseUrl; - if (url.indexOf(':') >= 0) + if (/^[a-z][a-z0-9+\-.]*:/i.test(url)) return url; if (url.charAt(0) == '/') { // absolute path @@ -247,11 +247,13 @@ function isValidUrl(url, allowRelative) { if (!url) { return false; } - var colon = url.indexOf(':'); - if (colon < 0) { + // RFC 3986 (http://tools.ietf.org/html/rfc3986#section-3.1) + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + var protocol = /^[a-z][a-z0-9+\-.]*(?=:)/i.exec(url); + if (!protocol) { return allowRelative; } - var protocol = url.substr(0, colon); + protocol = protocol[0].toLowerCase(); switch (protocol) { case 'http': case 'https':