From a61a4b18cc4b195c7109e9f4673c840f3852e47a Mon Sep 17 00:00:00 2001 From: Martin Heller Date: Mon, 11 May 2015 00:46:59 +0200 Subject: [PATCH] URL annotations handled as UTF-8 to accommodate some bad PDFs. For proper 7-bit ASCII this makes no difference. Fixes Bug 1122280. --- src/core/annotation.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/core/annotation.js b/src/core/annotation.js index bd4fed891..32974b568 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -16,7 +16,8 @@ */ /* globals PDFJS, Util, isDict, isName, stringToPDFString, warn, Dict, Stream, stringToBytes, assert, Promise, isArray, ObjectLoader, OperatorList, - isValidUrl, OPS, createPromiseCapability, AnnotationType */ + isValidUrl, OPS, createPromiseCapability, AnnotationType, + stringToUTF8String */ 'use strict'; @@ -500,7 +501,15 @@ var LinkAnnotation = (function LinkAnnotationClosure() { if (!isValidUrl(url, false)) { url = ''; } - data.url = url; + // According to ISO 32000-1:2008, section 12.6.4.7, + // URI should be encoded in 7-bit ASCII. + // Some bad PDFs may have URIs in UTF-8 encoding, see Bugzilla 1122280. + try { + data.url = stringToUTF8String(url); + } catch (e) { + // Fall back to a simple copy. + data.url = url; + } } else if (linkType === 'GoTo') { data.dest = action.get('D'); } else if (linkType === 'GoToR') {