diff --git a/src/core/annotation.js b/src/core/annotation.js index 4e881c6c9..d6b612b14 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -49,6 +49,8 @@ import { lookupNormalRect, lookupRect, numberToString, + RESOURCES_KEYS_OPERATOR_LIST, + RESOURCES_KEYS_TEXT_CONTENT, stringToAsciiOrUTF16BE, stringToUTF16String, } from "./core_utils.js"; @@ -1196,7 +1198,7 @@ class Annotation { const appearanceDict = appearance.dict; const resources = await this.loadResources( - ["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"], + RESOURCES_KEYS_OPERATOR_LIST, appearance ); const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]); @@ -1257,7 +1259,7 @@ class Annotation { } const resources = await this.loadResources( - ["ExtGState", "Font", "Properties", "XObject"], + RESOURCES_KEYS_TEXT_CONTENT, this.appearance ); diff --git a/src/core/core_utils.js b/src/core/core_utils.js index 28c4004c0..b23072d59 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31); const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0]; +const RESOURCES_KEYS_OPERATOR_LIST = [ + "ColorSpace", + "ExtGState", + "Font", + "Pattern", + "Properties", + "Shading", + "XObject", +]; + +const RESOURCES_KEYS_TEXT_CONTENT = [ + "ExtGState", + "Font", + "Properties", + "XObject", +]; + function getLookupTableFactory(initializer) { let lookup; return function () { @@ -745,6 +762,8 @@ export { readUint16, readUint32, recoverJsURL, + RESOURCES_KEYS_OPERATOR_LIST, + RESOURCES_KEYS_TEXT_CONTENT, stringToAsciiOrUTF16BE, stringToUTF16HexString, stringToUTF16String, diff --git a/src/core/document.js b/src/core/document.js index daa40bfcc..c543f718b 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -45,6 +45,8 @@ import { lookupNormalRect, MissingDataException, PDF_VERSION_REGEXP, + RESOURCES_KEYS_OPERATOR_LIST, + RESOURCES_KEYS_TEXT_CONTENT, validateCSSFont, XRefEntryException, XRefParseException, @@ -419,6 +421,25 @@ class Page { await objectLoader.load(); } + async #getMergedResources(streamDict, keys) { + // In rare cases /Resources are also found in the /Contents stream-dict, + // in addition to in the /Page dict, hence we need to prefer those when + // available (see issue18894.pdf). + const localResources = streamDict?.get("Resources"); + + if (!(localResources instanceof Dict)) { + return this.resources; + } + const objectLoader = new ObjectLoader(localResources, keys, this.xref); + await objectLoader.load(); + + return Dict.merge({ + xref: this.xref, + dictArray: [localResources, this.resources], + mergeSubDicts: true, + }); + } + async getOperatorList({ handler, sink, @@ -429,15 +450,7 @@ class Page { modifiedIds = null, }) { const contentStreamPromise = this.getContentStream(); - const resourcesPromise = this.loadResources([ - "ColorSpace", - "ExtGState", - "Font", - "Pattern", - "Properties", - "Shading", - "XObject", - ]); + const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST); const partialEvaluator = new PartialEvaluator({ xref: this.xref, @@ -525,11 +538,15 @@ class Page { contentStreamPromise, resourcesPromise, ]).then(async ([contentStream]) => { + const resources = await this.#getMergedResources( + contentStream.dict, + RESOURCES_KEYS_OPERATOR_LIST + ); const opList = new OperatorList(intent, sink); handler.send("StartRenderPage", { transparency: partialEvaluator.hasBlendModes( - this.resources, + resources, this.nonBlendModesSet ), pageIndex: this.pageIndex, @@ -539,7 +556,7 @@ class Page { await partialEvaluator.getOperatorList({ stream: contentStream, task, - resources: this.resources, + resources, operatorList: opList, }); return opList; @@ -642,12 +659,7 @@ class Page { sink, }) { const contentStreamPromise = this.getContentStream(); - const resourcesPromise = this.loadResources([ - "ExtGState", - "Font", - "Properties", - "XObject", - ]); + const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT); const langPromise = this.pdfManager.ensureCatalog("lang"); const [contentStream, , lang] = await Promise.all([ @@ -655,6 +667,11 @@ class Page { resourcesPromise, langPromise, ]); + const resources = await this.#getMergedResources( + contentStream.dict, + RESOURCES_KEYS_TEXT_CONTENT + ); + const partialEvaluator = new PartialEvaluator({ xref: this.xref, handler, @@ -672,7 +689,7 @@ class Page { return partialEvaluator.getTextContent({ stream: contentStream, task, - resources: this.resources, + resources, includeMarkedContent, disableNormalization, sink,