1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-18 14:18:23 +02:00

Prefer /Resources from the /Contents stream-dict, if available

In rare cases /Resources are found in the /Contents stream-dict as well as in the /Page dict; when present, the stream-dict /Resources need to be preferred (see `issue18894.pdf`).
This commit is contained in:
Jonas Jenwald 2025-04-11 12:35:53 +02:00
parent 53c5a53668
commit 7a94fafd30
3 changed files with 58 additions and 20 deletions

View file

@ -49,6 +49,8 @@ import {
lookupNormalRect,
lookupRect,
numberToString,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16String,
} from "./core_utils.js";
@ -1196,7 +1198,7 @@ class Annotation {
const appearanceDict = appearance.dict;
const resources = await this.loadResources(
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"],
RESOURCES_KEYS_OPERATOR_LIST,
appearance
);
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
@ -1257,7 +1259,7 @@ class Annotation {
}
const resources = await this.loadResources(
["ExtGState", "Font", "Properties", "XObject"],
RESOURCES_KEYS_TEXT_CONTENT,
this.appearance
);

View file

@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
// Names of the /Resources sub-entries that are requested when resources are
// loaded for building an operator list. Shared (via the module exports) so
// that every caller asks for the same set of keys.
// NOTE(review): the array is exported as-is and not frozen; callers are
// presumably expected to treat it as read-only.
const RESOURCES_KEYS_OPERATOR_LIST = [
"ColorSpace",
"ExtGState",
"Font",
"Pattern",
"Properties",
"Shading",
"XObject",
];
// Names of the /Resources sub-entries that are requested when resources are
// loaded for text-content extraction. Every key listed here also appears in
// `RESOURCES_KEYS_OPERATOR_LIST`.
// NOTE(review): the array is exported as-is and not frozen; callers are
// presumably expected to treat it as read-only.
const RESOURCES_KEYS_TEXT_CONTENT = [
"ExtGState",
"Font",
"Properties",
"XObject",
];
function getLookupTableFactory(initializer) {
let lookup;
return function () {
@ -745,6 +762,8 @@ export {
readUint16,
readUint32,
recoverJsURL,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16HexString,
stringToUTF16String,

View file

@ -45,6 +45,8 @@ import {
lookupNormalRect,
MissingDataException,
PDF_VERSION_REGEXP,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
validateCSSFont,
XRefEntryException,
XRefParseException,
@ -419,6 +421,25 @@ class Page {
await objectLoader.load();
}
async #getMergedResources(streamDict, keys) {
// In rare cases /Resources are also found in the /Contents stream-dict,
// in addition to in the /Page dict, hence we need to prefer those when
// available (see issue18894.pdf).
const localResources = streamDict?.get("Resources");
if (!(localResources instanceof Dict)) {
return this.resources;
}
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
await objectLoader.load();
return Dict.merge({
xref: this.xref,
dictArray: [localResources, this.resources],
mergeSubDicts: true,
});
}
async getOperatorList({
handler,
sink,
@ -429,15 +450,7 @@ class Page {
modifiedIds = null,
}) {
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ColorSpace",
"ExtGState",
"Font",
"Pattern",
"Properties",
"Shading",
"XObject",
]);
const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
const partialEvaluator = new PartialEvaluator({
xref: this.xref,
@ -525,11 +538,15 @@ class Page {
contentStreamPromise,
resourcesPromise,
]).then(async ([contentStream]) => {
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_OPERATOR_LIST
);
const opList = new OperatorList(intent, sink);
handler.send("StartRenderPage", {
transparency: partialEvaluator.hasBlendModes(
this.resources,
resources,
this.nonBlendModesSet
),
pageIndex: this.pageIndex,
@ -539,7 +556,7 @@ class Page {
await partialEvaluator.getOperatorList({
stream: contentStream,
task,
resources: this.resources,
resources,
operatorList: opList,
});
return opList;
@ -642,12 +659,7 @@ class Page {
sink,
}) {
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ExtGState",
"Font",
"Properties",
"XObject",
]);
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
const langPromise = this.pdfManager.ensureCatalog("lang");
const [contentStream, , lang] = await Promise.all([
@ -655,6 +667,11 @@ class Page {
resourcesPromise,
langPromise,
]);
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_TEXT_CONTENT
);
const partialEvaluator = new PartialEvaluator({
xref: this.xref,
handler,
@ -672,7 +689,7 @@ class Page {
return partialEvaluator.getTextContent({
stream: contentStream,
task,
resources: this.resources,
resources,
includeMarkedContent,
disableNormalization,
sink,