1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-18 14:18:23 +02:00

Merge pull request #19803 from Snuffleupagus/issue-19800

Catch circular references in /Form XObjects (issue 19800)
This commit is contained in:
Jonas Jenwald 2025-04-11 17:03:42 +02:00 committed by GitHub
commit 9c6e2e6df0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 201 additions and 25 deletions

View file

@ -49,6 +49,8 @@ import {
lookupNormalRect,
lookupRect,
numberToString,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16String,
} from "./core_utils.js";
@ -1196,7 +1198,7 @@ class Annotation {
const appearanceDict = appearance.dict;
const resources = await this.loadResources(
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"],
RESOURCES_KEYS_OPERATOR_LIST,
appearance
);
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
@ -1257,7 +1259,7 @@ class Annotation {
}
const resources = await this.loadResources(
["ExtGState", "Font", "Properties", "XObject"],
RESOURCES_KEYS_TEXT_CONTENT,
this.appearance
);

View file

@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
/**
 * Names of the /Resources sub-entries that are loaded before an operator
 * list is built.
 *
 * The array is shared by reference between every caller that imports it, so
 * it is frozen to prevent one caller from accidentally changing the keys
 * seen by all the others.
 *
 * @type {ReadonlyArray<string>}
 */
const RESOURCES_KEYS_OPERATOR_LIST = Object.freeze([
  "ColorSpace",
  "ExtGState",
  "Font",
  "Pattern",
  "Properties",
  "Shading",
  "XObject",
]);
/**
 * Names of the /Resources sub-entries that are loaded before text content
 * is extracted.
 *
 * The array is shared by reference between every caller that imports it, so
 * it is frozen to prevent one caller from accidentally changing the keys
 * seen by all the others.
 *
 * @type {ReadonlyArray<string>}
 */
const RESOURCES_KEYS_TEXT_CONTENT = Object.freeze([
  "ExtGState",
  "Font",
  "Properties",
  "XObject",
]);
function getLookupTableFactory(initializer) {
let lookup;
return function () {
@ -745,6 +762,8 @@ export {
readUint16,
readUint32,
recoverJsURL,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16HexString,
stringToUTF16String,

View file

@ -45,6 +45,8 @@ import {
lookupNormalRect,
MissingDataException,
PDF_VERSION_REGEXP,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
validateCSSFont,
XRefEntryException,
XRefParseException,
@ -419,6 +421,25 @@ class Page {
await objectLoader.load();
}
async #getMergedResources(streamDict, keys) {
// In rare cases /Resources are also found in the /Contents stream-dict,
// in addition to in the /Page dict, hence we need to prefer those when
// available (see issue18894.pdf).
const localResources = streamDict?.get("Resources");
if (!(localResources instanceof Dict)) {
return this.resources;
}
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
await objectLoader.load();
return Dict.merge({
xref: this.xref,
dictArray: [localResources, this.resources],
mergeSubDicts: true,
});
}
async getOperatorList({
handler,
sink,
@ -429,15 +450,7 @@ class Page {
modifiedIds = null,
}) {
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ColorSpace",
"ExtGState",
"Font",
"Pattern",
"Properties",
"Shading",
"XObject",
]);
const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
const partialEvaluator = new PartialEvaluator({
xref: this.xref,
@ -525,11 +538,15 @@ class Page {
contentStreamPromise,
resourcesPromise,
]).then(async ([contentStream]) => {
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_OPERATOR_LIST
);
const opList = new OperatorList(intent, sink);
handler.send("StartRenderPage", {
transparency: partialEvaluator.hasBlendModes(
this.resources,
resources,
this.nonBlendModesSet
),
pageIndex: this.pageIndex,
@ -539,7 +556,7 @@ class Page {
await partialEvaluator.getOperatorList({
stream: contentStream,
task,
resources: this.resources,
resources,
operatorList: opList,
});
return opList;
@ -642,12 +659,7 @@ class Page {
sink,
}) {
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ExtGState",
"Font",
"Properties",
"XObject",
]);
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
const langPromise = this.pdfManager.ensureCatalog("lang");
const [contentStream, , lang] = await Promise.all([
@ -655,6 +667,11 @@ class Page {
resourcesPromise,
langPromise,
]);
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_TEXT_CONTENT
);
const partialEvaluator = new PartialEvaluator({
xref: this.xref,
handler,
@ -672,7 +689,7 @@ class Page {
return partialEvaluator.getTextContent({
stream: contentStream,
task,
resources: this.resources,
resources,
includeMarkedContent,
disableNormalization,
sink,

View file

@ -462,7 +462,8 @@ class PartialEvaluator {
operatorList,
task,
initialState,
localColorSpaceCache
localColorSpaceCache,
seenRefs
) {
const dict = xobj.dict;
const matrix = lookupMatrix(dict.getArray("Matrix"), null);
@ -526,6 +527,7 @@ class PartialEvaluator {
resources: dict.get("Resources") || resources,
operatorList,
initialState,
prevRefs: seenRefs,
});
operatorList.addOp(OPS.paintFormXObjectEnd, []);
@ -850,7 +852,8 @@ class PartialEvaluator {
operatorList,
task,
stateManager,
localColorSpaceCache
localColorSpaceCache,
seenRefs
) {
const smaskContent = smask.get("G");
const smaskOptions = {
@ -880,7 +883,8 @@ class PartialEvaluator {
operatorList,
task,
stateManager.state.clone({ newPath: true }),
localColorSpaceCache
localColorSpaceCache,
seenRefs
);
}
@ -1065,6 +1069,7 @@ class PartialEvaluator {
stateManager,
localGStateCache,
localColorSpaceCache,
seenRefs,
}) {
const gStateRef = gState.objId;
let isSimpleGState = true;
@ -1127,7 +1132,8 @@ class PartialEvaluator {
operatorList,
task,
stateManager,
localColorSpaceCache
localColorSpaceCache,
seenRefs
)
);
gStateObj.push([key, true]);
@ -1696,7 +1702,19 @@ class PartialEvaluator {
operatorList,
initialState = null,
fallbackFontDict = null,
prevRefs = null,
}) {
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
if (objId) {
if (prevRefs?.has(objId)) {
throw new Error(
`getOperatorList - ignoring circular reference: ${objId}`
);
}
seenRefs.put(objId);
}
// Ensure that `resources`/`initialState` is correctly initialized,
// even if the provided parameter is e.g. `null`.
resources ||= Dict.empty;
@ -1808,7 +1826,8 @@ class PartialEvaluator {
operatorList,
task,
stateManager.state.clone({ newPath: true }),
localColorSpaceCache
localColorSpaceCache,
seenRefs
)
.then(function () {
stateManager.restore();
@ -2158,6 +2177,7 @@ class PartialEvaluator {
stateManager,
localGStateCache,
localColorSpaceCache,
seenRefs,
})
.then(resolveGState, rejectGState);
}).catch(function (reason) {
@ -2339,7 +2359,19 @@ class PartialEvaluator {
markedContentData = null,
disableNormalization = false,
keepWhiteSpace = false,
prevRefs = null,
}) {
const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs);
if (objId) {
if (prevRefs?.has(objId)) {
throw new Error(
`getTextContent - ignoring circular reference: ${objId}`
);
}
seenRefs.put(objId);
}
// Ensure that `resources`/`stateManager` is correctly initialized,
// even if the provided parameter is e.g. `null`.
resources ||= Dict.empty;
@ -3326,6 +3358,7 @@ class PartialEvaluator {
markedContentData,
disableNormalization,
keepWhiteSpace,
prevRefs: seenRefs,
})
.then(function () {
if (!sinkWrapper.enqueueInvoked) {

View file

@ -206,6 +206,7 @@
!issue3928.pdf
!issue8565.pdf
!clippath.pdf
!issue19800.pdf
!issue8795_reduced.pdf
!bug1755507.pdf
!close-path-bug.pdf

90
test/pdfs/issue19800.pdf Normal file
View file

@ -0,0 +1,90 @@
%PDF-1.4
1 0 obj
<<
/Type /Catalog
/Outlines 2 0 R
/Pages 3 0 R
>>
endobj
2 0 obj
<<
/Type /Outlines
/Count 0
>>
endobj
3 0 obj
<<
/Type /Pages
/Kids [ 4 0 R ]
/Count 1
>>
endobj
4 0 obj
<<
/Type /Page
/Parent 3 0 R
/MediaBox [ 0 0 500 300 ]
/Contents 5 0 R
/Resources <<
/ProcSet [ /PDF /Text ]
/XObject << /X1 6 0 R >>
>>
>>
endobj
5 0 obj
<< /Length 24 >>
stream
1 0 0 1 25 25 cm /X1 Do
endstream
endobj
6 0 obj
<< /Subtype /Form
/BBox [0 0 1000 1000]
/Length 61
/Resources <<
/ProcSet [ /PDF /Text ]
/Font << /F1 7 0 R >>
/XObject << /X0 8 0 R >>
>>
>>
stream
BT
/F1 24 Tf
(Hello world) Tj
ET
0.5 0 0 0.5 25 25 cm /X0 Do
endstream
endobj
7 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /MacRomanEncoding
>>
endobj
8 0 obj
<< /Subtype /Form
/BBox [0 0 1000 1000]
/Length 61
/Resources <<
/ProcSet [ /PDF /Text ]
/Font << /F1 7 0 R >>
/XObject << /X1 6 0 R >>
>>
>>
stream
BT
/F1 24 Tf
(Hello world) Tj
ET
0.5 0 0 0.5 25 25 cm /X1 Do
endstream
endobj
trailer
<<
/Size 8
/Root 1 0 R
>>
%%EOF

View file

@ -6897,6 +6897,20 @@
"lastPage": 2,
"type": "eq"
},
{
"id": "issue19800-eq",
"file": "pdfs/issue19800.pdf",
"md5": "92825d3178196bdd01096c4081609efd",
"rounds": 1,
"type": "eq"
},
{
"id": "issue19800-text",
"file": "pdfs/issue19800.pdf",
"md5": "92825d3178196bdd01096c4081609efd",
"rounds": 1,
"type": "text"
},
{
"id": "issue3438",
"file": "pdfs/issue3438.pdf",