mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 09:38:06 +02:00
[api-minor] Clear all caches in `XRef.indexObjects`, and improve /Root dictionary validation in `XRef.parse` (issue 14303)
*This patch improves handling of a couple of PDF documents from issue 14303.*

- Update `XRef.indexObjects` to actually clear *all* XRef-caches. Invalid XRef tables *usually* cause issues early enough during parsing that we've not populated the XRef-cache; however, to prevent any issues we obviously need to clear that one as well.
- Improve the /Root dictionary validation in `XRef.parse` (PR 9827 follow-up). In addition to checking that a /Pages entry exists, we'll now also check that it can be successfully fetched *and* that it's of the correct type. There's really no point trying to use a /Root dictionary that e.g. `Catalog.toplevelPagesDict` will reject, and this way we'll be able to fall back to indexing the objects in corrupt documents.
- Throw an `InvalidPDFException`, rather than a general `FormatError`, in `XRef.parse` when no usable /Root dictionary could be found. That really seems more appropriate overall, since all attempts at parsing/recovery have failed. (This part of the patch is API-observable, hence the tag.)

With these changes, two existing test-cases are improved and the unit-tests are updated/re-factored to highlight that. In particular, `GHOSTSCRIPT-698804-1-fuzzed.pdf` will now both load and "render" correctly, whereas `poppler-395-0-fuzzed.pdf` will now fail immediately upon loading (rather than *appearing* to work).
This commit is contained in:
parent
e9e4b913c0
commit
ad3a271fc4
2 changed files with 66 additions and 43 deletions
|
@@ -107,14 +107,26 @@ class XRef {
|
|||
}
|
||||
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
|
||||
}
|
||||
if (root instanceof Dict && root.has("Pages")) {
|
||||
this.root = root;
|
||||
} else {
|
||||
if (!recoveryMode) {
|
||||
throw new XRefParseException();
|
||||
if (root instanceof Dict) {
|
||||
try {
|
||||
const pages = root.get("Pages");
|
||||
if (pages instanceof Dict) {
|
||||
this.root = root;
|
||||
return;
|
||||
}
|
||||
} catch (ex) {
|
||||
if (ex instanceof MissingDataException) {
|
||||
throw ex;
|
||||
}
|
||||
warn(`XRef.parse - Invalid "Pages" reference: "${ex}".`);
|
||||
}
|
||||
throw new FormatError("Invalid root reference");
|
||||
}
|
||||
|
||||
if (!recoveryMode) {
|
||||
throw new XRefParseException();
|
||||
}
|
||||
// Even recovery failed, there's nothing more we can do here.
|
||||
throw new InvalidPDFException("Invalid Root reference.");
|
||||
}
|
||||
|
||||
processXRefTable(parser) {
|
||||
|
@@ -417,6 +429,7 @@ class XRef {
|
|||
|
||||
// Clear out any existing entries, since they may be bogus.
|
||||
this.entries.length = 0;
|
||||
this._cacheMap.clear();
|
||||
|
||||
const stream = this.stream;
|
||||
stream.pos = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue