mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Send fetch requests for all page dict lookups in parallel
- When adding page dict candidates to the lookup tree, also initiate fetching them from xref, so that if they are not yet loaded at all, the XHR will be sent.
- This is done only at the top level: assume that if there is a /Pages tree, it is sensibly structured and the number of requests won't be too bad.
- We can then await the cached Promise without forcing the requests to be issued one after another.
- This gives a significant performance improvement for load-on-demand (i.e. with auto-fetch turned off) when a PDF has a large number of pages in the top-level /Pages collection and those pages are spread through the file, so every candidate needs to be fetched separately.
- PDFs with many pages, where each page is a big image and all the pages are at the top level, are quite a common output of digitisation programmes.
- I would have liked to do something like "if it's the top-level collection and page count = number of kids, then just fetch that page without traversing the tree", but unfortunately I agree with the comments on #8088 that there is no good general solution that allows for /Pages nodes with empty /Kids arrays.
This commit is contained in:
parent
b47c7eca83
commit
a67b9aec6c
1 changed file with 18 additions and 3 deletions
|
@ -143,6 +143,7 @@ class Catalog {
|
|||
this.globalImageCache = new GlobalImageCache();
|
||||
this.pageKidsCountCache = new RefSetCache();
|
||||
this.pageIndexCache = new RefSetCache();
|
||||
this.pageDictCache = new RefSetCache();
|
||||
this.nonBlendModesSet = new RefSet();
|
||||
this.systemFontCache = new Map();
|
||||
}
|
||||
|
@ -1161,6 +1162,7 @@ class Catalog {
|
|||
this.globalImageCache.clear(/* onlyData = */ manuallyTriggered);
|
||||
this.pageKidsCountCache.clear();
|
||||
this.pageIndexCache.clear();
|
||||
this.pageDictCache.clear();
|
||||
this.nonBlendModesSet.clear();
|
||||
|
||||
const translatedFonts = await Promise.all(this.fontCache);
|
||||
|
@ -1184,7 +1186,8 @@ class Catalog {
|
|||
}
|
||||
const xref = this.xref,
|
||||
pageKidsCountCache = this.pageKidsCountCache,
|
||||
pageIndexCache = this.pageIndexCache;
|
||||
pageIndexCache = this.pageIndexCache,
|
||||
pageDictCache = this.pageDictCache;
|
||||
let currentPageIndex = 0;
|
||||
|
||||
while (nodesToVisit.length) {
|
||||
|
@ -1203,7 +1206,8 @@ class Catalog {
|
|||
}
|
||||
visitedNodes.put(currentNode);
|
||||
|
||||
const obj = await xref.fetchAsync(currentNode);
|
||||
const obj = await (pageDictCache.get(currentNode) ||
|
||||
xref.fetchAsync(currentNode));
|
||||
if (obj instanceof Dict) {
|
||||
let type = obj.getRaw("Type");
|
||||
if (type instanceof Ref) {
|
||||
|
@ -1285,7 +1289,18 @@ class Catalog {
|
|||
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
|
||||
// and to ensure that we actually find the correct `Page` dict.
|
||||
for (let last = kids.length - 1; last >= 0; last--) {
|
||||
nodesToVisit.push(kids[last]);
|
||||
const lastKid = kids[last];
|
||||
nodesToVisit.push(lastKid);
|
||||
|
||||
// Launch all requests in parallel so we don't wait for each one in turn
|
||||
// when looking for a page near the end, if all the pages are top level.
|
||||
if (
|
||||
currentNode === this.toplevelPagesDict &&
|
||||
lastKid instanceof Ref &&
|
||||
!pageDictCache.has(lastKid)
|
||||
) {
|
||||
pageDictCache.put(lastKid, xref.fetchAsync(lastKid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue