/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { arrayBuffersToBytes, MissingDataException } from "./core_utils.js";
import { assert } from "../shared/util.js";
import { Stream } from "./stream.js";
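/**
 * A `Stream` backed by a fixed-length buffer that is filled incrementally,
 * in `chunkSize`-sized pieces, as data arrives from the network. Reading
 * from a region whose chunks have not been loaded yet throws a
 * `MissingDataException`, allowing the caller to fetch the missing chunks
 * and then retry the operation.
 */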
class ChunkedStream extends Stream {
constructor(length, chunkSize, manager) {
super(
/* arrayBuffer = */ new Uint8Array(length),
/* start = */ 0,
/* length = */ length,
/* dict = */ null
);
this.chunkSize = chunkSize;
this._loadedChunks = new Set();
this.numChunks = Math.ceil(length / chunkSize);
this.manager = manager;
this.progressiveDataLength = 0;
this.lastSuccessfulEnsureByteChunk = -1; // Single-entry cache
}
// If a particular stream does not implement one or more of these methods,
// an error should be thrown.
getMissingChunks() {
const chunks = [];
for (let chunk = 0, n = this.numChunks; chunk < n; ++chunk) {
if (!this._loadedChunks.has(chunk)) {
chunks.push(chunk);
}
}
return chunks;
}
get numChunksLoaded() {
return this._loadedChunks.size;
}
get isDataLoaded() {
return this.numChunksLoaded === this.numChunks;
}
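  /**
   * Copies a chunk-aligned block of data into the buffer at `begin` and
   * marks every chunk it covers as loaded. `begin` must be a multiple of
   * `chunkSize`, and the end offset must either be chunk-aligned as well
   * or coincide with the end of the buffer.
   */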
onReceiveData(begin, chunk) {
const chunkSize = this.chunkSize;
if (begin % chunkSize !== 0) {
throw new Error(`Bad begin offset: ${begin}`);
}
// Using `this.length` is inaccurate here since `this.start` can be moved
// (see the `moveStart` method).
const end = begin + chunk.byteLength;
if (end % chunkSize !== 0 && end !== this.bytes.length) {
throw new Error(`Bad end offset: ${end}`);
}
this.bytes.set(new Uint8Array(chunk), begin);
const beginChunk = Math.floor(begin / chunkSize);
const endChunk = Math.floor((end - 1) / chunkSize) + 1;
for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
// Since a value can only occur *once* in a `Set`, there's no need to
// manually check `Set.prototype.has()` before adding the value here.
this._loadedChunks.add(curChunk);
}
}
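  /**
   * Appends streamed data at the current progressive offset. Only chunks
   * that are *completely* covered by the data received so far are marked
   * as loaded; a trailing partial chunk is counted only once the data
   * reaches the end of the stream.
   */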
onReceiveProgressiveData(data) {
let position = this.progressiveDataLength;
const beginChunk = Math.floor(position / this.chunkSize);
this.bytes.set(new Uint8Array(data), position);
position += data.byteLength;
this.progressiveDataLength = position;
const endChunk =
position >= this.end
? this.numChunks
: Math.floor(position / this.chunkSize);
for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
// Since a value can only occur *once* in a `Set`, there's no need to
// manually check `Set.prototype.has()` before adding the value here.
this._loadedChunks.add(curChunk);
}
}
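  /**
   * Throws a `MissingDataException` unless the byte at `pos` is available,
   * either because it lies within the progressively received data or
   * because its chunk has been loaded. The last successfully checked chunk
   * is cached to keep repeated lookups cheap.
   */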
ensureByte(pos) {
if (pos < this.progressiveDataLength) {
return;
}
const chunk = Math.floor(pos / this.chunkSize);
if (chunk > this.numChunks) {
return;
}
if (chunk === this.lastSuccessfulEnsureByteChunk) {
return;
}
if (!this._loadedChunks.has(chunk)) {
throw new MissingDataException(pos, pos + 1);
}
this.lastSuccessfulEnsureByteChunk = chunk;
}
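  /**
   * Throws a `MissingDataException` unless every chunk covering the range
   * [begin, end) has been loaded; ranges that lie entirely within the
   * progressively received data always pass.
   */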
ensureRange(begin, end) {
if (begin >= end) {
return;
}
if (end <= this.progressiveDataLength) {
return;
}
const beginChunk = Math.floor(begin / this.chunkSize);
if (beginChunk > this.numChunks) {
return;
}
const endChunk = Math.min(
Math.floor((end - 1) / this.chunkSize) + 1,
this.numChunks
);
for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
if (!this._loadedChunks.has(chunk)) {
throw new MissingDataException(begin, end);
}
}
}
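  /**
   * Returns the first chunk at or after `beginChunk` that has not been
   * loaded yet, wrapping around to the beginning of the stream, or `null`
   * when all chunks are loaded.
   */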
nextEmptyChunk(beginChunk) {
const numChunks = this.numChunks;
for (let i = 0; i < numChunks; ++i) {
const chunk = (beginChunk + i) % numChunks; // Wrap around to beginning.
if (!this._loadedChunks.has(chunk)) {
return chunk;
}
}
return null;
}
hasChunk(chunk) {
return this._loadedChunks.has(chunk);
}
getByte() {
const pos = this.pos;
if (pos >= this.end) {
return -1;
}
if (pos >= this.progressiveDataLength) {
this.ensureByte(pos);
}
return this.bytes[this.pos++];
}
getBytes(length) {
const bytes = this.bytes;
const pos = this.pos;
const strEnd = this.end;
if (!length) {
if (strEnd > this.progressiveDataLength) {
this.ensureRange(pos, strEnd);
}
return bytes.subarray(pos, strEnd);
}
let end = pos + length;
if (end > strEnd) {
end = strEnd;
}
if (end > this.progressiveDataLength) {
this.ensureRange(pos, end);
}
this.pos = end;
return bytes.subarray(pos, end);
}
getByteRange(begin, end) {
if (begin < 0) {
begin = 0;
}
if (end > this.end) {
end = this.end;
}
if (end > this.progressiveDataLength) {
this.ensureRange(begin, end);
}
return this.bytes.subarray(begin, end);
}
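  /**
   * Creates a sub-stream that shares this stream's buffer and loaded-chunk
   * state. The sub-stream overrides `getMissingChunks`/`isDataLoaded` so
   * that only the chunks covering its own [start, end) range are taken
   * into account.
   */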
makeSubStream(start, length, dict = null) {
if (length) {
if (start + length > this.progressiveDataLength) {
this.ensureRange(start, start + length);
}
} else if (start >= this.progressiveDataLength) {
// When the `length` is undefined you do *not*, under any circumstances,
    // want to fall back on calling `this.ensureRange(start, this.end)` since
// that would force the *entire* PDF file to be loaded, thus completely
// breaking the whole purpose of using streaming and/or range requests.
//
// However, not doing any checking here could very easily lead to wasted
// time/resources during e.g. parsing, since `MissingDataException`s will
// require data to be re-parsed, which we attempt to minimize by at least
// checking that the *beginning* of the data is available here.
this.ensureByte(start);
}
function ChunkedStreamSubstream() {}
ChunkedStreamSubstream.prototype = Object.create(this);
ChunkedStreamSubstream.prototype.getMissingChunks = function () {
const chunkSize = this.chunkSize;
const beginChunk = Math.floor(this.start / chunkSize);
const endChunk = Math.floor((this.end - 1) / chunkSize) + 1;
const missingChunks = [];
for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
if (!this._loadedChunks.has(chunk)) {
missingChunks.push(chunk);
}
}
return missingChunks;
};
Object.defineProperty(ChunkedStreamSubstream.prototype, "isDataLoaded", {
get() {
if (this.numChunksLoaded === this.numChunks) {
return true;
}
return this.getMissingChunks().length === 0;
},
configurable: true,
});
const subStream = new ChunkedStreamSubstream();
subStream.pos = subStream.start = start;
subStream.end = start + length || this.end;
subStream.dict = dict;
return subStream;
}
getBaseStreams() {
return [this];
}
}
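/**
 * Drives the loading of a `ChunkedStream`: it groups missing chunks into
 * contiguous byte ranges, issues range requests through the network stream,
 * tracks which outstanding requests are still waiting on which chunks, and
 * resolves the per-request promises as the data arrives.
 */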
class ChunkedStreamManager {
constructor(pdfNetworkStream, args) {
this.length = args.length;
this.chunkSize = args.rangeChunkSize;
this.stream = new ChunkedStream(this.length, this.chunkSize, this);
this.pdfNetworkStream = pdfNetworkStream;
this.disableAutoFetch = args.disableAutoFetch;
this.msgHandler = args.msgHandler;
this.currRequestId = 0;
this._chunksNeededByRequest = new Map();
this._requestsByChunk = new Map();
this._promisesByRequest = new Map();
this.progressiveDataLength = 0;
this.aborted = false;
this._loadedStreamCapability = Promise.withResolvers();
}
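  /**
   * Reads the byte range [begin, end) with a range reader, forwarding
   * progress notifications, and passes the assembled data on to
   * `onReceiveData` (unless the manager has been aborted in the meantime).
   */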
sendRequest(begin, end) {
const rangeReader = this.pdfNetworkStream.getRangeReader(begin, end);
if (!rangeReader.isStreamingSupported) {
rangeReader.onProgress = this.onProgress.bind(this);
}
let chunks = [],
loaded = 0;
return new Promise((resolve, reject) => {
const readChunk = ({ value, done }) => {
try {
if (done) {
const chunkData = arrayBuffersToBytes(chunks);
chunks = null;
resolve(chunkData);
return;
}
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
value instanceof ArrayBuffer,
"readChunk (sendRequest) - expected an ArrayBuffer."
);
}
loaded += value.byteLength;
if (rangeReader.isStreamingSupported) {
this.onProgress({ loaded });
}
chunks.push(value);
rangeReader.read().then(readChunk, reject);
} catch (e) {
reject(e);
}
};
rangeReader.read().then(readChunk, reject);
}).then(data => {
if (this.aborted) {
return; // Ignoring any data after abort.
}
this.onReceiveData({ chunk: data, begin });
});
}
/**
* Get all the chunks that are not yet loaded and group them into
* contiguous ranges to load in as few requests as possible.
*/
requestAllChunks(noFetch = false) {
if (!noFetch) {
const missingChunks = this.stream.getMissingChunks();
this._requestChunks(missingChunks);
}
return this._loadedStreamCapability.promise;
}
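  /**
   * Requests the given chunks, skipping any that are already loaded, and
   * returns a promise that resolves once every remaining chunk has been
   * received. Chunks that another request is already fetching are not
   * requested twice; the new request simply waits for them.
   */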
_requestChunks(chunks) {
const requestId = this.currRequestId++;
const chunksNeeded = new Set();
this._chunksNeededByRequest.set(requestId, chunksNeeded);
for (const chunk of chunks) {
if (!this.stream.hasChunk(chunk)) {
chunksNeeded.add(chunk);
}
}
if (chunksNeeded.size === 0) {
return Promise.resolve();
}
const capability = Promise.withResolvers();
this._promisesByRequest.set(requestId, capability);
const chunksToRequest = [];
for (const chunk of chunksNeeded) {
let requestIds = this._requestsByChunk.get(chunk);
if (!requestIds) {
requestIds = [];
this._requestsByChunk.set(chunk, requestIds);
chunksToRequest.push(chunk);
}
requestIds.push(requestId);
}
if (chunksToRequest.length > 0) {
const groupedChunksToRequest = this.groupChunks(chunksToRequest);
for (const groupedChunk of groupedChunksToRequest) {
const begin = groupedChunk.beginChunk * this.chunkSize;
const end = Math.min(
groupedChunk.endChunk * this.chunkSize,
this.length
);
this.sendRequest(begin, end).catch(capability.reject);
}
}
return capability.promise.catch(reason => {
if (this.aborted) {
return; // Ignoring any pending requests after abort.
}
throw reason;
});
}
getStream() {
return this.stream;
}
/**
* Loads any chunks in the requested range that are not yet loaded.
*/
requestRange(begin, end) {
end = Math.min(end, this.length);
const beginChunk = this.getBeginChunk(begin);
const endChunk = this.getEndChunk(end);
const chunks = [];
for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
chunks.push(chunk);
}
return this._requestChunks(chunks);
}
requestRanges(ranges = []) {
const chunksToRequest = [];
for (const range of ranges) {
const beginChunk = this.getBeginChunk(range.begin);
const endChunk = this.getEndChunk(range.end);
for (let chunk = beginChunk; chunk < endChunk; ++chunk) {
if (!chunksToRequest.includes(chunk)) {
chunksToRequest.push(chunk);
}
}
}
chunksToRequest.sort(function (a, b) {
return a - b;
});
return this._requestChunks(chunksToRequest);
}
/**
* Groups a sorted array of chunks into as few contiguous larger
* chunks as possible.
*/
groupChunks(chunks) {
const groupedChunks = [];
let beginChunk = -1;
let prevChunk = -1;
for (let i = 0, ii = chunks.length; i < ii; ++i) {
const chunk = chunks[i];
if (beginChunk < 0) {
beginChunk = chunk;
}
if (prevChunk >= 0 && prevChunk + 1 !== chunk) {
groupedChunks.push({ beginChunk, endChunk: prevChunk + 1 });
beginChunk = chunk;
}
if (i + 1 === chunks.length) {
groupedChunks.push({ beginChunk, endChunk: chunk + 1 });
}
prevChunk = chunk;
}
return groupedChunks;
}
onProgress(args) {
this.msgHandler.send("DocProgress", {
loaded: this.stream.numChunksLoaded * this.chunkSize + args.loaded,
total: this.length,
});
}
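  /**
   * Integrates received data into the stream and resolves any requests
   * that are now fully satisfied. When auto-fetching is enabled and no
   * requests remain pending, the next unloaded chunk is scheduled
   * automatically.
   */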
onReceiveData(args) {
const chunk = args.chunk;
const isProgressive = args.begin === undefined;
const begin = isProgressive ? this.progressiveDataLength : args.begin;
const end = begin + chunk.byteLength;
const beginChunk = Math.floor(begin / this.chunkSize);
const endChunk =
end < this.length
? Math.floor(end / this.chunkSize)
: Math.ceil(end / this.chunkSize);
if (isProgressive) {
this.stream.onReceiveProgressiveData(chunk);
this.progressiveDataLength = end;
} else {
this.stream.onReceiveData(begin, chunk);
}
if (this.stream.isDataLoaded) {
this._loadedStreamCapability.resolve(this.stream);
}
const loadedRequests = [];
for (let curChunk = beginChunk; curChunk < endChunk; ++curChunk) {
// The server might return more chunks than requested.
const requestIds = this._requestsByChunk.get(curChunk);
if (!requestIds) {
continue;
}
this._requestsByChunk.delete(curChunk);
for (const requestId of requestIds) {
const chunksNeeded = this._chunksNeededByRequest.get(requestId);
if (chunksNeeded.has(curChunk)) {
chunksNeeded.delete(curChunk);
}
if (chunksNeeded.size > 0) {
continue;
}
loadedRequests.push(requestId);
}
}
// If there are no pending requests, automatically fetch the next
// unfetched chunk of the PDF file.
if (!this.disableAutoFetch && this._requestsByChunk.size === 0) {
let nextEmptyChunk;
if (this.stream.numChunksLoaded === 1) {
// This is a special optimization so that after fetching the first
// chunk, rather than fetching the second chunk, we fetch the last
// chunk.
const lastChunk = this.stream.numChunks - 1;
if (!this.stream.hasChunk(lastChunk)) {
nextEmptyChunk = lastChunk;
}
} else {
nextEmptyChunk = this.stream.nextEmptyChunk(endChunk);
}
if (Number.isInteger(nextEmptyChunk)) {
this._requestChunks([nextEmptyChunk]);
}
}
for (const requestId of loadedRequests) {
const capability = this._promisesByRequest.get(requestId);
this._promisesByRequest.delete(requestId);
capability.resolve();
}
this.msgHandler.send("DocProgress", {
loaded: this.stream.numChunksLoaded * this.chunkSize,
total: this.length,
});
}
onError(err) {
this._loadedStreamCapability.reject(err);
}
getBeginChunk(begin) {
return Math.floor(begin / this.chunkSize);
}
getEndChunk(end) {
return Math.floor((end - 1) / this.chunkSize) + 1;
}
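  /**
   * Cancels all in-flight network requests and rejects the promises of any
   * outstanding chunk requests with the given reason.
   */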
abort(reason) {
this.aborted = true;
this.pdfNetworkStream?.cancelAllRequests(reason);
for (const capability of this._promisesByRequest.values()) {
capability.reject(reason);
}
}
}
export { ChunkedStream, ChunkedStreamManager };