mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 10:08:06 +02:00
*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.
The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.
Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer documents the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.
Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data actually have changed rather than once per rendered page (again beneficial in longer documents).
---
[1] Furthermore, the maximum number of `StreamType`/`FontType` values is `10` and `12` respectively, which means that regardless of the complexity and page count of a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)
[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only results in a total of seven "DocStats" messages being sent from the worker-thread.
[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
226 lines
5.2 KiB
JavaScript
226 lines
5.2 KiB
JavaScript
/* Copyright 2012 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
|
|
import { ChunkedStreamManager } from "./chunked_stream.js";
|
|
import { MissingDataException } from "./core_utils.js";
|
|
import { PDFDocument } from "./document.js";
|
|
import { Stream } from "./stream.js";
|
|
|
|
/**
 * Normalize the document base URL supplied by the caller.
 *
 * @param {string} [url] - Candidate base URL. Falsy values (e.g. `null`,
 *   `undefined`, or the empty string) are treated as "no base URL" and are
 *   not validated.
 * @returns {string|null} The `href` of the object returned by
 *   `createValidAbsoluteUrl(url)`, or `null` when `url` is falsy or when
 *   `createValidAbsoluteUrl` returns a falsy value (in which case a warning
 *   is emitted through `warn`).
 */
function parseDocBaseUrl(url) {
  if (url) {
    const absoluteUrl = createValidAbsoluteUrl(url);
    if (absoluteUrl) {
      return absoluteUrl.href;
    }
    // Only a non-empty but rejected URL is worth warning about; a missing
    // URL silently yields `null` below.
    warn(`Invalid absolute docBaseUrl: "${url}".`);
  }
  return null;
}
|
|
|
|
/**
 * Abstract base class shared by the PDF manager implementations in this file.
 *
 * It provides read-only accessors for the `_docId`, `_password` and
 * `_docBaseUrl` fields, convenience methods that forward to
 * `this.pdfDocument`, and a set of abstract methods that report through
 * `unreachable` unless a subclass overrides them. Subclasses are expected to
 * assign `_docId`, `_password`, `_docBaseUrl` and `pdfDocument` themselves;
 * this class never sets them (apart from `updatePassword`).
 */
class BasePdfManager {
  constructor() {
    // Guard against direct instantiation; subclasses pass this check because
    // their `constructor` property differs.
    if (this.constructor === BasePdfManager) {
      unreachable("Cannot initialize BasePdfManager.");
    }
  }

  /** @returns {*} The value stored in `_docId`. */
  get docId() {
    return this._docId;
  }

  /** @returns {*} The value stored in `_password`. */
  get password() {
    return this._password;
  }

  /** @returns {*} The value stored in `_docBaseUrl`. */
  get docBaseUrl() {
    return this._docBaseUrl;
  }

  /** Abstract; subclasses must override. */
  onLoadedStream() {
    unreachable("Abstract method `onLoadedStream` called");
  }

  /**
   * Resolve `prop` on the document itself via `ensure`.
   *
   * @param {string} prop - Property or method name on `this.pdfDocument`.
   * @param {Array} [args] - Arguments forwarded when `prop` is a method.
   * @returns {Promise<*>} Whatever `ensure` returns.
   */
  ensureDoc(prop, args) {
    return this.ensure(this.pdfDocument, prop, args);
  }

  /**
   * Resolve `prop` on `this.pdfDocument.xref` via `ensure`.
   *
   * @param {string} prop - Property or method name.
   * @param {Array} [args] - Arguments forwarded when `prop` is a method.
   * @returns {Promise<*>} Whatever `ensure` returns.
   */
  ensureXRef(prop, args) {
    return this.ensure(this.pdfDocument.xref, prop, args);
  }

  /**
   * Resolve `prop` on `this.pdfDocument.catalog` via `ensure`.
   *
   * @param {string} prop - Property or method name.
   * @param {Array} [args] - Arguments forwarded when `prop` is a method.
   * @returns {Promise<*>} Whatever `ensure` returns.
   */
  ensureCatalog(prop, args) {
    return this.ensure(this.pdfDocument.catalog, prop, args);
  }

  /** Forwards to `this.pdfDocument.getPage(pageIndex)`. */
  getPage(pageIndex) {
    return this.pdfDocument.getPage(pageIndex);
  }

  /** Forwards to `this.pdfDocument.fontFallback(id, handler)`. */
  fontFallback(id, handler) {
    return this.pdfDocument.fontFallback(id, handler);
  }

  /** Forwards to `this.pdfDocument.loadXfaFonts(handler, task)`. */
  loadXfaFonts(handler, task) {
    return this.pdfDocument.loadXfaFonts(handler, task);
  }

  /** Forwards to `this.pdfDocument.loadXfaImages()`. */
  loadXfaImages() {
    return this.pdfDocument.loadXfaImages();
  }

  /** Forwards to `this.pdfDocument.serializeXfaData(annotationStorage)`. */
  serializeXfaData(annotationStorage) {
    return this.pdfDocument.serializeXfaData(annotationStorage);
  }

  /**
   * Forwards to `this.pdfDocument.cleanup(manuallyTriggered)`.
   *
   * @param {boolean} [manuallyTriggered=false]
   */
  cleanup(manuallyTriggered = false) {
    return this.pdfDocument.cleanup(manuallyTriggered);
  }

  /**
   * Abstract; subclasses must override. Declared `async`, so anything thrown
   * by `unreachable` surfaces as a rejected promise rather than a
   * synchronous exception.
   */
  async ensure(obj, prop, args) {
    unreachable("Abstract method `ensure` called");
  }

  /** Abstract; subclasses must override. */
  requestRange(begin, end) {
    unreachable("Abstract method `requestRange` called");
  }

  /** Abstract; subclasses must override. */
  requestLoadedStream() {
    unreachable("Abstract method `requestLoadedStream` called");
  }

  /** Abstract; subclasses must override. */
  sendProgressiveData(chunk) {
    unreachable("Abstract method `sendProgressiveData` called");
  }

  /**
   * Replace the stored password (the value returned by the `password`
   * getter).
   *
   * @param {*} password - New password value.
   */
  updatePassword(password) {
    this._password = password;
  }

  /** Abstract; subclasses must override. */
  terminate(reason) {
    unreachable("Abstract method `terminate` called");
  }
}
|
|
|
|
/**
 * PDF manager for a document whose data is handed to the constructor up
 * front: the data is wrapped in a `Stream`, so `ensure` never has to request
 * anything and the range/loading hooks are no-ops.
 */
class LocalPdfManager extends BasePdfManager {
  /**
   * @param {*} docId - Stored as `_docId`.
   * @param {*} data - Document data; passed unchanged to `new Stream(data)`.
   * @param {*} password - Stored as `_password`.
   * @param {Object} msgHandler - Stored as `msgHandler`.
   * @param {Object} evaluatorOptions - Stored as `evaluatorOptions`.
   * @param {boolean} enableXfa - Stored as `enableXfa`.
   * @param {string} [docBaseUrl] - Normalized through `parseDocBaseUrl`
   *   before being stored as `_docBaseUrl`.
   */
  constructor(
    docId,
    data,
    password,
    msgHandler,
    evaluatorOptions,
    enableXfa,
    docBaseUrl
  ) {
    super();

    this._docId = docId;
    this._password = password;
    this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
    this.msgHandler = msgHandler;
    this.evaluatorOptions = evaluatorOptions;
    this.enableXfa = enableXfa;

    const stream = new Stream(data);
    this.pdfDocument = new PDFDocument(this, stream);
    // The stream exists as soon as it is constructed, so the "loaded"
    // promise can be settled immediately.
    this._loadedStreamPromise = Promise.resolve(stream);
  }

  /**
   * Read `obj[prop]`, invoking it with `args` (and `obj` as `this`) when it
   * is a function.
   *
   * @param {Object} obj - Target object.
   * @param {string} prop - Property or method name on `obj`.
   * @param {Array} [args] - Arguments used when `obj[prop]` is a function.
   * @returns {Promise<*>} The property value or the method's result.
   */
  async ensure(obj, prop, args) {
    const value = obj[prop];
    if (typeof value === "function") {
      return value.apply(obj, args);
    }
    return value;
  }

  /**
   * No-op for this manager.
   *
   * @returns {Promise<undefined>} An already-resolved promise.
   */
  requestRange(begin, end) {
    return Promise.resolve();
  }

  /** No-op for this manager. */
  requestLoadedStream() {}

  /**
   * @returns {Promise<Stream>} Promise resolved with the stream created in
   *   the constructor.
   */
  onLoadedStream() {
    return this._loadedStreamPromise;
  }

  /** No-op for this manager. */
  terminate(reason) {}
}
|
|
|
|
/**
 * PDF manager backed by a `ChunkedStreamManager`: document data may not be
 * available yet, so `ensure` retries an access after requesting the byte
 * range reported by a `MissingDataException`.
 */
class NetworkPdfManager extends BasePdfManager {
  /**
   * @param {*} docId - Stored as `_docId`.
   * @param {Object} pdfNetworkStream - Passed as the first argument to
   *   `ChunkedStreamManager`.
   * @param {Object} args - Supplies `password` and `msgHandler` (stored on
   *   this instance) as well as `msgHandler`, `length`, `disableAutoFetch`
   *   and `rangeChunkSize` (forwarded to `ChunkedStreamManager`).
   * @param {Object} evaluatorOptions - Stored as `evaluatorOptions`.
   * @param {boolean} enableXfa - Stored as `enableXfa`.
   * @param {string} [docBaseUrl] - Normalized through `parseDocBaseUrl`
   *   before being stored as `_docBaseUrl`.
   */
  constructor(
    docId,
    pdfNetworkStream,
    args,
    evaluatorOptions,
    enableXfa,
    docBaseUrl
  ) {
    super();

    this._docId = docId;
    this._password = args.password;
    this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
    this.msgHandler = args.msgHandler;
    this.evaluatorOptions = evaluatorOptions;
    this.enableXfa = enableXfa;

    this.streamManager = new ChunkedStreamManager(pdfNetworkStream, {
      msgHandler: args.msgHandler,
      length: args.length,
      disableAutoFetch: args.disableAutoFetch,
      rangeChunkSize: args.rangeChunkSize,
    });
    this.pdfDocument = new PDFDocument(this, this.streamManager.getStream());
  }

  /**
   * Read `obj[prop]`, invoking it with `args` (and `obj` as `this`) when it
   * is a function. If that access throws a `MissingDataException`, request
   * the range given by the exception's `begin`/`end` and try again; any other
   * exception is rethrown unchanged.
   *
   * Note that the result is returned without being awaited inside the `try`,
   * so only exceptions thrown synchronously by the access are intercepted
   * here.
   *
   * @param {Object} obj - Target object.
   * @param {string} prop - Property or method name on `obj`.
   * @param {Array} [args] - Arguments used when `obj[prop]` is a function.
   * @returns {Promise<*>} The property value or the method's result.
   */
  async ensure(obj, prop, args) {
    try {
      const value = obj[prop];
      if (typeof value === "function") {
        return value.apply(obj, args);
      }
      return value;
    } catch (ex) {
      if (!(ex instanceof MissingDataException)) {
        throw ex;
      }
      // Wait for the missing range before retrying the same access.
      await this.requestRange(ex.begin, ex.end);
      return this.ensure(obj, prop, args);
    }
  }

  /** Forwards to `this.streamManager.requestRange(begin, end)`. */
  requestRange(begin, end) {
    return this.streamManager.requestRange(begin, end);
  }

  /** Calls `this.streamManager.requestAllChunks()`; returns nothing. */
  requestLoadedStream() {
    this.streamManager.requestAllChunks();
  }

  /**
   * Hands `chunk` to the stream manager, wrapped as `{ chunk }`.
   *
   * @param {*} chunk - Data passed to `streamManager.onReceiveData`.
   */
  sendProgressiveData(chunk) {
    this.streamManager.onReceiveData({ chunk });
  }

  /** Forwards to `this.streamManager.onLoadedStream()`. */
  onLoadedStream() {
    return this.streamManager.onLoadedStream();
  }

  /**
   * Calls `this.streamManager.abort(reason)`.
   *
   * @param {*} reason - Passed through to `abort`.
   */
  terminate(reason) {
    this.streamManager.abort(reason);
  }
}
|
|
|
|
export { LocalPdfManager, NetworkPdfManager };
|