mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-26 01:58:06 +02:00
[api-minor] Replace PDFDocumentProxy.getStats
with a synchronous PDFDocumentProxy.stats
getter
*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.
The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.
Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer documents the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.
Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data has actually changed, rather than once per rendered page (again beneficial in longer documents).
---
[1] Furthermore, the maximum number of `StreamType`/`FontType` values is `10` and `12` respectively, which means that regardless of the complexity and page count of a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)
[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only results in a total of seven "DocStats" messages being sent from the worker-thread.
[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
This commit is contained in:
parent
41ac3f0c07
commit
6da0944fc7
10 changed files with 158 additions and 67 deletions
|
@ -16,7 +16,9 @@
|
|||
import {
|
||||
assert,
|
||||
BaseException,
|
||||
FontType,
|
||||
objectSize,
|
||||
StreamType,
|
||||
stringToPDFString,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
|
@ -76,6 +78,55 @@ class XRefParseException extends BaseException {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Tracks the distinct Stream/Font types that have been registered, and
 * reports the complete set through the provided handler each time a type
 * is registered for the first time.
 */
class DocStats {
  /**
   * @param {Object} handler - Object exposing a `send(name, data)` method;
   *   it is invoked with the "DocStats" name whenever the stats change.
   *   NOTE(review): presumably the worker-thread message handler; confirm
   *   against the caller.
   */
  constructor(handler) {
    this._handler = handler;

    this._streamTypes = new Set();
    this._fontTypes = new Set();
  }

  /**
   * Send a snapshot of all types registered so far, as two prototype-less
   * objects whose keys are the type names and whose values are `true`.
   * @private
   */
  _send() {
    const streamTypes = Object.create(null);
    const fontTypes = Object.create(null);

    for (const type of this._streamTypes) {
      streamTypes[type] = true;
    }
    for (const type of this._fontTypes) {
      fontTypes[type] = true;
    }
    this._handler.send("DocStats", { streamTypes, fontTypes });
  }

  /**
   * Shared bookkeeping for `addStreamType`/`addFontType`: record `type` in
   * `knownTypes`, and only send an update when it wasn't already present.
   * @private
   * @param {Set} knownTypes - The set that tracks this category of types.
   * @param {string} type - The type to record.
   */
  _addType(knownTypes, type) {
    if (knownTypes.has(type)) {
      return; // Already reported, avoid sending a redundant message.
    }
    knownTypes.add(type);
    this._send();
  }

  /**
   * Register a Stream-type; a "DocStats" message is sent only the first
   * time that a given type is seen.
   * @param {string} type - One of the `StreamType` values.
   */
  addStreamType(type) {
    // Validation only runs in non-PRODUCTION or TESTING builds.
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(StreamType[type] === type, 'addStreamType: Invalid "type" value.');
    }
    this._addType(this._streamTypes, type);
  }

  /**
   * Register a Font-type; a "DocStats" message is sent only the first
   * time that a given type is seen.
   * @param {string} type - One of the `FontType` values.
   */
  addFontType(type) {
    // Validation only runs in non-PRODUCTION or TESTING builds.
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(FontType[type] === type, 'addFontType: Invalid "type" value.');
    }
    this._addType(this._fontTypes, type);
  }
}
|
||||
|
||||
/**
|
||||
* Get the value of an inheritable property.
|
||||
*
|
||||
|
@ -481,6 +532,7 @@ function recoverJsURL(str) {
|
|||
|
||||
export {
|
||||
collectActions,
|
||||
DocStats,
|
||||
encodeToXmlString,
|
||||
escapePDFName,
|
||||
getArrayLookupTableFactory,
|
||||
|
|
|
@ -1248,8 +1248,7 @@ class PartialEvaluator {
|
|||
this.translateFont(preEvaluatedFont)
|
||||
.then(translatedFont => {
|
||||
if (translatedFont.fontType !== undefined) {
|
||||
const xrefFontStats = xref.stats.fontTypes;
|
||||
xrefFontStats[translatedFont.fontType] = true;
|
||||
xref.stats.addFontType(translatedFont.fontType);
|
||||
}
|
||||
|
||||
fontCapability.resolve(
|
||||
|
@ -1277,8 +1276,9 @@ class PartialEvaluator {
|
|||
preEvaluatedFont.type,
|
||||
subtype && subtype.name
|
||||
);
|
||||
const xrefFontStats = xref.stats.fontTypes;
|
||||
xrefFontStats[fontType] = true;
|
||||
if (fontType !== undefined) {
|
||||
xref.stats.addFontType(fontType);
|
||||
}
|
||||
} catch (ex) {}
|
||||
|
||||
fontCapability.resolve(
|
||||
|
|
|
@ -741,13 +741,13 @@ class Parser {
|
|||
warn(`Empty "${name}" stream.`);
|
||||
return new NullStream();
|
||||
}
|
||||
const xrefStats = this.xref.stats;
|
||||
|
||||
try {
|
||||
const xrefStreamStats = this.xref.stats.streamTypes;
|
||||
switch (name) {
|
||||
case "Fl":
|
||||
case "FlateDecode":
|
||||
xrefStreamStats[StreamType.FLATE] = true;
|
||||
xrefStats.addStreamType(StreamType.FLATE);
|
||||
if (params) {
|
||||
return new PredictorStream(
|
||||
new FlateStream(stream, maybeLength),
|
||||
|
@ -758,7 +758,7 @@ class Parser {
|
|||
return new FlateStream(stream, maybeLength);
|
||||
case "LZW":
|
||||
case "LZWDecode":
|
||||
xrefStreamStats[StreamType.LZW] = true;
|
||||
xrefStats.addStreamType(StreamType.LZW);
|
||||
let earlyChange = 1;
|
||||
if (params) {
|
||||
if (params.has("EarlyChange")) {
|
||||
|
@ -773,30 +773,30 @@ class Parser {
|
|||
return new LZWStream(stream, maybeLength, earlyChange);
|
||||
case "DCT":
|
||||
case "DCTDecode":
|
||||
xrefStreamStats[StreamType.DCT] = true;
|
||||
xrefStats.addStreamType(StreamType.DCT);
|
||||
return new JpegStream(stream, maybeLength, params);
|
||||
case "JPX":
|
||||
case "JPXDecode":
|
||||
xrefStreamStats[StreamType.JPX] = true;
|
||||
xrefStats.addStreamType(StreamType.JPX);
|
||||
return new JpxStream(stream, maybeLength, params);
|
||||
case "A85":
|
||||
case "ASCII85Decode":
|
||||
xrefStreamStats[StreamType.A85] = true;
|
||||
xrefStats.addStreamType(StreamType.A85);
|
||||
return new Ascii85Stream(stream, maybeLength);
|
||||
case "AHx":
|
||||
case "ASCIIHexDecode":
|
||||
xrefStreamStats[StreamType.AHX] = true;
|
||||
xrefStats.addStreamType(StreamType.AHX);
|
||||
return new AsciiHexStream(stream, maybeLength);
|
||||
case "CCF":
|
||||
case "CCITTFaxDecode":
|
||||
xrefStreamStats[StreamType.CCF] = true;
|
||||
xrefStats.addStreamType(StreamType.CCF);
|
||||
return new CCITTFaxStream(stream, maybeLength, params);
|
||||
case "RL":
|
||||
case "RunLengthDecode":
|
||||
xrefStreamStats[StreamType.RLX] = true;
|
||||
xrefStats.addStreamType(StreamType.RLX);
|
||||
return new RunLengthStream(stream, maybeLength);
|
||||
case "JBIG2Decode":
|
||||
xrefStreamStats[StreamType.JBIG] = true;
|
||||
xrefStats.addStreamType(StreamType.JBIG);
|
||||
return new Jbig2Stream(stream, maybeLength, params);
|
||||
}
|
||||
warn(`Filter "${name}" is not supported.`);
|
||||
|
|
|
@ -115,12 +115,21 @@ class BasePdfManager {
|
|||
}
|
||||
|
||||
class LocalPdfManager extends BasePdfManager {
|
||||
constructor(docId, data, password, evaluatorOptions, enableXfa, docBaseUrl) {
|
||||
constructor(
|
||||
docId,
|
||||
data,
|
||||
password,
|
||||
msgHandler,
|
||||
evaluatorOptions,
|
||||
enableXfa,
|
||||
docBaseUrl
|
||||
) {
|
||||
super();
|
||||
|
||||
this._docId = docId;
|
||||
this._password = password;
|
||||
this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
|
||||
this.msgHandler = msgHandler;
|
||||
this.evaluatorOptions = evaluatorOptions;
|
||||
this.enableXfa = enableXfa;
|
||||
|
||||
|
|
|
@ -215,6 +215,7 @@ class WorkerMessageHandler {
|
|||
docId,
|
||||
source.data,
|
||||
source.password,
|
||||
handler,
|
||||
evaluatorOptions,
|
||||
enableXfa,
|
||||
docBaseUrl
|
||||
|
@ -287,6 +288,7 @@ class WorkerMessageHandler {
|
|||
docId,
|
||||
pdfFile,
|
||||
source.password,
|
||||
handler,
|
||||
evaluatorOptions,
|
||||
enableXfa,
|
||||
docBaseUrl
|
||||
|
@ -532,10 +534,6 @@ class WorkerMessageHandler {
|
|||
});
|
||||
});
|
||||
|
||||
handler.on("GetStats", function wphSetupGetStats(data) {
|
||||
return pdfManager.ensureXRef("stats");
|
||||
});
|
||||
|
||||
handler.on("GetAnnotations", function ({ pageIndex, intent }) {
|
||||
return pdfManager.getPage(pageIndex).then(function (page) {
|
||||
return page.getAnnotationsData(intent);
|
||||
|
|
|
@ -30,13 +30,14 @@ import {
|
|||
isStream,
|
||||
Ref,
|
||||
} from "./primitives.js";
|
||||
import { Lexer, Parser } from "./parser.js";
|
||||
import {
|
||||
DocStats,
|
||||
MissingDataException,
|
||||
ParserEOFException,
|
||||
XRefEntryException,
|
||||
XRefParseException,
|
||||
} from "./core_utils.js";
|
||||
import { Lexer, Parser } from "./parser.js";
|
||||
import { CipherTransformFactory } from "./crypto.js";
|
||||
|
||||
class XRef {
|
||||
|
@ -46,10 +47,7 @@ class XRef {
|
|||
this.entries = [];
|
||||
this.xrefstms = Object.create(null);
|
||||
this._cacheMap = new Map(); // Prepare the XRef cache.
|
||||
this.stats = {
|
||||
streamTypes: Object.create(null),
|
||||
fontTypes: Object.create(null),
|
||||
};
|
||||
this.stats = new DocStats(pdfManager.msgHandler);
|
||||
this._newRefNum = null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue