1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

[api-minor] Replace PDFDocumentProxy.getStats with a synchronous PDFDocumentProxy.stats getter

*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.

The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.

Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer document the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.

Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data actually have changed rather than once per rendered page (again beneficial in longer documents).

---
[1] Furthermore, the maximium number of `StreamType`/`FontType` are `10` respectively `12`, which means that regardless of the complexity and page count in a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)

[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only result in a total of seven "DocStats" messages being sent from the worker-thread.

[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
This commit is contained in:
Jonas Jenwald 2021-11-11 18:14:26 +01:00
parent 41ac3f0c07
commit 6da0944fc7
10 changed files with 158 additions and 67 deletions

View file

@ -1275,8 +1275,8 @@ describe("api", function () {
});
it("gets document stats", async function () {
const stats = await pdfDocument.getStats();
expect(stats).toEqual({ streamTypes: {}, fontTypes: {} });
const stats = pdfDocument.stats;
expect(stats).toEqual(null);
});
it("cleans up document resources", async function () {
@ -2021,15 +2021,16 @@ sozialökonomische Gerechtigkeit.`)
});
it("gets document stats after parsing page", async function () {
const stats = await page.getOperatorList().then(function () {
return pdfDocument.getStats();
});
await page.getOperatorList();
const stats = pdfDocument.stats;
const expectedStreamTypes = {};
expectedStreamTypes[StreamType.FLATE] = true;
const expectedFontTypes = {};
expectedFontTypes[FontType.TYPE1STANDARD] = true;
expectedFontTypes[FontType.CIDFONTTYPE2] = true;
const expectedStreamTypes = {
[StreamType.FLATE]: true,
};
const expectedFontTypes = {
[FontType.TYPE1STANDARD]: true,
[FontType.CIDFONTTYPE2]: true,
};
expect(stats).toEqual({
streamTypes: expectedStreamTypes,

View file

@ -16,6 +16,7 @@
import { isRef, Ref } from "../../src/core/primitives.js";
import { Page, PDFDocument } from "../../src/core/document.js";
import { assert } from "../../src/shared/util.js";
import { DocStats } from "../../src/core/core_utils.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { StringStream } from "../../src/core/stream.js";
@ -76,10 +77,7 @@ function buildGetDocumentParams(filename, options) {
class XRefMock {
constructor(array) {
this._map = Object.create(null);
this.stats = {
streamTypes: Object.create(null),
fontTypes: Object.create(null),
};
this.stats = new DocStats({ send: () => {} });
this._newRefNum = null;
for (const key in array) {