1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

[api-minor] Enable transferring of TypedArray PDF data by default (PR 15908 follow-up)

This patch removes the recently introduced `transferPdfData` API-option, and simply enables transferring of TypedArray data *by default* instead of copying it. This will help reduce main-thread memory usage; however, it will take ownership of the TypedArrays. Currently this only applies to the following cases:
 - TypedArrays passed to the `getDocument`-function in the API, in order to open PDF documents from binary data.
 - TypedArrays passed to a `PDFDataRangeTransport`-instance, used to support custom PDF document fetching/loading (see e.g. the Firefox PDF Viewer).

*PLEASE NOTE:* To avoid being affected by this, please simply *copy* any TypedArray data before passing it to either of the functions/methods mentioned above.

Now that we transfer TypedArray data that we previously only copied, we need to be more careful with input validation. Given that the `{IPDFStreamReader, IPDFStreamRangeReader}.read` methods will always return ArrayBuffer data, which is then transferred to the worker-thread[1], the actual TypedArray data passed to the API thus needs to have exactly the same size as its underlying ArrayBuffer to prevent issues.
Hence we'll check for this and only allow transferring of *safe* TypedArray data, and fall back to simply copying the data just as before. This obviously shouldn't be an issue in the Firefox PDF Viewer, but for the general PDF.js library we need to be more careful here.

---
[1] See e09ad99973/src/display/api.js (L2492-L2506) and e09ad99973/src/display/api.js (L2578-L2590), respectively.
This commit is contained in:
Jonas Jenwald 2023-01-13 11:16:28 +01:00
parent 99cfab18c1
commit 397f943ca3
4 changed files with 83 additions and 109 deletions

View file

@ -139,8 +139,12 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
* @typedef {Object} DocumentInitParameters
* @property {string | URL} [url] - The URL of the PDF.
* @property {BinaryData} [data] - Binary PDF data.
* Use typed arrays (Uint8Array) to improve the memory usage. If PDF data is
* Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is
* BASE64-encoded, use `atob()` to convert it to a binary string first.
*
* NOTE: If TypedArrays are used they will generally be transferred to the
* worker-thread. This will help reduce main-thread memory usage, however
* it will take ownership of the TypedArrays.
* @property {Object} [httpHeaders] - Basic authentication headers.
* @property {boolean} [withCredentials] - Indicates whether or not
* cross-site Access-Control requests should be made using credentials such
@ -189,12 +193,6 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION")) {
* @property {number} [maxImageSize] - The maximum allowed image size in total
* pixels, i.e. width * height. Images above this value will not be rendered.
* Use -1 for no limit, which is also the default value.
* @property {boolean} [transferPdfData] - Determines if we can transfer
* TypedArrays used for loading the PDF file, utilized together with:
* - The `data`-option, for the `getDocument` function.
* - The `PDFDataTransportStream` implementation.
* This will help reduce main-thread memory usage, however it will take
* ownership of the TypedArrays. The default value is `false`.
* @property {boolean} [isEvalSupported] - Determines if we can evaluate strings
* as JavaScript. Primarily used to improve performance of font rendering, and
* when parsing PDF functions. The default value is `true`.
@ -317,8 +315,14 @@ function getDocument(src) {
val instanceof Buffer // eslint-disable-line no-undef
) {
params[key] = new Uint8Array(val);
} else if (val instanceof Uint8Array) {
break; // Use the data as-is when it's already a Uint8Array.
} else if (
val instanceof Uint8Array &&
val.byteLength === val.buffer.byteLength
) {
// Use the data as-is when it's already a Uint8Array that completely
// "utilizes" its underlying ArrayBuffer, to prevent any possible
// issues when transferring it to the worker-thread.
break;
} else if (typeof val === "string") {
params[key] = stringToBytes(val);
} else if (
@ -342,7 +346,6 @@ function getDocument(src) {
params.StandardFontDataFactory =
params.StandardFontDataFactory || DefaultStandardFontDataFactory;
params.ignoreErrors = params.stopAtErrors !== true;
params.transferPdfData = params.transferPdfData === true;
params.fontExtraProperties = params.fontExtraProperties === true;
params.pdfBug = params.pdfBug === true;
params.enableXfa = params.enableXfa === true;
@ -440,7 +443,6 @@ function getDocument(src) {
{
length: params.length,
initialData: params.initialData,
transferPdfData: params.transferPdfData,
progressiveDone: params.progressiveDone,
contentDispositionFilename: params.contentDispositionFilename,
disableRange: params.disableRange,
@ -515,8 +517,7 @@ async function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
source.contentDispositionFilename =
pdfDataRangeTransport.contentDispositionFilename;
}
const transfers =
source.transferPdfData && source.data ? [source.data.buffer] : null;
const transfers = source.data ? [source.data.buffer] : null;
const workerId = await worker.messageHandler.sendWithPromise(
"GetDocRequest",
@ -656,6 +657,10 @@ class PDFDocumentLoadingTask {
/**
* Abstract class to support range requests file loading.
*
* NOTE: The TypedArrays passed to the constructor and relevant methods below
* will generally be transferred to the worker-thread. This will help reduce
* main-thread memory usage, however it will take ownership of the TypedArrays.
*/
class PDFDataRangeTransport {
/**

View file

@ -18,13 +18,10 @@ import { isPdfFile } from "./display_utils.js";
/** @implements {IPDFStream} */
class PDFDataTransportStream {
#transferPdfData = false;
constructor(
{
length,
initialData,
transferPdfData = false,
progressiveDone = false,
contentDispositionFilename = null,
disableRange = false,
@ -38,14 +35,17 @@ class PDFDataTransportStream {
);
this._queuedChunks = [];
this.#transferPdfData = transferPdfData;
this._progressiveDone = progressiveDone;
this._contentDispositionFilename = contentDispositionFilename;
if (initialData?.length > 0) {
const buffer = this.#transferPdfData
? initialData.buffer
: new Uint8Array(initialData).buffer;
// Prevent any possible issues by only transferring a Uint8Array that
// completely "utilizes" its underlying ArrayBuffer.
const buffer =
initialData instanceof Uint8Array &&
initialData.byteLength === initialData.buffer.byteLength
? initialData.buffer
: new Uint8Array(initialData).buffer;
this._queuedChunks.push(buffer);
}
@ -77,8 +77,11 @@ class PDFDataTransportStream {
}
_onReceiveData({ begin, chunk }) {
// Prevent any possible issues by only transferring a Uint8Array that
// completely "utilizes" its underlying ArrayBuffer.
const buffer =
this.#transferPdfData && chunk?.length >= 0
chunk instanceof Uint8Array &&
chunk.byteLength === chunk.buffer.byteLength
? chunk.buffer
: new Uint8Array(chunk).buffer;