2012-08-31 15:48:21 -07:00
|
|
|
/* Copyright 2012 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2011-09-07 10:16:02 -07:00
|
|
|
|
2017-04-02 16:14:30 +02:00
|
|
|
import {
|
2019-08-01 16:31:32 +02:00
|
|
|
AbortException,
|
2023-02-09 15:49:13 +01:00
|
|
|
assert,
|
2019-08-01 16:31:32 +02:00
|
|
|
getVerbosityLevel,
|
|
|
|
info,
|
|
|
|
InvalidPDFException,
|
2023-07-17 16:33:06 +02:00
|
|
|
isNodeJS,
|
2019-08-01 16:31:32 +02:00
|
|
|
MissingPDFException,
|
|
|
|
PasswordException,
|
|
|
|
setVerbosityLevel,
|
2020-08-03 19:44:04 +02:00
|
|
|
stringToPDFString,
|
2019-08-01 16:31:32 +02:00
|
|
|
UnexpectedResponseException,
|
|
|
|
UnknownErrorException,
|
|
|
|
VerbosityLevel,
|
|
|
|
warn,
|
2020-01-02 12:00:16 +01:00
|
|
|
} from "../shared/util.js";
|
2023-02-09 22:01:30 +01:00
|
|
|
import {
|
|
|
|
arrayBuffersToBytes,
|
|
|
|
getNewAnnotationsMap,
|
|
|
|
XRefParseException,
|
|
|
|
} from "./core_utils.js";
|
2024-11-10 20:54:42 +01:00
|
|
|
import { Dict, isDict, Ref, RefSetCache } from "./primitives.js";
|
2020-01-02 12:00:16 +01:00
|
|
|
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
|
2023-06-22 19:48:40 +02:00
|
|
|
import { AnnotationFactory } from "./annotation.js";
|
2022-01-24 16:16:54 +01:00
|
|
|
import { clearGlobalCaches } from "./cleanup_helper.js";
|
2020-08-03 19:44:04 +02:00
|
|
|
import { incrementalUpdate } from "./writer.js";
|
2020-01-02 12:00:16 +01:00
|
|
|
import { MessageHandler } from "../shared/message_handler.js";
|
|
|
|
import { PDFWorkerStream } from "./worker_stream.js";
|
2023-09-11 17:51:22 +02:00
|
|
|
import { StructTreeRoot } from "./struct_tree.js";
|
2015-11-21 10:32:47 -06:00
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
class WorkerTask {
|
|
|
|
constructor(name) {
|
2015-10-20 20:50:32 -05:00
|
|
|
this.name = name;
|
|
|
|
this.terminated = false;
|
[api-minor] Replace the `PromiseCapability` with `Promise.withResolvers()`
This replaces our custom `PromiseCapability`-class with the new native `Promise.withResolvers()` functionality, which does *almost* the same thing[1]; please see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/withResolvers
The only difference is that `PromiseCapability` also had a `settled`-getter, which was however not widely used and the call-sites can either be removed or re-factored to avoid it. In particular:
- In `src/display/api.js` we can tweak the `PDFObjects`-class to use a "special" initial data-value and just compare against that, in order to replace the `settled`-state.
- In `web/app.js` we change the only case to manually track the `settled`-state, which should hopefully be OK given how this is being used.
- In `web/pdf_outline_viewer.js` we can remove the `settled`-checks, since the code should work just fine without it. The only thing that could potentially happen is that we try to `resolve` a Promise multiple times, which is however *not* a problem since the value of a Promise cannot be changed once fulfilled or rejected.
- In `web/pdf_viewer.js` we can remove the `settled`-checks, since the code should work fine without them:
- For the `_onePageRenderedCapability` case the `settled`-check is used in a `EventBus`-listener which is *removed* on its first (valid) invocation.
- For the `_pagesCapability` case the `settled`-check is used in a print-related helper that works just fine with "only" the other checks.
- In `test/unit/api_spec.js` we can change the few relevant cases to manually track the `settled`-state, since this is both simple and *test-only* code.
---
[1] In browsers/environments that lack native support, note [the compatibility data](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/withResolvers#browser_compatibility), it'll be polyfilled via the `core-js` library (but only in `legacy` builds).
2024-03-28 16:42:37 +01:00
|
|
|
this._capability = Promise.withResolvers();
|
2015-10-20 20:50:32 -05:00
|
|
|
}
|
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
get finished() {
|
|
|
|
return this._capability.promise;
|
|
|
|
}
|
2015-10-20 20:50:32 -05:00
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
finish() {
|
|
|
|
this._capability.resolve();
|
|
|
|
}
|
2015-10-20 20:50:32 -05:00
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
terminate() {
|
|
|
|
this.terminated = true;
|
|
|
|
}
|
2015-10-20 20:50:32 -05:00
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
ensureNotTerminated() {
|
|
|
|
if (this.terminated) {
|
|
|
|
throw new Error("Worker task was terminated");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-10-20 20:50:32 -05:00
|
|
|
|
2020-06-16 11:44:33 +02:00
|
|
|
class WorkerMessageHandler {
|
|
|
|
static setup(handler, port) {
|
2021-04-25 15:51:20 +02:00
|
|
|
let testMessageProcessed = false;
|
2022-11-17 15:12:48 +01:00
|
|
|
handler.on("test", function (data) {
|
2015-12-16 18:37:43 -06:00
|
|
|
if (testMessageProcessed) {
|
|
|
|
return; // we already processed 'test' message once
|
|
|
|
}
|
|
|
|
testMessageProcessed = true;
|
|
|
|
|
2022-03-16 13:04:47 +01:00
|
|
|
// Ensure that `TypedArray`s can be sent to the worker.
|
|
|
|
handler.send("test", data instanceof Uint8Array);
|
2015-10-27 10:07:20 -05:00
|
|
|
});
|
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
handler.on("configure", function (data) {
|
2016-03-03 10:13:37 -06:00
|
|
|
setVerbosityLevel(data.verbosity);
|
|
|
|
});
|
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
handler.on("GetDocRequest", function (data) {
|
2015-10-27 10:07:20 -05:00
|
|
|
return WorkerMessageHandler.createDocumentHandler(data, port);
|
|
|
|
});
|
2020-06-16 11:44:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static createDocumentHandler(docParams, port) {
|
2015-10-27 12:55:15 -05:00
|
|
|
// This context is actually holds references on pdfManager and handler,
|
|
|
|
// until the latter is destroyed.
|
2021-04-25 15:51:20 +02:00
|
|
|
let pdfManager;
|
|
|
|
let terminated = false;
|
|
|
|
let cancelXHRs = null;
|
2023-02-09 22:01:16 +01:00
|
|
|
const WorkerTasks = new Set();
|
2019-03-02 11:36:30 +01:00
|
|
|
const verbosity = getVerbosityLevel();
|
2015-10-20 17:45:55 -05:00
|
|
|
|
2022-10-09 11:30:24 +02:00
|
|
|
const { docId, apiVersion } = docParams;
|
2019-09-01 16:43:58 +02:00
|
|
|
const workerVersion =
|
|
|
|
typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING")
|
|
|
|
? PDFJSDev.eval("BUNDLE_VERSION")
|
|
|
|
: null;
|
|
|
|
if (apiVersion !== workerVersion) {
|
2017-09-27 15:19:31 +02:00
|
|
|
throw new Error(
|
|
|
|
`The API version "${apiVersion}" does not match ` +
|
|
|
|
`the Worker version "${workerVersion}".`
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2020-02-10 13:54:09 +01:00
|
|
|
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) {
|
|
|
|
// Fail early, and predictably, rather than having (some) fonts fail to
|
|
|
|
// load/render with slightly cryptic error messages in environments where
|
|
|
|
// the `Array.prototype` has been *incorrectly* extended.
|
|
|
|
//
|
|
|
|
// PLEASE NOTE: We do *not* want to slow down font parsing by adding
|
|
|
|
// `hasOwnProperty` checks all over the code-base.
|
|
|
|
const enumerableProperties = [];
|
|
|
|
for (const property in []) {
|
|
|
|
enumerableProperties.push(property);
|
|
|
|
}
|
|
|
|
if (enumerableProperties.length) {
|
|
|
|
throw new Error(
|
|
|
|
"The `Array.prototype` contains unexpected enumerable properties: " +
|
|
|
|
enumerableProperties.join(", ") +
|
|
|
|
"; thus breaking e.g. `for...in` iteration of `Array`s."
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
2022-10-09 11:30:24 +02:00
|
|
|
const workerHandlerName = docId + "_worker";
|
2021-04-25 15:51:20 +02:00
|
|
|
let handler = new MessageHandler(workerHandlerName, docId, port);
|
2015-10-27 10:07:20 -05:00
|
|
|
|
2015-10-20 17:45:55 -05:00
|
|
|
// Guard that aborts in-flight parsing once the whole worker has been
// terminated; callers invoke this between potentially long-running steps.
function ensureNotTerminated() {
  if (!terminated) {
    return;
  }
  throw new Error("Worker was terminated");
}
|
2011-10-09 10:37:53 +02:00
|
|
|
|
2015-10-20 20:50:32 -05:00
|
|
|
// Registers a task in the active set, so worker termination can wait
// for every outstanding task to finish.
function startWorkerTask(workerTask) {
  WorkerTasks.add(workerTask);
}
|
|
|
|
|
|
|
|
// Resolves the task's `finished` promise and drops it from the active set.
function finishWorkerTask(workerTask) {
  workerTask.finish();
  WorkerTasks.delete(workerTask);
}
|
|
|
|
|
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326)
For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1].
Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase.
Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made.
Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2].
Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call.
Whether or not this is a problem depends very much on what you actually measure, please consider the following examples:
```javascript
console.time('first');
getDocument(...).promise.then((pdfDocument) => {
console.timeEnd('first');
});
console.time('second');
getDocument(...).promise.then((pdfDocument) => {
pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`.
console.timeEnd('second');
});
});
```
The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable.
---
[1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated.
In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects.
[2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page.
[3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-04 21:51:27 +01:00
|
|
|
/**
 * Parses the document structure and runs the basic validity checks,
 * returning the data needed to answer the initial `GetDocRequest`.
 *
 * NOTE(review): `pdfManager`, `handler`, `startWorkerTask` and
 * `finishWorkerTask` come from the enclosing `createDocumentHandler` scope.
 *
 * @param {boolean} recoveryMode - If true, XRef parsing falls back to
 *   indexing the entire file (used after a first parsing attempt failed).
 * @returns {Promise<Object>} Resolves with
 *   `{ numPages, fingerprints, htmlForXfa }`.
 */
async function loadDocument(recoveryMode) {
  // These steps must run sequentially: the header and startxref offset
  // have to be read before the XRef table/catalog can be parsed.
  await pdfManager.ensureDoc("checkHeader");
  await pdfManager.ensureDoc("parseStartXRef");
  await pdfManager.ensureDoc("parse", [recoveryMode]);

  // Check that at least the first page can be successfully loaded,
  // since otherwise the XRef table is definitely not valid.
  await pdfManager.ensureDoc("checkFirstPage", [recoveryMode]);
  // Check that the last page can be successfully loaded, to ensure that
  // `numPages` is correct, and fallback to walking the entire /Pages-tree.
  await pdfManager.ensureDoc("checkLastPage", [recoveryMode]);

  const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
  if (isPureXfa) {
    // Font loading is wrapped in a WorkerTask so that worker termination
    // can wait for it; XFA image loading runs concurrently.
    const task = new WorkerTask("loadXfaFonts");
    startWorkerTask(task);
    await Promise.all([
      pdfManager
        .loadXfaFonts(handler, task)
        .catch(reason => {
          // Ignore errors, to allow the document to load.
        })
        .then(() => finishWorkerTask(task)),
      pdfManager.loadXfaImages(),
    ]);
  }

  // Independent lookups, so fetch them in parallel.
  const [numPages, fingerprints] = await Promise.all([
    pdfManager.ensureDoc("numPages"),
    pdfManager.ensureDoc("fingerprints"),
  ]);

  // Get htmlForXfa after numPages to avoid to create HTML twice.
  const htmlForXfa = isPureXfa
    ? await pdfManager.ensureDoc("htmlForXfa")
    : null;

  return { numPages, fingerprints, htmlForXfa };
}
|
|
|
|
|
2024-11-22 17:49:43 +01:00
|
|
|
/**
 * Creates the pdf-manager for this document: a `LocalPdfManager` when the
 * complete data is available up-front (or once the full stream has been
 * read), or a `NetworkPdfManager` when the transport supports range
 * requests.
 *
 * NOTE(review): `docId`, `handler`, `cancelXHRs` and `ensureNotTerminated`
 * are taken from the enclosing `createDocumentHandler` scope; this function
 * mutates `cancelXHRs` as a side effect.
 *
 * @returns {Promise} Resolves with the created pdf-manager instance, or
 *   rejects if reading the stream fails or the worker is terminated.
 */
async function getPdfManager({
  data,
  password,
  disableAutoFetch,
  rangeChunkSize,
  length,
  docBaseUrl,
  enableXfa,
  evaluatorOptions,
}) {
  // Shared constructor arguments; `source` (and possibly `length` and
  // `disableAutoFetch`) is filled in below depending on the transport.
  const pdfManagerArgs = {
    source: null,
    disableAutoFetch,
    docBaseUrl,
    docId,
    enableXfa,
    evaluatorOptions,
    handler,
    length,
    password,
    rangeChunkSize,
  };

  // The entire file was transferred directly; no streaming needed.
  if (data) {
    pdfManagerArgs.source = data;

    return new LocalPdfManager(pdfManagerArgs);
  }
  const pdfStream = new PDFWorkerStream(handler),
    fullRequest = pdfStream.getFullReader();

  const pdfManagerCapability = Promise.withResolvers();
  let newPdfManager,
    cachedChunks = [],
    loaded = 0;

  // Once the response headers are known we can decide whether range
  // requests are usable; if so, switch to a NetworkPdfManager.
  fullRequest.headersReady
    .then(function () {
      if (!fullRequest.isRangeSupported) {
        return;
      }
      pdfManagerArgs.source = pdfStream;
      pdfManagerArgs.length = fullRequest.contentLength;
      // We don't need auto-fetch when streaming is enabled.
      pdfManagerArgs.disableAutoFetch ||= fullRequest.isStreamingSupported;

      newPdfManager = new NetworkPdfManager(pdfManagerArgs);
      // There may be a chance that `newPdfManager` is not initialized for
      // the first few runs of `readchunk` block of code. Be sure to send
      // all cached chunks, if any, to chunked_stream via pdf_manager.
      for (const chunk of cachedChunks) {
        newPdfManager.sendProgressiveData(chunk);
      }

      cachedChunks = [];
      pdfManagerCapability.resolve(newPdfManager);
      cancelXHRs = null;
    })
    .catch(function (reason) {
      pdfManagerCapability.reject(reason);
      cancelXHRs = null;
    });

  // Read the full stream in parallel with the headers; chunks arriving
  // before a manager exists are cached and flushed to it later.
  new Promise(function (resolve, reject) {
    const readChunk = function ({ value, done }) {
      try {
        ensureNotTerminated();
        if (done) {
          // Stream exhausted without range support: build a local manager
          // from the concatenated chunks.
          if (!newPdfManager) {
            const pdfFile = arrayBuffersToBytes(cachedChunks);
            cachedChunks = [];

            if (length && pdfFile.length !== length) {
              warn("reported HTTP length is different from actual");
            }
            pdfManagerArgs.source = pdfFile;

            newPdfManager = new LocalPdfManager(pdfManagerArgs);
            pdfManagerCapability.resolve(newPdfManager);
          }
          cancelXHRs = null;
          return;
        }
        if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
          assert(
            value instanceof ArrayBuffer,
            "readChunk (getPdfManager) - expected an ArrayBuffer."
          );
        }
        loaded += value.byteLength;

        // Without streaming support, report loading progress manually.
        if (!fullRequest.isStreamingSupported) {
          handler.send("DocProgress", {
            loaded,
            total: Math.max(loaded, fullRequest.contentLength || 0),
          });
        }

        if (newPdfManager) {
          newPdfManager.sendProgressiveData(value);
        } else {
          cachedChunks.push(value);
        }
        fullRequest.read().then(readChunk, reject);
      } catch (e) {
        reject(e);
      }
    };
    fullRequest.read().then(readChunk, reject);
  }).catch(function (e) {
    pdfManagerCapability.reject(e);
    cancelXHRs = null;
  });

  // Allow the caller to abort all outstanding network requests.
  cancelXHRs = reason => {
    pdfStream.cancelAllRequests(reason);
  };

  return pdfManagerCapability.promise;
}
|
|
|
|
|
2016-12-31 13:59:07 +01:00
|
|
|
// Builds the PdfManager for the given document data and, once the document
// has been parsed, reports the result back to the main thread. Handles
// password prompts, recoverable XRef corruption, and termination that
// happens while setup is still in flight.
function setupDoc(data) {
  // Sends the parsed document info to the main thread.
  function onSuccess(doc) {
    ensureNotTerminated();
    handler.send("GetDoc", { pdfInfo: doc });
  }

  // Maps a parsing failure onto the appropriate main-thread message:
  // password errors trigger a "PasswordRequest" round-trip, the known
  // exception types are forwarded as-is, and anything else is wrapped in
  // an UnknownErrorException.
  function onFailure(ex) {
    ensureNotTerminated();

    if (ex instanceof PasswordException) {
      const task = new WorkerTask(`PasswordException: response ${ex.code}`);
      startWorkerTask(task);

      handler
        .sendWithPromise("PasswordRequest", ex)
        .then(function ({ password }) {
          finishWorkerTask(task);
          pdfManager.updatePassword(password);
          // Retry loading now that the password has been updated.
          pdfManagerReady();
        })
        .catch(function () {
          // No password was supplied (e.g. the prompt was dismissed);
          // surface the original PasswordException instead.
          finishWorkerTask(task);
          handler.send("DocException", ex);
        });
    } else if (
      ex instanceof InvalidPDFException ||
      ex instanceof MissingPDFException ||
      ex instanceof UnexpectedResponseException ||
      ex instanceof UnknownErrorException
    ) {
      handler.send("DocException", ex);
    } else {
      handler.send(
        "DocException",
        new UnknownErrorException(ex.message, ex.toString())
      );
    }
  }

  // Loads the document once the PdfManager is in place; on an
  // XRefParseException the full stream is awaited and parsing is retried
  // in recovery mode before giving up.
  function pdfManagerReady() {
    ensureNotTerminated();

    loadDocument(false).then(onSuccess, function (reason) {
      ensureNotTerminated();

      // Try again with recoveryMode == true
      if (!(reason instanceof XRefParseException)) {
        onFailure(reason);
        return;
      }
      pdfManager.requestLoadedStream().then(function () {
        ensureNotTerminated();

        loadDocument(true).then(onSuccess, onFailure);
      });
    });
  }

  ensureNotTerminated();

  getPdfManager(data)
    .then(function (newPdfManager) {
      if (terminated) {
        // We were in a process of setting up the manager, but it got
        // terminated in the middle.
        newPdfManager.terminate(
          new AbortException("Worker was terminated.")
        );
        throw new Error("Worker was terminated");
      }
      pdfManager = newPdfManager;

      // Report, asynchronously, once the entire stream has arrived
      // (noFetch = true: don't trigger a fetch just for this).
      pdfManager.requestLoadedStream(/* noFetch = */ true).then(stream => {
        handler.send("DataLoaded", { length: stream.bytes.byteLength });
      });
    })
    .then(pdfManagerReady, onFailure);
}
|
2012-04-11 15:52:15 -07:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Replies with the basic properties of a single page; all four values are
// resolved in parallel before the response object is assembled.
handler.on("GetPage", ({ pageIndex }) =>
  pdfManager.getPage(pageIndex).then(async page => {
    const [rotate, ref, userUnit, view] = await Promise.all([
      pdfManager.ensure(page, "rotate"),
      pdfManager.ensure(page, "ref"),
      pdfManager.ensure(page, "userUnit"),
      pdfManager.ensure(page, "view"),
    ]);
    return {
      rotate,
      ref,
      refStr: ref?.toString() ?? null,
      userUnit,
      view,
    };
  })
);
|
2011-10-09 10:37:53 +02:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Simple lookup handlers: each message maps onto a single catalog/document
// ensure-call (or a short chain of them) whose promise becomes the reply.
handler.on("GetPageIndex", ({ num, gen }) =>
  pdfManager.ensureCatalog("getPageIndex", [Ref.get(num, gen)])
);

handler.on("GetDestinations", () =>
  pdfManager.ensureCatalog("destinations")
);

handler.on("GetDestination", ({ id }) =>
  pdfManager.ensureCatalog("getDestination", [id])
);

handler.on("GetPageLabels", () => pdfManager.ensureCatalog("pageLabels"));

handler.on("GetPageLayout", () => pdfManager.ensureCatalog("pageLayout"));

handler.on("GetPageMode", () => pdfManager.ensureCatalog("pageMode"));

handler.on("GetViewerPreferences", () =>
  pdfManager.ensureCatalog("viewerPreferences")
);

handler.on("GetOpenAction", () => pdfManager.ensureCatalog("openAction"));

handler.on("GetAttachments", () => pdfManager.ensureCatalog("attachments"));

handler.on("GetDocJSActions", () => pdfManager.ensureCatalog("jsActions"));

handler.on("GetPageJSActions", ({ pageIndex }) =>
  pdfManager
    .getPage(pageIndex)
    .then(page => pdfManager.ensure(page, "jsActions"))
);

handler.on("GetOutline", () => pdfManager.ensureCatalog("documentOutline"));

handler.on("GetOptionalContentConfig", () =>
  pdfManager.ensureCatalog("optionalContentConfig")
);

handler.on("GetPermissions", () => pdfManager.ensureCatalog("permissions"));

// The reply is a [documentInfo, metadata] pair.
handler.on("GetMetadata", () =>
  Promise.all([
    pdfManager.ensureDoc("documentInfo"),
    pdfManager.ensureCatalog("metadata"),
  ])
);

handler.on("GetMarkInfo", () => pdfManager.ensureCatalog("markInfo"));

// Replies with the raw bytes of the (fully loaded) PDF stream.
handler.on("GetData", () =>
  pdfManager.requestLoadedStream().then(stream => stream.bytes)
);
|
|
|
|
|
2020-04-14 12:28:14 +02:00
|
|
|
// Fetches the annotation data of a page; the worker task is always
// finished, whether the extraction succeeds or fails.
handler.on("GetAnnotations", function ({ pageIndex, intent }) {
  return pdfManager.getPage(pageIndex).then(async function (page) {
    const task = new WorkerTask(`GetAnnotations: page ${pageIndex}`);
    startWorkerTask(task);

    try {
      return await page.getAnnotationsData(handler, task, intent);
    } finally {
      finishWorkerTask(task);
    }
  });
});
|
|
|
|
|
2020-09-30 20:58:45 +02:00
|
|
|
// Form-related lookups, delegated to the document.
handler.on("GetFieldObjects", async () => {
  const fieldObjects = await pdfManager.ensureDoc("fieldObjects");
  // Normalize "no form fields" to null for the main thread.
  return fieldObjects?.allFields || null;
});

handler.on("HasJSActions", () => pdfManager.ensureDoc("hasJSActions"));

handler.on("GetCalculationOrderIds", () =>
  pdfManager.ensureDoc("calculationOrderIds")
);
|
|
|
|
|
2020-11-29 09:59:03 +01:00
|
|
|
// Serializes the current document state (form fields, editor annotations,
// XFA data) as an incremental update appended to the original bytes.
// Returns the complete, updated PDF as a typed array; if nothing changed,
// the original bytes are returned unmodified.
handler.on(
  "SaveDocument",
  async function ({ isPureXfa, numPages, annotationStorage, filename }) {
    // Everything needed to build the incremental update, fetched up-front
    // and in parallel.
    const globalPromises = [
      pdfManager.requestLoadedStream(),
      pdfManager.ensureCatalog("acroForm"),
      pdfManager.ensureCatalog("acroFormRef"),
      pdfManager.ensureDoc("startXRef"),
      pdfManager.ensureDoc("xref"),
      pdfManager.ensureDoc("linearization"),
      pdfManager.ensureCatalog("structTreeRoot"),
    ];
    // Accumulates every object (ref -> data) written by the save.
    const changes = new RefSetCache();
    const promises = [];

    // Editor-created annotations, grouped per page (not applicable to
    // pure-XFA documents).
    const newAnnotationsByPage = !isPureXfa
      ? getNewAnnotationsMap(annotationStorage)
      : null;
    const [
      stream,
      acroForm,
      acroFormRef,
      startXRef,
      xref,
      linearization,
      _structTreeRoot,
    ] = await Promise.all(globalPromises);
    const catalogRef = xref.trailer.getRaw("Root") || null;
    // Three-state flag: undefined = leave structure tree alone,
    // null = create a new one, otherwise = update the existing one.
    let structTreeRoot;

    if (newAnnotationsByPage) {
      if (!_structTreeRoot) {
        if (
          await StructTreeRoot.canCreateStructureTree({
            catalogRef,
            pdfManager,
            newAnnotationsByPage,
          })
        ) {
          structTreeRoot = null;
        }
      } else if (
        await _structTreeRoot.canUpdateStructTree({
          pdfManager,
          xref,
          newAnnotationsByPage,
        })
      ) {
        structTreeRoot = _structTreeRoot;
      }

      // Pre-render any images used by the new annotations.
      const imagePromises = AnnotationFactory.generateImages(
        annotationStorage.values(),
        xref,
        pdfManager.evaluatorOptions.isOffscreenCanvasSupported
      );
      // When the structure tree must be (re)built, the annotation saves
      // are collected separately so the tree work can run after them.
      const newAnnotationPromises =
        structTreeRoot === undefined ? promises : [];
      for (const [pageIndex, annotations] of newAnnotationsByPage) {
        newAnnotationPromises.push(
          pdfManager.getPage(pageIndex).then(page => {
            const task = new WorkerTask(`Save (editor): page ${pageIndex}`);
            return page
              .saveNewAnnotations(
                handler,
                task,
                annotations,
                imagePromises,
                changes
              )
              .finally(function () {
                finishWorkerTask(task);
              });
          })
        );
      }
      if (structTreeRoot === null) {
        // No structTreeRoot exists, so we need to create one.
        promises.push(
          Promise.all(newAnnotationPromises).then(async () => {
            await StructTreeRoot.createStructureTree({
              newAnnotationsByPage,
              xref,
              catalogRef,
              pdfManager,
              changes,
            });
          })
        );
      } else if (structTreeRoot) {
        // Extend the existing structure tree with the new annotations.
        promises.push(
          Promise.all(newAnnotationPromises).then(async () => {
            await structTreeRoot.updateStructureTree({
              newAnnotationsByPage,
              pdfManager,
              changes,
            });
          })
        );
      }
    }

    if (isPureXfa) {
      promises.push(pdfManager.serializeXfaData(annotationStorage));
    } else {
      // Let every page save its (modified) annotations into `changes`.
      for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
        promises.push(
          pdfManager.getPage(pageIndex).then(function (page) {
            const task = new WorkerTask(`Save: page ${pageIndex}`);
            return page
              .save(handler, task, annotationStorage, changes)
              .finally(function () {
                finishWorkerTask(task);
              });
          })
        );
      }
    }
    const refs = await Promise.all(promises);

    let xfaData = null;
    if (isPureXfa) {
      // The serialized XFA data is the first (and only) promise result.
      xfaData = refs[0];
      if (!xfaData) {
        return stream.bytes;
      }
    } else if (changes.size === 0) {
      // No new refs so just return the initial bytes
      return stream.bytes;
    }

    const needAppearances =
      acroFormRef &&
      acroForm instanceof Dict &&
      changes.values().some(ref => ref.needAppearances);

    // Locate (or reserve) the "datasets" entry of the XFA array, which is
    // where updated XFA form data is stored.
    const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null;
    let xfaDatasetsRef = null;
    let hasXfaDatasetsEntry = false;
    if (Array.isArray(xfa)) {
      // The XFA array alternates between names and stream refs.
      for (let i = 0, ii = xfa.length; i < ii; i += 2) {
        if (xfa[i] === "datasets") {
          xfaDatasetsRef = xfa[i + 1];
          hasXfaDatasetsEntry = true;
        }
      }
      if (xfaDatasetsRef === null) {
        xfaDatasetsRef = xref.getNewTemporaryRef();
      }
    } else if (xfa) {
      // TODO: Support XFA streams.
      warn("Unsupported XFA type.");
    }

    let newXrefInfo = Object.create(null);
    if (xref.trailer) {
      // Get string info from Info in order to compute fileId.
      const infoObj = Object.create(null);
      const xrefInfo = xref.trailer.get("Info") || null;
      if (xrefInfo instanceof Dict) {
        for (const [key, value] of xrefInfo) {
          if (typeof value === "string") {
            infoObj[key] = stringToPDFString(value);
          }
        }
      }

      newXrefInfo = {
        rootRef: catalogRef,
        encryptRef: xref.trailer.getRaw("Encrypt") || null,
        newRef: xref.getNewTemporaryRef(),
        infoRef: xref.trailer.getRaw("Info") || null,
        info: infoObj,
        fileIds: xref.trailer.get("ID") || null,
        startXRef: linearization
          ? startXRef
          : (xref.lastXRefStreamPos ?? startXRef),
        filename,
      };
    }

    return incrementalUpdate({
      originalData: stream.bytes,
      xrefInfo: newXrefInfo,
      changes,
      xref,
      hasXfa: !!xfa,
      xfaDatasetsRef,
      hasXfaDatasetsEntry,
      needAppearances,
      acroFormRef,
      acroForm,
      xfaData,
      // Use the same kind of XRef as the previous one.
      useXrefStream: isDict(xref.topDict, "XRef"),
    }).finally(() => {
      // Temporary refs must be released even if the update failed.
      xref.resetNewTemporaryRef();
    });
  }
);
|
2020-08-03 19:44:04 +02:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Builds the operator list of a page and streams it through `sink`;
// the sink is closed on success and receives the error on failure.
handler.on("GetOperatorList", function (data, sink) {
  const pageIndex = data.pageIndex;
  pdfManager.getPage(pageIndex).then(function (page) {
    const task = new WorkerTask(`GetOperatorList: page ${pageIndex}`);
    startWorkerTask(task);

    // NOTE: Keep this condition in sync with the `info` helper function.
    const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;

    // Pre compile the pdf page and fetch the fonts/images.
    page
      .getOperatorList({
        handler,
        sink,
        task,
        intent: data.intent,
        cacheKey: data.cacheKey,
        annotationStorage: data.annotationStorage,
        modifiedIds: data.modifiedIds,
      })
      .then(
        function (operatorListInfo) {
          finishWorkerTask(task);

          if (start) {
            info(
              `page=${pageIndex + 1} - getOperatorList: time=` +
                `${Date.now() - start}ms, len=${operatorListInfo.length}`
            );
          }
          sink.close();
        },
        function (reason) {
          finishWorkerTask(task);
          if (task.terminated) {
            return; // ignoring errors from the terminated thread
          }
          sink.error(reason);

          // TODO: Should `reason` be re-thrown here (currently that causes
          // "Uncaught exception: ..." messages in the console)?
        }
      );
  });
});
|
2011-10-09 10:37:53 +02:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Stream the text content of a single page back to the main thread,
// closing the sink on success and forwarding any failure through it.
handler.on("GetTextContent", function (data, sink) {
  const { pageIndex, includeMarkedContent, disableNormalization } = data;

  pdfManager.getPage(pageIndex).then(page => {
    const task = new WorkerTask(`GetTextContent: page ${pageIndex}`);
    startWorkerTask(task);

    // NOTE: Keep this condition in sync with the `info` helper function.
    const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;

    page
      .extractTextContent({
        handler,
        task,
        sink,
        includeMarkedContent,
        disableNormalization,
      })
      .then(
        () => {
          finishWorkerTask(task);

          if (start) {
            info(
              `page=${pageIndex + 1} - getTextContent: time=` +
                `${Date.now() - start}ms`
            );
          }
          sink.close();
        },
        reason => {
          finishWorkerTask(task);
          if (task.terminated) {
            return; // ignoring errors from the terminated thread
          }
          sink.error(reason);

          // TODO: Should `reason` be re-thrown here (currently that causes
          // "Uncaught exception: ..." messages in the console)?
        }
      );
  });
});
|
2013-04-12 11:37:49 -07:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Resolve the structure tree of the requested page.
handler.on("GetStructTree", function (data) {
  const { pageIndex } = data;
  return pdfManager
    .getPage(pageIndex)
    .then(page => pdfManager.ensure(page, "getStructTree"));
});
|
|
|
|
|
2020-04-14 12:28:14 +02:00
|
|
|
// Trigger the built-in font-renderer fallback for the given font id.
handler.on("FontFallback", data => pdfManager.fontFallback(data.id, handler));
|
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Release worker-side caches; always treated as manually triggered here.
handler.on("Cleanup", data =>
  pdfManager.cleanup(/* manuallyTriggered = */ true)
);
|
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// Shut the worker down: abort the PDF-manager, cancel any pending
// requests, terminate outstanding tasks, and destroy the message
// handler once everything has settled.
handler.on("Terminate", function (data) {
  terminated = true;

  const pending = [];
  if (pdfManager) {
    pdfManager.terminate(new AbortException("Worker was terminated."));

    pending.push(pdfManager.cleanup());

    pdfManager = null;
  } else {
    clearGlobalCaches();
  }
  cancelXHRs?.(new AbortException("Worker was terminated."));

  for (const task of WorkerTasks) {
    pending.push(task.finished);
    task.terminate();
  }

  return Promise.all(pending).then(() => {
    // Even though the handler is being destroyed, the resolved response
    // promise must still be sent back to the main thread.
    handler.destroy();
    handler = null;
  });
});
|
2015-10-27 10:07:20 -05:00
|
|
|
|
2022-11-17 15:12:48 +01:00
|
|
|
// The main thread signalled readiness; start parsing the document.
handler.on("Ready", data => {
  setupDoc(docParams);
  docParams = null; // No longer needed, so release it to save memory.
});
|
2023-02-22 22:08:21 +01:00
|
|
|
|
|
|
|
// Test-only message handlers, which expose internal document state to
// the unit/integration tests; excluded from production builds.
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
  handler.on("GetXFADatasets", data => pdfManager.ensureDoc("xfaDatasets"));

  handler.on("GetXRefPrevValue", data =>
    pdfManager.ensureXRef("trailer").then(trailer => trailer.get("Prev"))
  );

  handler.on("GetStartXRefPos", data => pdfManager.ensureDoc("startXRef"));

  handler.on("GetAnnotArray", data =>
    pdfManager
      .getPage(data.pageIndex)
      .then(page => page.annotations.map(a => a.toString()))
  );
}
|
|
|
|
|
2015-10-27 10:07:20 -05:00
|
|
|
return workerHandlerName;
|
2020-06-16 11:44:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static initializeFromPort(port) {
|
2021-04-25 15:51:20 +02:00
|
|
|
const handler = new MessageHandler("worker", "main", port);
|
2017-05-02 13:03:58 -05:00
|
|
|
WorkerMessageHandler.setup(handler, port);
|
|
|
|
handler.send("ready", null);
|
2020-06-16 11:44:33 +02:00
|
|
|
}
|
|
|
|
}
|
2011-10-25 10:16:20 -07:00
|
|
|
|
2017-05-02 13:03:58 -05:00
|
|
|
/**
 * Duck-type check for a MessagePort-like object: it must be able to
 * post messages and expose an `onmessage` property.
 * @param {*} maybePort - The candidate object.
 * @returns {boolean} Whether the object looks like a MessagePort.
 */
function isMessagePort(maybePort) {
  const canPost = typeof maybePort.postMessage === "function";
  return canPost && "onmessage" in maybePort;
}
|
|
|
|
|
2020-05-14 15:55:11 +02:00
|
|
|
// Auto-initialize only when running inside an actual Web Worker
// (no `window`, a MessagePort-like `self`) and not in Node.js.
if (
  typeof window === "undefined" &&
  typeof self !== "undefined" &&
  !isNodeJS &&
  isMessagePort(self)
) {
  WorkerMessageHandler.initializeFromPort(self);
}
|
2015-11-21 10:32:47 -06:00
|
|
|
|
2021-01-09 15:37:44 +01:00
|
|
|
export { WorkerMessageHandler, WorkerTask };
|