2013-02-06 15:19:29 -08:00
|
|
|
/* Copyright 2012 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2022-04-19 16:53:44 +02:00
|
|
|
import {
|
|
|
|
createValidAbsoluteUrl,
|
2023-02-27 12:27:28 +01:00
|
|
|
FeatureTest,
|
2022-04-19 16:53:44 +02:00
|
|
|
unreachable,
|
|
|
|
warn,
|
|
|
|
} from "../shared/util.js";
|
2020-01-02 12:00:16 +01:00
|
|
|
import { ChunkedStreamManager } from "./chunked_stream.js";
|
2025-01-30 11:46:38 +01:00
|
|
|
import { ImageResizer } from "./image_resizer.js";
|
|
|
|
import { JpegStream } from "./jpeg_stream.js";
|
|
|
|
import { JpxImage } from "./jpx.js";
|
2020-01-02 12:00:16 +01:00
|
|
|
import { MissingDataException } from "./core_utils.js";
|
|
|
|
import { PDFDocument } from "./document.js";
|
|
|
|
import { Stream } from "./stream.js";
|
2015-11-21 10:32:47 -06:00
|
|
|
|
Improve memory usage around the `BasePdfManager.docBaseUrl` parameter (PR 7689 follow-up)
While there is nothing *outright* wrong with the existing implementation, it can however lead to increased memory usage in one particular case (that I completely overlooked when implementing this):
For "data:"-URLs, which by definition contain the entire PDF document and can thus be arbitrarily large, we obviously want to avoid sending, storing, and/or logging the "raw" docBaseUrl.
To address this, this patch makes the following changes:
- Ignore any non-string in the `docBaseUrl` option passed to `getDocument`, since those are unsupported anyway, already on the main-thread.
- Ignore "data:"-URLs in the `docBaseUrl` option passed to `getDocument`, to avoid having to send what could potentially be a *very* long string to the worker-thread.
- Parse the `docBaseUrl` option *directly* in the `BasePdfManager`-constructors, on the worker-thread, to avoid having to store the "raw" docBaseUrl in the first place.
2021-03-16 11:56:39 +01:00
|
|
|
/**
 * Normalize the `docBaseUrl` option into an absolute URL string.
 *
 * @param {string} [url] - The raw base URL; any falsy value is treated as
 *   "not provided".
 * @returns {string|null} The `href` of the object returned by
 *   `createValidAbsoluteUrl`, or `null` when no URL was provided or when it
 *   was rejected (in which case a warning is emitted).
 */
function parseDocBaseUrl(url) {
  // Nothing to parse; stay silent since a missing base URL is not an error.
  if (!url) {
    return null;
  }

  const parsedUrl = createValidAbsoluteUrl(url);
  if (!parsedUrl) {
    warn(`Invalid absolute docBaseUrl: "${url}".`);
    return null;
  }
  return parsedUrl.href;
}
|
|
|
|
|
2018-06-23 21:34:31 +02:00
|
|
|
/**
 * Shared base for the PDF managers in this file.
 *
 * It stores the per-document settings, prepares the evaluator options, and
 * forwards most calls to `this.pdfDocument`. The base class never assigns
 * `pdfDocument` itself; the subclasses in this file do so in their
 * constructors. `ensure`, `requestRange`, `requestLoadedStream`,
 * `sendProgressiveData` and `terminate` are abstract here and report misuse
 * through `unreachable`.
 */
class BasePdfManager {
  /**
   * @param {Object} params
   * @param {string} [params.docBaseUrl] - Raw base URL, normalized through
   *   `parseDocBaseUrl` before being stored.
   * @param {*} params.docId - Stored as-is and exposed via `docId`.
   * @param {*} params.enableXfa - Stored as-is on `this.enableXfa`.
   * @param {Object} params.evaluatorOptions - NOTE: this object is updated
   *   in place and then frozen, i.e. the caller's object is modified.
   * @param {*} params.handler - Forwarded (only) to `JpxImage.setOptions`.
   * @param {*} params.password - Stored and exposed via `password`.
   *
   * The `source`, `disableAutoFetch`, `length` and `rangeChunkSize`
   * properties may also be present on the argument; they are not read here.
   */
  constructor({
    docBaseUrl,
    docId,
    enableXfa,
    evaluatorOptions,
    handler,
    password,
  }) {
    // Guard against direct instantiation. The check is only active when
    // `PDFJSDev` is undefined or when its "TESTING" flag is set.
    if (
      (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) &&
      this.constructor === BasePdfManager
    ) {
      unreachable("Cannot initialize BasePdfManager.");
    }

    this._docBaseUrl = parseDocBaseUrl(docBaseUrl);
    this._docId = docId;
    this._password = password;
    this.enableXfa = enableXfa;

    // Resolve `OffscreenCanvas` and `ImageDecoder` support a single time
    // here, so that the rest of the worker-thread code doesn't have to.
    // An option that is already falsy is left untouched, and in that case
    // the corresponding `FeatureTest` property isn't even read.
    if (evaluatorOptions.isOffscreenCanvasSupported) {
      evaluatorOptions.isOffscreenCanvasSupported =
        FeatureTest.isOffscreenCanvasSupported;
    }
    if (evaluatorOptions.isImageDecoderSupported) {
      evaluatorOptions.isImageDecoderSupported =
        FeatureTest.isImageDecoderSupported;
    }
    this.evaluatorOptions = Object.freeze(evaluatorOptions);

    // Set the image-related options once per document.
    ImageResizer.setOptions(evaluatorOptions);
    JpegStream.setOptions(evaluatorOptions);
    JpxImage.setOptions({ ...evaluatorOptions, handler });
  }

  /** The `docId` value given to the constructor. */
  get docId() {
    return this._docId;
  }

  /** The current password; see also `updatePassword`. */
  get password() {
    return this._password;
  }

  /** The parsed base URL string, or `null` if none/invalid. */
  get docBaseUrl() {
    return this._docBaseUrl;
  }

  /** Shorthand for `this.pdfDocument.catalog`. */
  get catalog() {
    return this.pdfDocument.catalog;
  }

  /** Run `ensure` against `this.pdfDocument`. */
  ensureDoc(prop, args) {
    const { pdfDocument } = this;
    return this.ensure(pdfDocument, prop, args);
  }

  /** Run `ensure` against `this.pdfDocument.xref`. */
  ensureXRef(prop, args) {
    const { xref } = this.pdfDocument;
    return this.ensure(xref, prop, args);
  }

  /** Run `ensure` against `this.pdfDocument.catalog`. */
  ensureCatalog(prop, args) {
    const { catalog } = this.pdfDocument;
    return this.ensure(catalog, prop, args);
  }

  /** Forwarded to `this.pdfDocument.getPage`. */
  getPage(pageIndex) {
    return this.pdfDocument.getPage(pageIndex);
  }

  /** Forwarded to `this.pdfDocument.fontFallback`. */
  fontFallback(id, handler) {
    return this.pdfDocument.fontFallback(id, handler);
  }

  /** Forwarded to `this.pdfDocument.loadXfaFonts`. */
  loadXfaFonts(handler, task) {
    return this.pdfDocument.loadXfaFonts(handler, task);
  }

  /** Forwarded to `this.pdfDocument.loadXfaImages`. */
  loadXfaImages() {
    return this.pdfDocument.loadXfaImages();
  }

  /** Forwarded to `this.pdfDocument.serializeXfaData`. */
  serializeXfaData(annotationStorage) {
    return this.pdfDocument.serializeXfaData(annotationStorage);
  }

  /**
   * Forwarded to `this.pdfDocument.cleanup`.
   * @param {boolean} [manuallyTriggered] - Defaults to `false`.
   */
  cleanup(manuallyTriggered = false) {
    return this.pdfDocument.cleanup(manuallyTriggered);
  }

  /**
   * Abstract. Since the method is `async`, anything thrown by `unreachable`
   * surfaces as a rejected promise rather than a synchronous exception.
   */
  async ensure(obj, prop, args) {
    unreachable("Abstract method `ensure` called");
  }

  /** Abstract; must be provided by subclasses. */
  requestRange(begin, end) {
    unreachable("Abstract method `requestRange` called");
  }

  /** Abstract; must be provided by subclasses. */
  requestLoadedStream(noFetch = false) {
    unreachable("Abstract method `requestLoadedStream` called");
  }

  /** Abstract; must be provided by subclasses. */
  sendProgressiveData(chunk) {
    unreachable("Abstract method `sendProgressiveData` called");
  }

  /** Replace the stored password with a new one. */
  updatePassword(password) {
    this._password = password;
  }

  /** Abstract; must be provided by subclasses. */
  terminate(reason) {
    unreachable("Abstract method `terminate` called");
  }
}
|
2013-02-06 15:19:29 -08:00
|
|
|
|
2018-06-23 21:34:31 +02:00
|
|
|
/**
 * PDF manager for the case where `args.source` is wrapped directly in a
 * `Stream`, so no range requests are ever issued.
 */
class LocalPdfManager extends BasePdfManager {
  /**
   * @param {Object} args - Passed unchanged to the `BasePdfManager`
   *   constructor; additionally `args.source` is handed to `Stream`.
   */
  constructor(args) {
    super(args);

    const localStream = new Stream(args.source);
    this.pdfDocument = new PDFDocument(this, localStream);
    // Created once up-front, so that every `requestLoadedStream` call
    // returns the very same promise.
    this._loadedStreamPromise = Promise.resolve(localStream);
  }

  /**
   * Read `obj[prop]`; when it is a function it is invoked on `obj` with
   * `args`, otherwise the value itself is the result.
   * @returns {Promise<*>}
   */
  async ensure(obj, prop, args) {
    const member = obj[prop];
    return typeof member === "function" ? member.apply(obj, args) : member;
  }

  /** No-op here: returns an already resolved promise. */
  requestRange(begin, end) {
    return Promise.resolve();
  }

  /** @returns {Promise} The promise created in the constructor. */
  requestLoadedStream(noFetch = false) {
    return this._loadedStreamPromise;
  }

  /** Intentionally empty. */
  terminate(reason) {}
}
|
2013-09-25 16:25:41 -05:00
|
|
|
|
2018-06-23 21:34:31 +02:00
|
|
|
/**
 * PDF manager backed by a `ChunkedStreamManager`; data that isn't available
 * yet is requested through the stream manager.
 */
class NetworkPdfManager extends BasePdfManager {
  /**
   * @param {Object} args - Passed unchanged to the `BasePdfManager`
   *   constructor. In addition `source`, `handler`, `length`,
   *   `disableAutoFetch` and `rangeChunkSize` are read from it and given to
   *   the `ChunkedStreamManager`.
   */
  constructor(args) {
    super(args);

    const { source, handler, length, disableAutoFetch, rangeChunkSize } = args;
    this.streamManager = new ChunkedStreamManager(source, {
      msgHandler: handler,
      length,
      disableAutoFetch,
      rangeChunkSize,
    });
    this.pdfDocument = new PDFDocument(this, this.streamManager.getStream());
  }

  /**
   * Read `obj[prop]`, invoking it on `obj` with `args` when it is a function.
   *
   * If doing so throws a `MissingDataException` synchronously, the range
   * given by the exception is requested and the whole operation is retried.
   * The result isn't awaited inside the `try`, hence a rejected promise
   * returned by `obj[prop]` is passed through without any retry.
   * @returns {Promise<*>}
   */
  async ensure(obj, prop, args) {
    try {
      const member = obj[prop];
      return typeof member === "function" ? member.apply(obj, args) : member;
    } catch (reason) {
      if (reason instanceof MissingDataException) {
        await this.requestRange(reason.begin, reason.end);
        return this.ensure(obj, prop, args);
      }
      // Anything else isn't ours to handle.
      throw reason;
    }
  }

  /** Forwarded to `this.streamManager.requestRange`. */
  requestRange(begin, end) {
    return this.streamManager.requestRange(begin, end);
  }

  /**
   * Forwarded to `this.streamManager.requestAllChunks`.
   * @param {boolean} [noFetch] - Defaults to `false`.
   */
  requestLoadedStream(noFetch = false) {
    return this.streamManager.requestAllChunks(noFetch);
  }

  /** Hand `chunk` to the stream manager, wrapped as `{ chunk }`. */
  sendProgressiveData(chunk) {
    this.streamManager.onReceiveData({ chunk });
  }

  /** Abort the stream manager with the given `reason`. */
  terminate(reason) {
    this.streamManager.abort(reason);
  }
}
|
2015-11-21 10:32:47 -06:00
|
|
|
|
2017-04-02 16:14:30 +02:00
|
|
|
export { LocalPdfManager, NetworkPdfManager };
|