1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 10:08:06 +02:00

Merge pull request #11601 from Snuffleupagus/rm-nativeImageDecoderSupport

[api-minor] Decode all JPEG images with the built-in PDF.js decoder in `src/core/jpg.js`
This commit is contained in:
Tim van der Meij 2020-05-23 15:33:46 +02:00 committed by GitHub
commit 3b615e4ca3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 57 additions and 682 deletions

View file

@ -25,7 +25,6 @@ import {
isArrayEqual,
isNum,
isString,
NativeImageDecoding,
OPS,
stringToPDFString,
TextRenderingMode,
@ -80,18 +79,14 @@ import { DecodeStream } from "./stream.js";
import { getGlyphsUnicode } from "./glyphlist.js";
import { getMetrics } from "./metrics.js";
import { isPDFFunction } from "./function.js";
import { JpegStream } from "./jpeg_stream.js";
import { MurmurHash3_64 } from "./murmurhash3.js";
import { NativeImageDecoder } from "./image_utils.js";
import { OperatorList } from "./operator_list.js";
import { PDFImage } from "./image.js";
var PartialEvaluator = (function PartialEvaluatorClosure() {
const DefaultPartialEvaluatorOptions = {
forceDataSchema: false,
maxImageSize: -1,
disableFontFace: false,
nativeImageDecoderSupport: NativeImageDecoding.DECODE,
ignoreErrors: false,
isEvalSupported: true,
fontExtraProperties: false,
@ -450,7 +445,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList,
cacheKey,
imageCache,
forceDisableNativeImageDecoder = false,
}) {
var dict = image.dict;
const imageRef = dict.objId;
@ -510,13 +504,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var SMALL_IMAGE_DIMENSIONS = 200;
// Inlining small images into the queue as RGB data
if (
isInline &&
!softMask &&
!mask &&
!(image instanceof JpegStream) &&
w + h < SMALL_IMAGE_DIMENSIONS
) {
if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) {
const imageObj = new PDFImage({
xref: this.xref,
res: resources,
@ -531,20 +519,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return undefined;
}
let nativeImageDecoderSupport = forceDisableNativeImageDecoder
? NativeImageDecoding.NONE
: this.options.nativeImageDecoderSupport;
// If there is no imageMask, create the PDFImage and a lot
// of image processing can be done here.
let objId = `img_${this.idFactory.createObjId()}`,
cacheGlobally = false;
if (this.parsingType3Font) {
assert(
nativeImageDecoderSupport === NativeImageDecoding.NONE,
"Type3 image resources should be completely decoded in the worker."
);
objId = `${this.idFactory.getDocId()}_type3res_${objId}`;
} else if (imageRef) {
cacheGlobally = this.globalImageCache.shouldCache(
@ -553,102 +533,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
);
if (cacheGlobally) {
// Ensure that the image is *completely* decoded on the worker-thread,
// in order to simplify the caching/rendering code on the main-thread.
nativeImageDecoderSupport = NativeImageDecoding.NONE;
objId = `${this.idFactory.getDocId()}_${objId}`;
}
}
if (
nativeImageDecoderSupport !== NativeImageDecoding.NONE &&
!softMask &&
!mask &&
image instanceof JpegStream &&
image.maybeValidDimensions &&
NativeImageDecoder.isSupported(
image,
this.xref,
resources,
this.pdfFunctionFactory
)
) {
// These JPEGs don't need any more processing so we can just send it.
return this.handler
.sendWithPromise("obj", [
objId,
this.pageIndex,
"JpegStream",
image.getIR(this.options.forceDataSchema),
])
.then(
() => {
// Only add the dependency once we know that the native JPEG
// decoding succeeded, to ensure that rendering will always
// complete.
operatorList.addDependency(objId);
args = [objId, w, h];
operatorList.addOp(OPS.paintJpegXObject, args);
if (cacheKey) {
imageCache[cacheKey] = {
fn: OPS.paintJpegXObject,
args,
};
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
}
}
},
reason => {
warn(
"Native JPEG decoding failed -- trying to recover: " +
(reason && reason.message)
);
// Try to decode the JPEG image with the built-in decoder instead.
return this.buildPaintImageXObject({
resources,
image,
isInline,
operatorList,
cacheKey,
imageCache,
forceDisableNativeImageDecoder: true,
});
}
);
}
// Creates native image decoder only if a JPEG image or mask is present.
var nativeImageDecoder = null;
if (
nativeImageDecoderSupport === NativeImageDecoding.DECODE &&
(image instanceof JpegStream ||
mask instanceof JpegStream ||
softMask instanceof JpegStream)
) {
nativeImageDecoder = new NativeImageDecoder({
xref: this.xref,
resources,
handler: this.handler,
forceDataSchema: this.options.forceDataSchema,
pdfFunctionFactory: this.pdfFunctionFactory,
});
}
// Ensure that the dependency is added before the image is decoded.
operatorList.addDependency(objId);
args = [objId, w, h];
const imgPromise = PDFImage.buildImage({
handler: this.handler,
xref: this.xref,
res: resources,
image,
isInline,
nativeDecoder: nativeImageDecoder,
pdfFunctionFactory: this.pdfFunctionFactory,
})
.then(imageObj => {
@ -3393,7 +3290,6 @@ class TranslatedFont {
// the rendering code on the main-thread (see issue10717.pdf).
var type3Options = Object.create(evaluator.options);
type3Options.ignoreErrors = false;
type3Options.nativeImageDecoderSupport = NativeImageDecoding.NONE;
var type3Evaluator = evaluator.clone(type3Options);
type3Evaluator.parsingType3Font = true;

View file

@ -21,23 +21,6 @@ import { JpegStream } from "./jpeg_stream.js";
import { JpxImage } from "./jpx.js";
var PDFImage = (function PDFImageClosure() {
/**
 * Picks the stream that should be used for an image.
 *
 * When a native decoder is supplied and reports that it can handle `image`,
 * the decoding is delegated to it; if that decoding rejects, a warning is
 * logged and the untouched `image` is used instead. In every other case the
 * original `image` is passed through unchanged.
 *
 * @param {Object} image - The image stream to (possibly) decode.
 * @param {Object} [nativeDecoder] - Optional object exposing
 *   `canDecode(image)` and `decode(image)`.
 * @returns {Promise<Object>} Resolves with the decoded stream, or with
 *   `image` itself.
 */
function handleImageData(image, nativeDecoder) {
  const useNativeDecoder = nativeDecoder && nativeDecoder.canDecode(image);
  if (!useNativeDecoder) {
    return Promise.resolve(image);
  }

  const recoverWithOriginal = reason => {
    const details = reason && reason.message;
    warn("Native image decoding failed -- trying to recover: " + details);
    return image;
  };
  return nativeDecoder.decode(image).catch(recoverWithOriginal);
}
/**
* Decode and clamp a value. The formula is different from the spec because we
* don't decode to float range [0,1], we decode it in the [0,max] range.
@ -266,51 +249,38 @@ var PDFImage = (function PDFImageClosure() {
* with a PDFImage when the image is ready to be used.
*/
PDFImage.buildImage = function ({
handler,
xref,
res,
image,
isInline = false,
nativeDecoder = null,
pdfFunctionFactory,
}) {
var imagePromise = handleImageData(image, nativeDecoder);
var smaskPromise;
var maskPromise;
const imageData = image;
let smaskData = null;
let maskData = null;
var smask = image.dict.get("SMask");
var mask = image.dict.get("Mask");
const smask = image.dict.get("SMask");
const mask = image.dict.get("Mask");
if (smask) {
smaskPromise = handleImageData(smask, nativeDecoder);
maskPromise = Promise.resolve(null);
} else {
smaskPromise = Promise.resolve(null);
if (mask) {
if (isStream(mask)) {
maskPromise = handleImageData(mask, nativeDecoder);
} else if (Array.isArray(mask)) {
maskPromise = Promise.resolve(mask);
} else {
warn("Unsupported mask format.");
maskPromise = Promise.resolve(null);
}
smaskData = smask;
} else if (mask) {
if (isStream(mask) || Array.isArray(mask)) {
maskData = mask;
} else {
maskPromise = Promise.resolve(null);
warn("Unsupported mask format.");
}
}
return Promise.all([imagePromise, smaskPromise, maskPromise]).then(
function ([imageData, smaskData, maskData]) {
return new PDFImage({
xref,
res,
image: imageData,
isInline,
smask: smaskData,
mask: maskData,
pdfFunctionFactory,
});
}
return Promise.resolve(
new PDFImage({
xref,
res,
image: imageData,
isInline,
smask: smaskData,
mask: maskData,
pdfFunctionFactory,
})
);
};

View file

@ -15,103 +15,7 @@
/* eslint no-var: error */
import { assert, info, shadow } from "../shared/util.js";
import { ColorSpace } from "./colorspace.js";
import { JpegStream } from "./jpeg_stream.js";
import { RefSetCache } from "./primitives.js";
import { Stream } from "./stream.js";
/**
 * Helper that hands JPEG streams over to the main thread for decoding, and
 * that answers whether a given image qualifies for that treatment.
 */
class NativeImageDecoder {
  /**
   * @param {Object} params
   * @param {Object} params.xref - Forwarded to `ColorSpace.parse`.
   * @param {Object} params.resources - Forwarded to `ColorSpace.parse`.
   * @param {Object} params.handler - Must expose `sendWithPromise`.
   * @param {boolean} [params.forceDataSchema] - Forwarded to `image.getIR`.
   *   Defaults to `false`.
   * @param {Object} params.pdfFunctionFactory - Forwarded to
   *   `ColorSpace.parse`.
   */
  constructor({
    xref,
    resources,
    handler,
    forceDataSchema = false,
    pdfFunctionFactory,
  }) {
    Object.assign(this, {
      xref,
      resources,
      handler,
      forceDataSchema,
      pdfFunctionFactory,
    });
  }

  /**
   * Tells whether `image` is a `JpegStream` with plausible dimensions that
   * also passes the `isDecodable` check.
   */
  canDecode(image) {
    const { xref, resources, pdfFunctionFactory } = this;

    return (
      image instanceof JpegStream &&
      image.maybeValidDimensions &&
      NativeImageDecoder.isDecodable(image, xref, resources, pdfFunctionFactory)
    );
  }

  /**
   * Sends the JPEG data to the main thread ("JpegDecode" message) and wraps
   * the returned bytes in a `Stream` that reuses the image dictionary.
   *
   * @returns {Promise<Stream>}
   */
  decode(image) {
    const { dict } = image;
    const colorSpace = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      this.xref,
      this.resources,
      this.pdfFunctionFactory
    );

    return this.handler
      .sendWithPromise("JpegDecode", [
        image.getIR(this.forceDataSchema),
        colorSpace.numComps,
      ])
      .then(({ data }) => new Stream(data, 0, data.length, dict));
  }

  /**
   * Checks if the browser can both decode *and* display the image as-is,
   * i.e. without any additional processing such as color space conversion.
   */
  static isSupported(image, xref, res, pdfFunctionFactory) {
    const { dict } = image;
    const hasDecodeParms = dict.has("DecodeParms") || dict.has("DP");
    if (hasDecodeParms) {
      return false;
    }

    const colorSpace = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      xref,
      res,
      pdfFunctionFactory
    );
    const isGrayOrRgb =
      colorSpace.name === "DeviceGray" || colorSpace.name === "DeviceRGB";

    // For DeviceGray and DeviceRGB, `isDefaultDecode` needs no `bpc` argument.
    return isGrayOrRgb && colorSpace.isDefaultDecode(dict.getArray("Decode", "D"));
  }

  /**
   * Checks if the browser is able to decode the image.
   */
  static isDecodable(image, xref, res, pdfFunctionFactory) {
    const { dict } = image;
    const hasDecodeParms = dict.has("DecodeParms") || dict.has("DP");
    if (hasDecodeParms) {
      return false;
    }

    const colorSpace = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      xref,
      res,
      pdfFunctionFactory
    );
    const bitsPerComponent = dict.get("BitsPerComponent", "BPC") || 1;

    return (
      [1, 3].includes(colorSpace.numComps) &&
      colorSpace.isDefaultDecode(dict.getArray("Decode", "D"), bitsPerComponent)
    );
  }
}
class GlobalImageCache {
static get NUM_PAGES_THRESHOLD() {
@ -207,4 +111,4 @@ class GlobalImageCache {
}
}
export { NativeImageDecoder, GlobalImageCache };
export { GlobalImageCache };

View file

@ -13,17 +13,14 @@
* limitations under the License.
*/
import { createObjectURL, shadow } from "../shared/util.js";
import { DecodeStream } from "./stream.js";
import { isDict } from "./primitives.js";
import { JpegImage } from "./jpg.js";
import { shadow } from "../shared/util.js";
/**
* Depending on the type of JPEG a JpegStream is handled in different ways. For
* JPEG's that are supported natively such as DeviceGray and DeviceRGB the image
* data is stored and then loaded by the browser. For unsupported JPEG's we use
* a library to decode these images and the stream behaves like all the other
* DecodeStreams.
* For JPEG's we use a library to decode these images and the stream behaves
* like all the other DecodeStreams.
*/
const JpegStream = (function JpegStreamClosure() {
// eslint-disable-next-line no-shadow
@ -110,150 +107,6 @@ const JpegStream = (function JpegStreamClosure() {
this.eof = true;
};
// Getter that scans the raw JPEG data for the first SOF (Start Of Frame)
// marker and compares the dimensions found there with the image dictionary.
// It evaluates to `false` when the image is very large, when the SOF height
// is zero (a DNL marker is expected), or when the SOF height exceeds ten times
// the dictionary height; otherwise it evaluates to `true`. The stream position
// is restored before returning, so reading the property does not consume any
// image data.
Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
get: function JpegStream_maybeValidDimensions() {
const { dict, stream } = this;
const dictHeight = dict.get("Height", "H");
// Remember where the stream was, since the scan below moves `stream.pos`.
const startPos = stream.pos;
let validDimensions = true,
foundSOF = false,
b;
// Walk the data byte-by-byte until `getByte` reports -1, looking for 0xff
// bytes, which introduce a marker.
while ((b = stream.getByte()) !== -1) {
if (b !== 0xff) {
// Not a valid marker.
continue;
}
// The byte following 0xff identifies the marker type.
switch (stream.getByte()) {
case 0xc0: // SOF0
case 0xc1: // SOF1
case 0xc2: // SOF2
// These three SOF{n} markers are the only ones that the built-in
// PDF.js JPEG decoder currently supports.
foundSOF = true;
stream.pos += 2; // Skip marker length.
stream.pos += 1; // Skip precision.
const scanLines = stream.getUint16();
const samplesPerLine = stream.getUint16();
// NOTE: Every `break` inside the checks below only leaves the `switch`;
// the surrounding `while` loop is terminated by the `foundSOF` check
// that follows the `switch`.
// Letting the browser handle the JPEG decoding, on the main-thread,
// will cause a *large* increase in peak memory usage since there's
// a handful of short-lived copies of the image data. For very big
// JPEG images, always let the PDF.js image decoder handle them to
// reduce overall memory usage during decoding (see issue 11694).
if (scanLines * samplesPerLine > 1e6) {
validDimensions = false;
break;
}
// The "normal" case, where the image data and dictionary agrees.
if (scanLines === dictHeight) {
break;
}
// A DNL (Define Number of Lines) marker is expected,
// which browsers (usually) cannot decode natively.
if (scanLines === 0) {
validDimensions = false;
break;
}
// The dimensions of the image, among other properties, should
// always be taken from the image data *itself* rather than the
// XObject dictionary. However there's cases of corrupt images that
// browsers cannot decode natively, for example:
// - JPEG images with DNL markers, where the SOF `scanLines`
// parameter has an unexpected value (see issue 8614).
// - JPEG images with too large SOF `scanLines` parameter, where
// the EOI marker is encountered prematurely (see issue 10880).
// In an attempt to handle these kinds of corrupt images, compare
// the dimensions in the image data with the dictionary and *always*
// let the PDF.js JPEG decoder (rather than the browser) handle the
// image if the difference is larger than one order of magnitude
// (since that would generally suggest that something is off).
if (scanLines > dictHeight * 10) {
validDimensions = false;
break;
}
break;
// The remaining SOF{n} markers stop the scan without inspecting the
// dimensions, which leaves `validDimensions` at its current value.
case 0xc3: // SOF3
/* falls through */
case 0xc5: // SOF5
case 0xc6: // SOF6
case 0xc7: // SOF7
/* falls through */
case 0xc9: // SOF9
case 0xca: // SOF10
case 0xcb: // SOF11
/* falls through */
case 0xcd: // SOF13
case 0xce: // SOF14
case 0xcf: // SOF15
foundSOF = true;
break;
// All of the markers below carry a 16-bit length field, which is used to
// jump over their payload.
case 0xc4: // DHT
case 0xcc: // DAC
/* falls through */
case 0xda: // SOS
case 0xdb: // DQT
case 0xdc: // DNL
case 0xdd: // DRI
case 0xde: // DHP
case 0xdf: // EXP
/* falls through */
case 0xe0: // APP0
case 0xe1: // APP1
case 0xe2: // APP2
case 0xe3: // APP3
case 0xe4: // APP4
case 0xe5: // APP5
case 0xe6: // APP6
case 0xe7: // APP7
case 0xe8: // APP8
case 0xe9: // APP9
case 0xea: // APP10
case 0xeb: // APP11
case 0xec: // APP12
case 0xed: // APP13
case 0xee: // APP14
case 0xef: // APP15
/* falls through */
case 0xfe: // COM
const markerLength = stream.getUint16();
if (markerLength > 2) {
stream.skip(markerLength - 2); // Jump to the next marker.
} else {
// The marker length is invalid, resetting the stream position.
stream.skip(-2);
}
break;
case 0xff: // Fill byte.
// Avoid skipping a valid marker, resetting the stream position.
stream.skip(-1);
break;
case 0xd9: // EOI
// Reuse the `foundSOF` flag to stop scanning at the end of the image,
// even though no SOF marker was actually seen.
foundSOF = true;
break;
}
if (foundSOF) {
break;
}
}
// Finally, don't forget to reset the stream position.
stream.pos = startPos;
// NOTE(review): `shadow` comes from "../shared/util.js"; presumably it
// replaces this getter with the computed value so that the scan only runs
// once per instance -- confirm against its implementation.
return shadow(this, "maybeValidDimensions", validDimensions);
},
// Kept configurable, presumably so that `shadow` can redefine the property.
configurable: true,
});
/**
 * Builds a URL for the raw JPEG bytes of this stream by forwarding them, with
 * the "image/jpeg" content type, to `createObjectURL`.
 *
 * @param {boolean} [forceDataSchema] - Passed through to `createObjectURL`.
 *   Defaults to `false`.
 * @returns The value produced by `createObjectURL`.
 */
JpegStream.prototype.getIR = function (forceDataSchema = false) {
  const { bytes } = this;
  const contentType = "image/jpeg";

  return createObjectURL(bytes, contentType, forceDataSchema);
};
return JpegStream;
})();

View file

@ -399,10 +399,8 @@ var WorkerMessageHandler = {
ensureNotTerminated();
var evaluatorOptions = {
forceDataSchema: data.disableCreateObjectURL,
maxImageSize: data.maxImageSize,
disableFontFace: data.disableFontFace,
nativeImageDecoderSupport: data.nativeImageDecoderSupport,
ignoreErrors: data.ignoreErrors,
isEvalSupported: data.isEvalSupported,
fontExtraProperties: data.fontExtraProperties,