Merge pull request #11601 from Snuffleupagus/rm-nativeImageDecoderSupport

[api-minor] Decode all JPEG images with the built-in PDF.js decoder in `src/core/jpg.js`
2025-04-26 10:08:06 +02:00 · 2020-05-23 15:33:46 +02:00 · 2020-05-23 15:33:46 +02:00 · 3b615e4ca3
commit 3b615e4ca3
parent cd6d089489 ebef67b354
23 changed files with 57 additions and 682 deletions
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -25,7 +25,6 @@ import {
  isArrayEqual,
  isNum,
  isString,
-  NativeImageDecoding,
  OPS,
  stringToPDFString,
  TextRenderingMode,
@@ -80,18 +79,14 @@ import { DecodeStream } from "./stream.js";
 import { getGlyphsUnicode } from "./glyphlist.js";
 import { getMetrics } from "./metrics.js";
 import { isPDFFunction } from "./function.js";
-import { JpegStream } from "./jpeg_stream.js";
 import { MurmurHash3_64 } from "./murmurhash3.js";
-import { NativeImageDecoder } from "./image_utils.js";
 import { OperatorList } from "./operator_list.js";
 import { PDFImage } from "./image.js";

 var PartialEvaluator = (function PartialEvaluatorClosure() {
  const DefaultPartialEvaluatorOptions = {
-    forceDataSchema: false,
    maxImageSize: -1,
    disableFontFace: false,
-    nativeImageDecoderSupport: NativeImageDecoding.DECODE,
    ignoreErrors: false,
    isEvalSupported: true,
    fontExtraProperties: false,
@@ -450,7 +445,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
      operatorList,
      cacheKey,
      imageCache,
-      forceDisableNativeImageDecoder = false,
    }) {
      var dict = image.dict;
      const imageRef = dict.objId;
@@ -510,13 +504,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {

      var SMALL_IMAGE_DIMENSIONS = 200;
      // Inlining small images into the queue as RGB data
-      if (
-        isInline &&
-        !softMask &&
-        !mask &&
-        !(image instanceof JpegStream) &&
-        w + h < SMALL_IMAGE_DIMENSIONS
-      ) {
+      if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) {
        const imageObj = new PDFImage({
          xref: this.xref,
          res: resources,
@@ -531,20 +519,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
        return undefined;
      }

-      let nativeImageDecoderSupport = forceDisableNativeImageDecoder
-        ? NativeImageDecoding.NONE
-        : this.options.nativeImageDecoderSupport;
      // If there is no imageMask, create the PDFImage and a lot
      // of image processing can be done here.
      let objId = `img_${this.idFactory.createObjId()}`,
        cacheGlobally = false;

      if (this.parsingType3Font) {
-        assert(
-          nativeImageDecoderSupport === NativeImageDecoding.NONE,
-          "Type3 image resources should be completely decoded in the worker."
-        );
-
        objId = `${this.idFactory.getDocId()}_type3res_${objId}`;
      } else if (imageRef) {
        cacheGlobally = this.globalImageCache.shouldCache(
@@ -553,102 +533,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
        );

        if (cacheGlobally) {
-          // Ensure that the image is *completely* decoded on the worker-thread,
-          // in order to simplify the caching/rendering code on the main-thread.
-          nativeImageDecoderSupport = NativeImageDecoding.NONE;
-
          objId = `${this.idFactory.getDocId()}_${objId}`;
        }
      }

-      if (
-        nativeImageDecoderSupport !== NativeImageDecoding.NONE &&
-        !softMask &&
-        !mask &&
-        image instanceof JpegStream &&
-        image.maybeValidDimensions &&
-        NativeImageDecoder.isSupported(
-          image,
-          this.xref,
-          resources,
-          this.pdfFunctionFactory
-        )
-      ) {
-        // These JPEGs don't need any more processing so we can just send it.
-        return this.handler
-          .sendWithPromise("obj", [
-            objId,
-            this.pageIndex,
-            "JpegStream",
-            image.getIR(this.options.forceDataSchema),
-          ])
-          .then(
-            () => {
-              // Only add the dependency once we know that the native JPEG
-              // decoding succeeded, to ensure that rendering will always
-              // complete.
-              operatorList.addDependency(objId);
-              args = [objId, w, h];
-
-              operatorList.addOp(OPS.paintJpegXObject, args);
-              if (cacheKey) {
-                imageCache[cacheKey] = {
-                  fn: OPS.paintJpegXObject,
-                  args,
-                };
-
-                if (imageRef) {
-                  this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
-                }
-              }
-            },
-            reason => {
-              warn(
-                "Native JPEG decoding failed -- trying to recover: " +
-                  (reason && reason.message)
-              );
-              // Try to decode the JPEG image with the built-in decoder instead.
-              return this.buildPaintImageXObject({
-                resources,
-                image,
-                isInline,
-                operatorList,
-                cacheKey,
-                imageCache,
-                forceDisableNativeImageDecoder: true,
-              });
-            }
-          );
-      }
-
-      // Creates native image decoder only if a JPEG image or mask is present.
-      var nativeImageDecoder = null;
-      if (
-        nativeImageDecoderSupport === NativeImageDecoding.DECODE &&
-        (image instanceof JpegStream ||
-          mask instanceof JpegStream ||
-          softMask instanceof JpegStream)
-      ) {
-        nativeImageDecoder = new NativeImageDecoder({
-          xref: this.xref,
-          resources,
-          handler: this.handler,
-          forceDataSchema: this.options.forceDataSchema,
-          pdfFunctionFactory: this.pdfFunctionFactory,
-        });
-      }
-
      // Ensure that the dependency is added before the image is decoded.
      operatorList.addDependency(objId);
      args = [objId, w, h];

      const imgPromise = PDFImage.buildImage({
-        handler: this.handler,
        xref: this.xref,
        res: resources,
        image,
        isInline,
-        nativeDecoder: nativeImageDecoder,
        pdfFunctionFactory: this.pdfFunctionFactory,
      })
        .then(imageObj => {
@@ -3393,7 +3290,6 @@ class TranslatedFont {
    // the rendering code on the main-thread (see issue10717.pdf).
    var type3Options = Object.create(evaluator.options);
    type3Options.ignoreErrors = false;
-    type3Options.nativeImageDecoderSupport = NativeImageDecoding.NONE;
    var type3Evaluator = evaluator.clone(type3Options);
    type3Evaluator.parsingType3Font = true;

--- a/src/core/image.js
+++ b/src/core/image.js
@@ -21,23 +21,6 @@ import { JpegStream } from "./jpeg_stream.js";
 import { JpxImage } from "./jpx.js";

 var PDFImage = (function PDFImageClosure() {
-  /**
-   * Decodes the image using native decoder if possible. Resolves the promise
-   * when the image data is ready.
-   */
-  function handleImageData(image, nativeDecoder) {
-    if (nativeDecoder && nativeDecoder.canDecode(image)) {
-      return nativeDecoder.decode(image).catch(reason => {
-        warn(
-          "Native image decoding failed -- trying to recover: " +
-            (reason && reason.message)
-        );
-        return image;
-      });
-    }
-    return Promise.resolve(image);
-  }
-
  /**
   * Decode and clamp a value. The formula is different from the spec because we
   * don't decode to float range [0,1], we decode it in the [0,max] range.
@@ -266,51 +249,38 @@ var PDFImage = (function PDFImageClosure() {
   * with a PDFImage when the image is ready to be used.
   */
  PDFImage.buildImage = function ({
-    handler,
    xref,
    res,
    image,
    isInline = false,
-    nativeDecoder = null,
    pdfFunctionFactory,
  }) {
-    var imagePromise = handleImageData(image, nativeDecoder);
-    var smaskPromise;
-    var maskPromise;
+    const imageData = image;
+    let smaskData = null;
+    let maskData = null;

-    var smask = image.dict.get("SMask");
-    var mask = image.dict.get("Mask");
+    const smask = image.dict.get("SMask");
+    const mask = image.dict.get("Mask");

    if (smask) {
-      smaskPromise = handleImageData(smask, nativeDecoder);
-      maskPromise = Promise.resolve(null);
-    } else {
-      smaskPromise = Promise.resolve(null);
-      if (mask) {
-        if (isStream(mask)) {
-          maskPromise = handleImageData(mask, nativeDecoder);
-        } else if (Array.isArray(mask)) {
-          maskPromise = Promise.resolve(mask);
-        } else {
-          warn("Unsupported mask format.");
-          maskPromise = Promise.resolve(null);
-        }
+      smaskData = smask;
+    } else if (mask) {
+      if (isStream(mask) || Array.isArray(mask)) {
+        maskData = mask;
      } else {
-        maskPromise = Promise.resolve(null);
+        warn("Unsupported mask format.");
      }
    }
-    return Promise.all([imagePromise, smaskPromise, maskPromise]).then(
-      function ([imageData, smaskData, maskData]) {
-        return new PDFImage({
-          xref,
-          res,
-          image: imageData,
-          isInline,
-          smask: smaskData,
-          mask: maskData,
-          pdfFunctionFactory,
-        });
-      }
+    return Promise.resolve(
+      new PDFImage({
+        xref,
+        res,
+        image: imageData,
+        isInline,
+        smask: smaskData,
+        mask: maskData,
+        pdfFunctionFactory,
+      })
    );
  };

--- a/src/core/image_utils.js
+++ b/src/core/image_utils.js
@@ -15,103 +15,7 @@
 /* eslint no-var: error */

 import { assert, info, shadow } from "../shared/util.js";
-import { ColorSpace } from "./colorspace.js";
-import { JpegStream } from "./jpeg_stream.js";
 import { RefSetCache } from "./primitives.js";
-import { Stream } from "./stream.js";
-
-class NativeImageDecoder {
-  constructor({
-    xref,
-    resources,
-    handler,
-    forceDataSchema = false,
-    pdfFunctionFactory,
-  }) {
-    this.xref = xref;
-    this.resources = resources;
-    this.handler = handler;
-    this.forceDataSchema = forceDataSchema;
-    this.pdfFunctionFactory = pdfFunctionFactory;
-  }
-
-  canDecode(image) {
-    return (
-      image instanceof JpegStream &&
-      image.maybeValidDimensions &&
-      NativeImageDecoder.isDecodable(
-        image,
-        this.xref,
-        this.resources,
-        this.pdfFunctionFactory
-      )
-    );
-  }
-
-  decode(image) {
-    // For natively supported JPEGs send them to the main thread for decoding.
-    const dict = image.dict;
-    let colorSpace = dict.get("ColorSpace", "CS");
-    colorSpace = ColorSpace.parse(
-      colorSpace,
-      this.xref,
-      this.resources,
-      this.pdfFunctionFactory
-    );
-
-    return this.handler
-      .sendWithPromise("JpegDecode", [
-        image.getIR(this.forceDataSchema),
-        colorSpace.numComps,
-      ])
-      .then(function ({ data, width, height }) {
-        return new Stream(data, 0, data.length, dict);
-      });
-  }
-
-  /**
-   * Checks if the image can be decoded and displayed by the browser without any
-   * further processing such as color space conversions.
-   */
-  static isSupported(image, xref, res, pdfFunctionFactory) {
-    const dict = image.dict;
-    if (dict.has("DecodeParms") || dict.has("DP")) {
-      return false;
-    }
-    const cs = ColorSpace.parse(
-      dict.get("ColorSpace", "CS"),
-      xref,
-      res,
-      pdfFunctionFactory
-    );
-    // isDefaultDecode() of DeviceGray and DeviceRGB needs no `bpc` argument.
-    return (
-      (cs.name === "DeviceGray" || cs.name === "DeviceRGB") &&
-      cs.isDefaultDecode(dict.getArray("Decode", "D"))
-    );
-  }
-
-  /**
-   * Checks if the image can be decoded by the browser.
-   */
-  static isDecodable(image, xref, res, pdfFunctionFactory) {
-    const dict = image.dict;
-    if (dict.has("DecodeParms") || dict.has("DP")) {
-      return false;
-    }
-    const cs = ColorSpace.parse(
-      dict.get("ColorSpace", "CS"),
-      xref,
-      res,
-      pdfFunctionFactory
-    );
-    const bpc = dict.get("BitsPerComponent", "BPC") || 1;
-    return (
-      (cs.numComps === 1 || cs.numComps === 3) &&
-      cs.isDefaultDecode(dict.getArray("Decode", "D"), bpc)
-    );
-  }
-}

 class GlobalImageCache {
  static get NUM_PAGES_THRESHOLD() {
@@ -207,4 +111,4 @@ class GlobalImageCache {
  }
 }

-export { NativeImageDecoder, GlobalImageCache };
+export { GlobalImageCache };
--- a/src/core/jpeg_stream.js
+++ b/src/core/jpeg_stream.js
@@ -13,17 +13,14 @@
 * limitations under the License.
 */

-import { createObjectURL, shadow } from "../shared/util.js";
 import { DecodeStream } from "./stream.js";
 import { isDict } from "./primitives.js";
 import { JpegImage } from "./jpg.js";
+import { shadow } from "../shared/util.js";

 /**
- * Depending on the type of JPEG a JpegStream is handled in different ways. For
- * JPEG's that are supported natively such as DeviceGray and DeviceRGB the image
- * data is stored and then loaded by the browser. For unsupported JPEG's we use
- * a library to decode these images and the stream behaves like all the other
- * DecodeStreams.
+ * For JPEG's we use a library to decode these images and the stream behaves
+ * like all the other DecodeStreams.
 */
 const JpegStream = (function JpegStreamClosure() {
  // eslint-disable-next-line no-shadow
@@ -110,150 +107,6 @@ const JpegStream = (function JpegStreamClosure() {
    this.eof = true;
  };

-  Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
-    get: function JpegStream_maybeValidDimensions() {
-      const { dict, stream } = this;
-      const dictHeight = dict.get("Height", "H");
-      const startPos = stream.pos;
-
-      let validDimensions = true,
-        foundSOF = false,
-        b;
-      while ((b = stream.getByte()) !== -1) {
-        if (b !== 0xff) {
-          // Not a valid marker.
-          continue;
-        }
-        switch (stream.getByte()) {
-          case 0xc0: // SOF0
-          case 0xc1: // SOF1
-          case 0xc2: // SOF2
-            // These three SOF{n} markers are the only ones that the built-in
-            // PDF.js JPEG decoder currently supports.
-            foundSOF = true;
-
-            stream.pos += 2; // Skip marker length.
-            stream.pos += 1; // Skip precision.
-            const scanLines = stream.getUint16();
-            const samplesPerLine = stream.getUint16();
-
-            // Letting the browser handle the JPEG decoding, on the main-thread,
-            // will cause a *large* increase in peak memory usage since there's
-            // a handful of short-lived copies of the image data. For very big
-            // JPEG images, always let the PDF.js image decoder handle them to
-            // reduce overall memory usage during decoding (see issue 11694).
-            if (scanLines * samplesPerLine > 1e6) {
-              validDimensions = false;
-              break;
-            }
-
-            // The "normal" case, where the image data and dictionary agrees.
-            if (scanLines === dictHeight) {
-              break;
-            }
-            // A DNL (Define Number of Lines) marker is expected,
-            // which browsers (usually) cannot decode natively.
-            if (scanLines === 0) {
-              validDimensions = false;
-              break;
-            }
-            // The dimensions of the image, among other properties, should
-            // always be taken from the image data *itself* rather than the
-            // XObject dictionary. However there's cases of corrupt images that
-            // browsers cannot decode natively, for example:
-            //  - JPEG images with DNL markers, where the SOF `scanLines`
-            //    parameter has an unexpected value (see issue 8614).
-            //  - JPEG images with too large SOF `scanLines` parameter, where
-            //    the EOI marker is encountered prematurely (see issue 10880).
-            // In an attempt to handle these kinds of corrupt images, compare
-            // the dimensions in the image data with the dictionary and *always*
-            // let the PDF.js JPEG decoder (rather than the browser) handle the
-            // image if the difference is larger than one order of magnitude
-            // (since that would generally suggest that something is off).
-            if (scanLines > dictHeight * 10) {
-              validDimensions = false;
-              break;
-            }
-            break;
-
-          case 0xc3: // SOF3
-          /* falls through */
-          case 0xc5: // SOF5
-          case 0xc6: // SOF6
-          case 0xc7: // SOF7
-          /* falls through */
-          case 0xc9: // SOF9
-          case 0xca: // SOF10
-          case 0xcb: // SOF11
-          /* falls through */
-          case 0xcd: // SOF13
-          case 0xce: // SOF14
-          case 0xcf: // SOF15
-            foundSOF = true;
-            break;
-
-          case 0xc4: // DHT
-          case 0xcc: // DAC
-          /* falls through */
-          case 0xda: // SOS
-          case 0xdb: // DQT
-          case 0xdc: // DNL
-          case 0xdd: // DRI
-          case 0xde: // DHP
-          case 0xdf: // EXP
-          /* falls through */
-          case 0xe0: // APP0
-          case 0xe1: // APP1
-          case 0xe2: // APP2
-          case 0xe3: // APP3
-          case 0xe4: // APP4
-          case 0xe5: // APP5
-          case 0xe6: // APP6
-          case 0xe7: // APP7
-          case 0xe8: // APP8
-          case 0xe9: // APP9
-          case 0xea: // APP10
-          case 0xeb: // APP11
-          case 0xec: // APP12
-          case 0xed: // APP13
-          case 0xee: // APP14
-          case 0xef: // APP15
-          /* falls through */
-          case 0xfe: // COM
-            const markerLength = stream.getUint16();
-            if (markerLength > 2) {
-              stream.skip(markerLength - 2); // Jump to the next marker.
-            } else {
-              // The marker length is invalid, resetting the stream position.
-              stream.skip(-2);
-            }
-            break;
-
-          case 0xff: // Fill byte.
-            // Avoid skipping a valid marker, resetting the stream position.
-            stream.skip(-1);
-            break;
-
-          case 0xd9: // EOI
-            foundSOF = true;
-            break;
-        }
-        if (foundSOF) {
-          break;
-        }
-      }
-      // Finally, don't forget to reset the stream position.
-      stream.pos = startPos;
-
-      return shadow(this, "maybeValidDimensions", validDimensions);
-    },
-    configurable: true,
-  });
-
-  JpegStream.prototype.getIR = function (forceDataSchema = false) {
-    return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
-  };
-
  return JpegStream;
 })();

--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -399,10 +399,8 @@ var WorkerMessageHandler = {
      ensureNotTerminated();

      var evaluatorOptions = {
-        forceDataSchema: data.disableCreateObjectURL,
        maxImageSize: data.maxImageSize,
        disableFontFace: data.disableFontFace,
-        nativeImageDecoderSupport: data.nativeImageDecoderSupport,
        ignoreErrors: data.ignoreErrors,
        isEvalSupported: data.isEvalSupported,
        fontExtraProperties: data.fontExtraProperties,