Move the EXIF-block replacement into JpegStream (PR 19356 follow-up)

Currently we modify the EXIF-block in place, which may end up "breaking" the JPEG-data of the original PDF document; e.g. a copy of the document saved from the viewer no longer contains the real EXIF-block. Hence the EXIF-block replacement is moved into the `JpegStream` class, so that we can copy the data before doing the replacement.
2025-04-22 16:18:08 +02:00 · 2025-02-20 12:36:25 +01:00 · 2025-02-20 12:36:25 +01:00 · d5ce35f744
commit d5ce35f744
parent 63b397f49d
3 changed files with 54 additions and 12 deletions
--- a/src/core/jpeg_stream.js
+++ b/src/core/jpeg_stream.js
@ -163,10 +163,23 @@ class JpegStream extends DecodeStream {
      if (!bytes) {
        return null;
      }
-      const data = this.#skipUselessBytes(bytes);
-      if (!JpegImage.canUseImageDecoder(data, jpegOptions.colorTransform)) {
+      let data = this.#skipUselessBytes(bytes);
+      const useImageDecoder = JpegImage.canUseImageDecoder(
+        data,
+        jpegOptions.colorTransform
+      );
+      if (!useImageDecoder) {
        return null;
      }
+      if (useImageDecoder.exifStart) {
+        // Replace the entire EXIF-block with dummy data, to ensure that a
+        // non-default EXIF orientation won't cause the image to be rotated
+        // when using `ImageDecoder` (fixes bug1942064.pdf).
+        //
+        // Copy the data first, to avoid modifying the original PDF document.
+        data = data.slice();
+        data.fill(0x00, useImageDecoder.exifStart, useImageDecoder.exifEnd);
+      }
      decoder = new ImageDecoder({
        data,
        type: "image/jpeg",
--- a/src/core/jpg.js
+++ b/src/core/jpg.js
@ -782,8 +782,11 @@ function readDataBlock(data, offset) {
  }

  const array = data.subarray(offset, endOffset);
-  offset += array.length;
-  return { appData: array, newOffset: offset };
+  return {
+    appData: array,
+    oldOffset: offset,
+    newOffset: offset + array.length,
+  };
 }

 function skipData(data, offset) {
@ -805,6 +808,7 @@ class JpegImage {
  }

  static canUseImageDecoder(data, colorTransform = -1) {
+    let exifOffsets = null;
    let offset = 0;
    let numComponents = null;
    let fileMarker = readUint16(data, offset);
@ -820,7 +824,7 @@ class JpegImage {
        case 0xffe1: // APP1 - Exif
          // TODO: Remove this once https://github.com/w3c/webcodecs/issues/870
          //       is fixed.
-          const { appData, newOffset } = readDataBlock(data, offset);
+          const { appData, oldOffset, newOffset } = readDataBlock(data, offset);
          offset = newOffset;

          // 'Exif\x00\x00'
@ -832,10 +836,12 @@ class JpegImage {
            appData[4] === 0 &&
            appData[5] === 0
          ) {
-            // Replace the entire EXIF-block with dummy data, to ensure that a
-            // non-default EXIF orientation won't cause the image to be rotated
-            // when using `ImageDecoder` (fixes bug1942064.pdf).
-            appData.fill(0x00, 6);
+            if (exifOffsets) {
+              throw new JpegError("Duplicate EXIF-blocks found.");
+            }
+            // Don't do the EXIF-block replacement here, see `JpegStream`,
+            // since that can modify the original PDF document.
+            exifOffsets = { exifStart: oldOffset + 6, exifEnd: newOffset };
          }
          fileMarker = readUint16(data, offset);
          offset += 2;
@ -861,12 +867,12 @@ class JpegImage {
      offset += 2;
    }
    if (numComponents === 4) {
-      return false;
+      return null;
    }
    if (numComponents === 3 && colorTransform === 0) {
-      return false;
+      return null;
    }
-    return true;
+    return exifOffsets || {};
  }

  parse(data, { dnlScanLines = null } = {}) {