diff --git a/src/core/jpeg_stream.js b/src/core/jpeg_stream.js index 70789d543..4af24eeb8 100644 --- a/src/core/jpeg_stream.js +++ b/src/core/jpeg_stream.js @@ -163,10 +163,23 @@ class JpegStream extends DecodeStream { if (!bytes) { return null; } - const data = this.#skipUselessBytes(bytes); - if (!JpegImage.canUseImageDecoder(data, jpegOptions.colorTransform)) { + let data = this.#skipUselessBytes(bytes); + const useImageDecoder = JpegImage.canUseImageDecoder( + data, + jpegOptions.colorTransform + ); + if (!useImageDecoder) { return null; } + if (useImageDecoder.exifStart) { + // Replace the entire EXIF-block with dummy data, to ensure that a + // non-default EXIF orientation won't cause the image to be rotated + // when using `ImageDecoder` (fixes bug1942064.pdf). + // + // Copy the data first, to avoid modifying the original PDF document. + data = data.slice(); + data.fill(0x00, useImageDecoder.exifStart, useImageDecoder.exifEnd); + } decoder = new ImageDecoder({ data, type: "image/jpeg", diff --git a/src/core/jpg.js b/src/core/jpg.js index 93d703ab9..26ff546e0 100644 --- a/src/core/jpg.js +++ b/src/core/jpg.js @@ -782,8 +782,11 @@ function readDataBlock(data, offset) { } const array = data.subarray(offset, endOffset); - offset += array.length; - return { appData: array, newOffset: offset }; + return { + appData: array, + oldOffset: offset, + newOffset: offset + array.length, + }; } function skipData(data, offset) { @@ -805,6 +808,7 @@ class JpegImage { } static canUseImageDecoder(data, colorTransform = -1) { + let exifOffsets = null; let offset = 0; let numComponents = null; let fileMarker = readUint16(data, offset); @@ -820,7 +824,7 @@ class JpegImage { case 0xffe1: // APP1 - Exif // TODO: Remove this once https://github.com/w3c/webcodecs/issues/870 // is fixed. - const { appData, newOffset } = readDataBlock(data, offset); + const { appData, oldOffset, newOffset } = readDataBlock(data, offset); offset = newOffset; // 'Exif\x00\x00' @@ -832,10 +836,12 @@ class JpegImage { appData[4] === 0 && appData[5] === 0 ) { - // Replace the entire EXIF-block with dummy data, to ensure that a - // non-default EXIF orientation won't cause the image to be rotated - // when using `ImageDecoder` (fixes bug1942064.pdf). - appData.fill(0x00, 6); + if (exifOffsets) { + throw new JpegError("Duplicate EXIF-blocks found."); + } + // Don't do the EXIF-block replacement here, see `JpegStream`, + // since that can modify the original PDF document. + exifOffsets = { exifStart: oldOffset + 6, exifEnd: newOffset }; } fileMarker = readUint16(data, offset); offset += 2; @@ -861,12 +867,12 @@ class JpegImage { offset += 2; } if (numComponents === 4) { - return false; + return null; } if (numComponents === 3 && colorTransform === 0) { - return false; + return null; } - return true; + return exifOffsets || {}; } parse(data, { dnlScanLines = null } = {}) { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 5e2c2550f..5532cf7a5 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2235,6 +2235,29 @@ describe("api", function () { expect(data.length).toEqual(basicApiFileLength); }); + it("gets data from PDF document with JPEG image containing EXIF-data (bug 1942064)", async function () { + const typedArrayPdf = await DefaultFileReaderFactory.fetch({ + path: TEST_PDFS_PATH + "bug1942064.pdf", + }); + + // Sanity check to make sure that we fetched the entire PDF file. + expect(typedArrayPdf instanceof Uint8Array).toEqual(true); + expect(typedArrayPdf.length).toEqual(10719); + + const loadingTask = getDocument(typedArrayPdf.slice()); + const pdfDoc = await loadingTask.promise; + const page = await pdfDoc.getPage(1); + // Trigger parsing of the JPEG image. + await page.getOperatorList(); + + const data = await pdfDoc.getData(); + expect(data instanceof Uint8Array).toEqual(true); + // Ensure that the EXIF-block wasn't modified. + expect(typedArrayPdf).toEqual(data); + + await loadingTask.destroy(); + }); + it("gets download info", async function () { const downloadInfo = await pdfDocument.getDownloadInfo(); expect(downloadInfo).toEqual({ length: basicApiFileLength });