1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Move the EXIF-block replacement into JpegStream (PR 19356 follow-up)

Currently we modify the EXIF-block in place, which may end up "breaking" the JPEG-data of the original PDF document; e.g. a copy of the document saved from the viewer would no longer contain the real EXIF-block.
Hence the EXIF-block replacement is moved into the `JpegStream` class, such that we can copy the data before doing the replacement.
This commit is contained in:
Jonas Jenwald 2025-02-20 12:36:25 +01:00
parent 63b397f49d
commit d5ce35f744
3 changed files with 54 additions and 12 deletions

View file

@ -163,10 +163,23 @@ class JpegStream extends DecodeStream {
if (!bytes) {
return null;
}
const data = this.#skipUselessBytes(bytes);
if (!JpegImage.canUseImageDecoder(data, jpegOptions.colorTransform)) {
let data = this.#skipUselessBytes(bytes);
const useImageDecoder = JpegImage.canUseImageDecoder(
data,
jpegOptions.colorTransform
);
if (!useImageDecoder) {
return null;
}
if (useImageDecoder.exifStart) {
// Replace the entire EXIF-block with dummy data, to ensure that a
// non-default EXIF orientation won't cause the image to be rotated
// when using `ImageDecoder` (fixes bug1942064.pdf).
//
// Copy the data first, to avoid modifying the original PDF document.
data = data.slice();
data.fill(0x00, useImageDecoder.exifStart, useImageDecoder.exifEnd);
}
decoder = new ImageDecoder({
data,
type: "image/jpeg",

View file

@ -782,8 +782,11 @@ function readDataBlock(data, offset) {
}
const array = data.subarray(offset, endOffset);
offset += array.length;
return { appData: array, newOffset: offset };
return {
appData: array,
oldOffset: offset,
newOffset: offset + array.length,
};
}
function skipData(data, offset) {
@ -805,6 +808,7 @@ class JpegImage {
}
static canUseImageDecoder(data, colorTransform = -1) {
let exifOffsets = null;
let offset = 0;
let numComponents = null;
let fileMarker = readUint16(data, offset);
@ -820,7 +824,7 @@ class JpegImage {
case 0xffe1: // APP1 - Exif
// TODO: Remove this once https://github.com/w3c/webcodecs/issues/870
// is fixed.
const { appData, newOffset } = readDataBlock(data, offset);
const { appData, oldOffset, newOffset } = readDataBlock(data, offset);
offset = newOffset;
// 'Exif\x00\x00'
@ -832,10 +836,12 @@ class JpegImage {
appData[4] === 0 &&
appData[5] === 0
) {
// Replace the entire EXIF-block with dummy data, to ensure that a
// non-default EXIF orientation won't cause the image to be rotated
// when using `ImageDecoder` (fixes bug1942064.pdf).
appData.fill(0x00, 6);
if (exifOffsets) {
throw new JpegError("Duplicate EXIF-blocks found.");
}
// Don't do the EXIF-block replacement here, see `JpegStream`,
// since that can modify the original PDF document.
exifOffsets = { exifStart: oldOffset + 6, exifEnd: newOffset };
}
fileMarker = readUint16(data, offset);
offset += 2;
@ -861,12 +867,12 @@ class JpegImage {
offset += 2;
}
if (numComponents === 4) {
return false;
return null;
}
if (numComponents === 3 && colorTransform === 0) {
return false;
return null;
}
return true;
return exifOffsets || {};
}
parse(data, { dnlScanLines = null } = {}) {

View file

@ -2235,6 +2235,29 @@ describe("api", function () {
expect(data.length).toEqual(basicApiFileLength);
});
it("gets data from PDF document with JPEG image containing EXIF-data (bug 1942064)", async function () {
  // Fetch the raw bytes of the test-file; these serve as the reference that
  // the document data is compared against at the end of the test.
  const originalBytes = await DefaultFileReaderFactory.fetch({
    path: TEST_PDFS_PATH + "bug1942064.pdf",
  });
  // Sanity check to make sure that we fetched the entire PDF file.
  expect(originalBytes instanceof Uint8Array).toEqual(true);
  expect(originalBytes.length).toEqual(10719);

  // Hand a copy of the bytes to `getDocument`, keeping `originalBytes` intact.
  const task = getDocument(originalBytes.slice());
  const doc = await task.promise;

  // Trigger parsing of the JPEG image.
  const firstPage = await doc.getPage(1);
  await firstPage.getOperatorList();

  const documentBytes = await doc.getData();
  expect(documentBytes instanceof Uint8Array).toEqual(true);
  // Ensure that the EXIF-block wasn't modified.
  expect(originalBytes).toEqual(documentBytes);

  await task.destroy();
});
it("gets download info", async function () {
const downloadInfo = await pdfDocument.getDownloadInfo();
expect(downloadInfo).toEqual({ length: basicApiFileLength });