From 62a9c26cdae9118dea89dc18b5cad844bbc7faf5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 18 Mar 2020 11:29:16 +0100 Subject: [PATCH] Always prefer the PDF.js JPEG decoder for very large images, in order to reduce peak memory usage (issue 11694) When JPEG images are decoded by the browser, on the main-thread, there's a handful of short-lived copies of the image data; see https://github.com/mozilla/pdf.js/blob/c3f4690bde8137d80c74203b1ad91476fc2ca160/src/display/api.js#L2364-L2408 That code thus becomes quite problematic for very big JPEG images, since it increases peak memory usage a lot during decoding. In the referenced issue there's a couple of JPEG images whose dimensions are `10006 x 7088` (i.e. ~68 mega-pixels), which causes the *peak* memory usage to increase by close to `1 GB` (i.e. one giga-byte) in my testing. By letting the PDF.js JPEG decoder, rather than the browser, handle very large images the *peak* memory usage is considerably reduced and the allocated memory also seems to be reclaimed faster. *Please note:* This will lead to movement in some existing `eq` tests. --- src/core/jpeg_stream.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core/jpeg_stream.js b/src/core/jpeg_stream.js index 068f75e6e..c4cb56920 100644 --- a/src/core/jpeg_stream.js +++ b/src/core/jpeg_stream.js @@ -134,6 +134,17 @@ const JpegStream = (function JpegStreamClosure() { stream.pos += 2; // Skip marker length. stream.pos += 1; // Skip precision. const scanLines = stream.getUint16(); + const samplesPerLine = stream.getUint16(); + + // Letting the browser handle the JPEG decoding, on the main-thread, + // will cause a *large* increase in peak memory usage since there's + // a handful of short-lived copies of the image data. For very big + // JPEG images, always let the PDF.js image decoder handle them to + // reduce overall memory usage during decoding (see issue 11694). 
+ if (scanLines * samplesPerLine > 1e6) { + validDimensions = false; + break; + } // The "normal" case, where the image data and dictionary agrees. if (scanLines === dictHeight) {