
Decompress images, when possible, using DecompressionStream

Getting images is already asynchronous, so we can take this opportunity to use DecompressionStream (which is asynchronous as well) to decompress images.
Calixte Denizet 2024-05-24 23:26:02 +02:00
parent 53dfb5a6ba
commit 9654ad570a
7 changed files with 149 additions and 30 deletions
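For background, here is a minimal, self-contained sketch (not code from this commit; the helper name transformBytes and the sample input are illustrative) of the DecompressionStream pattern that the new FlateStream.asyncGetBytes() relies on: write the compressed bytes to the writable side, then collect and concatenate the chunks produced on the readable side.

// Illustrative sketch only: round-trip a buffer through the built-in
// CompressionStream/DecompressionStream using the zlib ("deflate") format,
// which is what FlateDecode streams contain.
async function transformBytes(bytes, transform) {
  const { readable, writable } = transform;
  const writer = writable.getWriter();
  writer.write(bytes);
  writer.close();

  // Collect the output chunks and concatenate them into one Uint8Array.
  const chunks = [];
  let total = 0;
  for await (const chunk of readable) {
    chunks.push(chunk);
    total += chunk.byteLength;
  }
  const out = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    out.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return out;
}

const original = new TextEncoder().encode("hello ".repeat(100));
const deflated = await transformBytes(original, new CompressionStream("deflate"));
const inflated = await transformBytes(deflated, new DecompressionStream("deflate"));
console.log(inflated.length === original.length); // true

In the commit below, the same chunk-collection loop feeds the inflated bytes straight to the image decoders, and any DecompressionStream failure (for example extra bytes after the end of the compressed data) falls back to the existing FlateStream decoder.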

src/core/base_stream.js

@ -44,6 +44,27 @@ class BaseStream {
unreachable("Abstract method `getBytes` called");
}
/**
* NOTE: This method can only be used to get image-data that is guaranteed
* to be fully loaded, since otherwise intermittent errors may occur;
* note the `ObjectLoader` class.
*/
async getImageData(length, ignoreColorSpace) {
return this.getBytes(length, ignoreColorSpace);
}
async asyncGetBytes() {
unreachable("Abstract method `asyncGetBytes` called");
}
get isAsync() {
return false;
}
get canAsyncDecodeImageFromBuffer() {
return false;
}
peekByte() {
const peekedByte = this.getByte();
if (peekedByte !== -1) {

src/core/decode_stream.js

@ -99,6 +99,14 @@ class DecodeStream extends BaseStream {
return this.buffer.subarray(pos, end);
}
async getImageData(length, ignoreColorSpace = false) {
if (!this.canAsyncDecodeImageFromBuffer) {
return this.getBytes(length, ignoreColorSpace);
}
const data = await this.stream.asyncGetBytes();
return this.decodeImage(data, ignoreColorSpace);
}
reset() {
this.pos = 0;
}

src/core/flate_stream.js

@ -21,6 +21,7 @@
import { FormatError, info } from "../shared/util.js";
import { DecodeStream } from "./decode_stream.js";
import { Stream } from "./stream.js";
const codeLenCodeMap = new Int32Array([
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
@ -148,6 +149,57 @@ class FlateStream extends DecodeStream {
this.codeBuf = 0;
}
async getImageData(length, _ignoreColorSpace) {
const data = await this.asyncGetBytes();
return data?.subarray(0, length) || this.getBytes(length);
}
async asyncGetBytes() {
this.str.reset();
const bytes = this.str.getBytes();
try {
const { readable, writable } = new DecompressionStream("deflate");
const writer = writable.getWriter();
writer.write(bytes);
writer.close();
const chunks = [];
let totalLength = 0;
for await (const chunk of readable) {
chunks.push(chunk);
totalLength += chunk.byteLength;
}
const data = new Uint8Array(totalLength);
let offset = 0;
for (const chunk of chunks) {
data.set(chunk, offset);
offset += chunk.byteLength;
}
return data;
} catch {
// DecompressionStream failed (for example because there are some extra
// bytes after the end of the compressed data), so we fall back to our
// own decoder.
// We already got the bytes from the underlying stream, so we reuse them
// instead of fetching them again.
this.str = new Stream(
bytes,
2 /* = header size (see ctor) */,
bytes.length,
this.str.dict
);
this.reset();
return null;
}
}
get isAsync() {
return true;
}
getBits(bits) {
const str = this.str;
let codeSize = this.codeSize;

src/core/image.js

@ -565,7 +565,7 @@ class PDFImage {
return output;
}
fillOpacity(rgbaBuf, width, height, actualHeight, image) {
async fillOpacity(rgbaBuf, width, height, actualHeight, image) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
rgbaBuf instanceof Uint8ClampedArray,
@ -580,7 +580,7 @@ class PDFImage {
sw = smask.width;
sh = smask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
smask.fillGrayBuffer(alphaBuf);
await smask.fillGrayBuffer(alphaBuf);
if (sw !== width || sh !== height) {
alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height);
}
@ -590,7 +590,7 @@ class PDFImage {
sh = mask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
mask.numComps = 1;
mask.fillGrayBuffer(alphaBuf);
await mask.fillGrayBuffer(alphaBuf);
// Need to invert values in rgbaBuf
for (i = 0, ii = sw * sh; i < ii; ++i) {
@ -716,7 +716,7 @@ class PDFImage {
drawWidth === originalWidth &&
drawHeight === originalHeight
) {
const data = this.getImageBytes(originalHeight * rowBytes, {});
const data = await this.getImageBytes(originalHeight * rowBytes, {});
if (isOffscreenCanvasSupported) {
if (mustBeResized) {
return ImageResizer.createImage(
@ -774,7 +774,7 @@ class PDFImage {
}
if (isHandled) {
const rgba = this.getImageBytes(imageLength, {
const rgba = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGBA: true,
@ -794,7 +794,7 @@ class PDFImage {
case "DeviceRGB":
case "DeviceCMYK":
imgData.kind = ImageKind.RGB_24BPP;
imgData.data = this.getImageBytes(imageLength, {
imgData.data = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGB: true,
@ -809,7 +809,7 @@ class PDFImage {
}
}
const imgArray = this.getImageBytes(originalHeight * rowBytes, {
const imgArray = await this.getImageBytes(originalHeight * rowBytes, {
internal: true,
});
// imgArray can be incomplete (e.g. after CCITT fax encoding).
@ -852,7 +852,7 @@ class PDFImage {
maybeUndoPreblend = true;
// Color key masking (opacity) must be performed before decoding.
this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
}
if (this.needsDecode) {
@ -893,7 +893,7 @@ class PDFImage {
return imgData;
}
fillGrayBuffer(buffer) {
async fillGrayBuffer(buffer) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
buffer instanceof Uint8ClampedArray,
@ -913,7 +913,9 @@ class PDFImage {
// rows start at byte boundary
const rowBytes = (width * numComps * bpc + 7) >> 3;
const imgArray = this.getImageBytes(height * rowBytes, { internal: true });
const imgArray = await this.getImageBytes(height * rowBytes, {
internal: true,
});
const comps = this.getComponents(imgArray);
let i, length;
@ -975,7 +977,7 @@ class PDFImage {
};
}
getImageBytes(
async getImageBytes(
length,
{
drawWidth,
@ -990,7 +992,10 @@ class PDFImage {
this.image.drawHeight = drawHeight || this.height;
this.image.forceRGBA = !!forceRGBA;
this.image.forceRGB = !!forceRGB;
const imageBytes = this.image.getBytes(length, this.ignoreColorSpace);
const imageBytes = await this.image.getImageData(
length,
this.ignoreColorSpace
);
// If imageBytes came from a DecodeStream, we're safe to transfer it
// (and thus detach its underlying buffer) because it will constitute

src/core/jbig2_stream.js

@ -44,9 +44,14 @@ class Jbig2Stream extends DecodeStream {
}
readBlock() {
this.decodeImage();
}
decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;
const jbig2Image = new Jbig2Image();
const chunks = [];
@ -57,7 +62,7 @@ class Jbig2Stream extends DecodeStream {
chunks.push({ data: globals, start: 0, end: globals.length });
}
}
chunks.push({ data: this.bytes, start: 0, end: this.bytes.length });
chunks.push({ data: bytes, start: 0, end: bytes.length });
const data = jbig2Image.parseChunks(chunks);
const dataLength = data.length;
@ -68,6 +73,12 @@ class Jbig2Stream extends DecodeStream {
this.buffer = data;
this.bufferLength = dataLength;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

src/core/jpeg_stream.js

@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js";
*/
class JpegStream extends DecodeStream {
constructor(stream, maybeLength, params) {
// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
let ch;
while ((ch = stream.getByte()) !== -1) {
// Find the first byte of the SOI marker (0xFFD8).
if (ch === 0xff) {
stream.skip(-1); // Reset the stream position to the SOI.
break;
}
}
super(maybeLength);
this.stream = stream;
@ -53,8 +43,24 @@ class JpegStream extends DecodeStream {
}
readBlock() {
this.decodeImage();
}
decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;
// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
for (let i = 0, ii = bytes.length - 1; i < ii; i++) {
if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) {
if (i > 0) {
bytes = bytes.subarray(i);
}
break;
}
}
const jpegOptions = {
decodeTransform: undefined,
@ -89,7 +95,7 @@ class JpegStream extends DecodeStream {
}
const jpegImage = new JpegImage(jpegOptions);
jpegImage.parse(this.bytes);
jpegImage.parse(bytes);
const data = jpegImage.getData({
width: this.drawWidth,
height: this.drawHeight,
@ -100,6 +106,12 @@ class JpegStream extends DecodeStream {
this.buffer = data;
this.bufferLength = data.length;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

src/core/jpx_stream.js

@ -42,13 +42,23 @@ class JpxStream extends DecodeStream {
}
readBlock(ignoreColorSpace) {
if (this.eof) {
return;
}
this.decodeImage(null, ignoreColorSpace);
}
this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace);
decodeImage(bytes, ignoreColorSpace) {
if (this.eof) {
return this.buffer;
}
bytes ||= this.bytes;
this.buffer = JpxImage.decode(bytes, ignoreColorSpace);
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}
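Taken together, the pieces above dispatch as follows: an image decoder stream (JpegStream, Jbig2Stream, JpxStream) reports canAsyncDecodeImageFromBuffer when its underlying stream is async, which currently only FlateStream is; in that case DecodeStream.getImageData() awaits the raw bytes from asyncGetBytes() and hands them to decodeImage(). A rough, illustrative trace (the function and variable names here are hypothetical; the methods are the ones added in this commit):

// Illustrative only: how an image request reaches the new async path when a
// JpegStream wraps a Flate-compressed stream.
async function getPixels(jpegStream, length, ignoreColorSpace = false) {
  // DecodeStream.getImageData(): jpegStream.canAsyncDecodeImageFromBuffer
  // mirrors jpegStream.stream.isAsync, and FlateStream overrides isAsync
  // to return true.
  if (jpegStream.canAsyncDecodeImageFromBuffer) {
    // FlateStream.asyncGetBytes() inflates with DecompressionStream; on
    // failure it resets itself and returns null, in which case decodeImage()
    // falls back to reading this.bytes synchronously.
    const data = await jpegStream.stream.asyncGetBytes();
    return jpegStream.decodeImage(data, ignoreColorSpace);
  }
  // Otherwise the pre-existing synchronous path is used unchanged.
  return jpegStream.getBytes(length, ignoreColorSpace);
}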