1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Merge pull request #18167 from calixteman/decompress_image

Decompress when it's possible images in using DecompressionStream
This commit is contained in:
calixteman 2024-06-02 16:08:19 +02:00 committed by GitHub
commit 5c51d56223
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 149 additions and 30 deletions

View file

@ -44,6 +44,27 @@ class BaseStream {
unreachable("Abstract method `getBytes` called");
}
/**
* NOTE: This method can only be used to get image-data that is guaranteed
* to be fully loaded, since otherwise intermittent errors may occur;
* note the `ObjectLoader` class.
*/
async getImageData(length, ignoreColorSpace) {
return this.getBytes(length, ignoreColorSpace);
}
async asyncGetBytes() {
unreachable("Abstract method `asyncGetBytes` called");
}
get isAsync() {
return false;
}
get canAsyncDecodeImageFromBuffer() {
return false;
}
peekByte() {
const peekedByte = this.getByte();
if (peekedByte !== -1) {

View file

@ -99,6 +99,14 @@ class DecodeStream extends BaseStream {
return this.buffer.subarray(pos, end);
}
async getImageData(length, ignoreColorSpace = false) {
if (!this.canAsyncDecodeImageFromBuffer) {
return this.getBytes(length, ignoreColorSpace);
}
const data = await this.stream.asyncGetBytes();
return this.decodeImage(data, ignoreColorSpace);
}
reset() {
this.pos = 0;
}

View file

@ -21,6 +21,7 @@
import { FormatError, info } from "../shared/util.js";
import { DecodeStream } from "./decode_stream.js";
import { Stream } from "./stream.js";
const codeLenCodeMap = new Int32Array([
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
@ -148,6 +149,57 @@ class FlateStream extends DecodeStream {
this.codeBuf = 0;
}
async getImageData(length, _ignoreColorSpace) {
const data = await this.asyncGetBytes();
return data?.subarray(0, length) || this.getBytes(length);
}
async asyncGetBytes() {
this.str.reset();
const bytes = this.str.getBytes();
try {
const { readable, writable } = new DecompressionStream("deflate");
const writer = writable.getWriter();
writer.write(bytes);
writer.close();
const chunks = [];
let totalLength = 0;
for await (const chunk of readable) {
chunks.push(chunk);
totalLength += chunk.byteLength;
}
const data = new Uint8Array(totalLength);
let offset = 0;
for (const chunk of chunks) {
data.set(chunk, offset);
offset += chunk.byteLength;
}
return data;
} catch {
// DecompressionStream failed (for example because there are some extra
// bytes after the end of the compressed data), so we fallback to our
// decoder.
// We already get the bytes from the underlying stream, so we just reuse
// them to avoid get them again.
this.str = new Stream(
bytes,
2 /* = header size (see ctor) */,
bytes.length,
this.str.dict
);
this.reset();
return null;
}
}
get isAsync() {
return true;
}
getBits(bits) {
const str = this.str;
let codeSize = this.codeSize;

View file

@ -565,7 +565,7 @@ class PDFImage {
return output;
}
fillOpacity(rgbaBuf, width, height, actualHeight, image) {
async fillOpacity(rgbaBuf, width, height, actualHeight, image) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
rgbaBuf instanceof Uint8ClampedArray,
@ -580,7 +580,7 @@ class PDFImage {
sw = smask.width;
sh = smask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
smask.fillGrayBuffer(alphaBuf);
await smask.fillGrayBuffer(alphaBuf);
if (sw !== width || sh !== height) {
alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height);
}
@ -590,7 +590,7 @@ class PDFImage {
sh = mask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
mask.numComps = 1;
mask.fillGrayBuffer(alphaBuf);
await mask.fillGrayBuffer(alphaBuf);
// Need to invert values in rgbaBuf
for (i = 0, ii = sw * sh; i < ii; ++i) {
@ -716,7 +716,7 @@ class PDFImage {
drawWidth === originalWidth &&
drawHeight === originalHeight
) {
const data = this.getImageBytes(originalHeight * rowBytes, {});
const data = await this.getImageBytes(originalHeight * rowBytes, {});
if (isOffscreenCanvasSupported) {
if (mustBeResized) {
return ImageResizer.createImage(
@ -774,7 +774,7 @@ class PDFImage {
}
if (isHandled) {
const rgba = this.getImageBytes(imageLength, {
const rgba = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGBA: true,
@ -794,7 +794,7 @@ class PDFImage {
case "DeviceRGB":
case "DeviceCMYK":
imgData.kind = ImageKind.RGB_24BPP;
imgData.data = this.getImageBytes(imageLength, {
imgData.data = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGB: true,
@ -809,7 +809,7 @@ class PDFImage {
}
}
const imgArray = this.getImageBytes(originalHeight * rowBytes, {
const imgArray = await this.getImageBytes(originalHeight * rowBytes, {
internal: true,
});
// imgArray can be incomplete (e.g. after CCITT fax encoding).
@ -852,7 +852,7 @@ class PDFImage {
maybeUndoPreblend = true;
// Color key masking (opacity) must be performed before decoding.
this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
}
if (this.needsDecode) {
@ -893,7 +893,7 @@ class PDFImage {
return imgData;
}
fillGrayBuffer(buffer) {
async fillGrayBuffer(buffer) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
buffer instanceof Uint8ClampedArray,
@ -913,7 +913,9 @@ class PDFImage {
// rows start at byte boundary
const rowBytes = (width * numComps * bpc + 7) >> 3;
const imgArray = this.getImageBytes(height * rowBytes, { internal: true });
const imgArray = await this.getImageBytes(height * rowBytes, {
internal: true,
});
const comps = this.getComponents(imgArray);
let i, length;
@ -975,7 +977,7 @@ class PDFImage {
};
}
getImageBytes(
async getImageBytes(
length,
{
drawWidth,
@ -990,7 +992,10 @@ class PDFImage {
this.image.drawHeight = drawHeight || this.height;
this.image.forceRGBA = !!forceRGBA;
this.image.forceRGB = !!forceRGB;
const imageBytes = this.image.getBytes(length, this.ignoreColorSpace);
const imageBytes = await this.image.getImageData(
length,
this.ignoreColorSpace
);
// If imageBytes came from a DecodeStream, we're safe to transfer it
// (and thus detach its underlying buffer) because it will constitute

View file

@ -44,9 +44,14 @@ class Jbig2Stream extends DecodeStream {
}
readBlock() {
this.decodeImage();
}
decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;
const jbig2Image = new Jbig2Image();
const chunks = [];
@ -57,7 +62,7 @@ class Jbig2Stream extends DecodeStream {
chunks.push({ data: globals, start: 0, end: globals.length });
}
}
chunks.push({ data: this.bytes, start: 0, end: this.bytes.length });
chunks.push({ data: bytes, start: 0, end: bytes.length });
const data = jbig2Image.parseChunks(chunks);
const dataLength = data.length;
@ -68,6 +73,12 @@ class Jbig2Stream extends DecodeStream {
this.buffer = data;
this.bufferLength = dataLength;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

View file

@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js";
*/
class JpegStream extends DecodeStream {
constructor(stream, maybeLength, params) {
// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
let ch;
while ((ch = stream.getByte()) !== -1) {
// Find the first byte of the SOI marker (0xFFD8).
if (ch === 0xff) {
stream.skip(-1); // Reset the stream position to the SOI.
break;
}
}
super(maybeLength);
this.stream = stream;
@ -53,8 +43,24 @@ class JpegStream extends DecodeStream {
}
readBlock() {
this.decodeImage();
}
decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;
// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
for (let i = 0, ii = bytes.length - 1; i < ii; i++) {
if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) {
if (i > 0) {
bytes = bytes.subarray(i);
}
break;
}
}
const jpegOptions = {
decodeTransform: undefined,
@ -89,7 +95,7 @@ class JpegStream extends DecodeStream {
}
const jpegImage = new JpegImage(jpegOptions);
jpegImage.parse(this.bytes);
jpegImage.parse(bytes);
const data = jpegImage.getData({
width: this.drawWidth,
height: this.drawHeight,
@ -100,6 +106,12 @@ class JpegStream extends DecodeStream {
this.buffer = data;
this.bufferLength = data.length;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

View file

@ -42,13 +42,23 @@ class JpxStream extends DecodeStream {
}
readBlock(ignoreColorSpace) {
if (this.eof) {
return;
}
this.decodeImage(null, ignoreColorSpace);
}
this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace);
decodeImage(bytes, ignoreColorSpace) {
if (this.eof) {
return this.buffer;
}
bytes ||= this.bytes;
this.buffer = JpxImage.decode(bytes, ignoreColorSpace);
this.bufferLength = this.buffer.length;
this.eof = true;
return this.buffer;
}
get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}