From 4248f0745cc147e9b1da5f8aecaec0a128a9b9b1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 14 May 2021 09:59:24 +0200 Subject: [PATCH] Improve the `Page.content` and `Page.getContentStream` methods First of all, by using `Dict.getArray` in the `Page.content` getter we remove the need to manually iterate through and fetch the sub-streams (when they exist) in the `Page.getContentStream` method. Secondly, we can simplify the code in `Page.{getOperatorList, extractTextContent}` by letting `Page.getContentStream` ensure that `content` is available and returning a Promise instead. --- src/core/document.js | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index b5f9b596e..1b9bae0e2 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -54,6 +54,7 @@ import { } from "./core_utils.js"; import { NullStream, Stream } from "./stream.js"; import { AnnotationFactory } from "./annotation.js"; +import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./crypto.js"; import { Catalog } from "./catalog.js"; import { Linearization } from "./parser.js"; @@ -136,7 +137,7 @@ class Page { } get content() { - return this.pageDict.get("Contents"); + return this.pageDict.getArray("Contents"); } get resources() { @@ -229,25 +230,20 @@ class Page { return shadow(this, "rotate", rotate); } + /** + * @returns {Promise} + */ getContentStream() { - const content = this.content; - let stream; - - if (Array.isArray(content)) { - // Fetching the individual streams from the array. - const xref = this.xref; - const streams = []; - for (const subStream of content) { - streams.push(xref.fetchIfRef(subStream)); + return this.pdfManager.ensure(this, "content").then(content => { + if (content instanceof BaseStream) { + return content; + } + if (Array.isArray(content)) { + return new StreamsSequenceStream(content); } - stream = new StreamsSequenceStream(streams); - } else if (isStream(content)) { - stream = content; - } else { // Replace non-existent page content with empty content. - stream = new NullStream(); - } - return stream; + return new NullStream(); + }); } get xfaData() { @@ -313,10 +309,7 @@ class Page { renderInteractiveForms, annotationStorage, }) { - const contentStreamPromise = this.pdfManager.ensure( - this, - "getContentStream" - ); + const contentStreamPromise = this.getContentStream(); const resourcesPromise = this.loadResources([ "ColorSpace", "ExtGState", @@ -420,10 +413,7 @@ class Page { sink, combineTextItems, }) { - const contentStreamPromise = this.pdfManager.ensure( - this, - "getContentStream" - ); + const contentStreamPromise = this.getContentStream(); const resourcesPromise = this.loadResources([ "ExtGState", "Font",