1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 06:38:07 +02:00
pdf.js/test/unit/text_layer_spec.js
Jonas Jenwald 5b3d3c7dd9 Ensure that textLayers can be rendered in parallel, without interfering with each other
Note that the textContent is returned in "chunks" from the API, through the use of `ReadableStream`s, and on the main-thread we're (normally) using just one temporary canvas in order to measure the size of the textLayer `span`s; see the [`#layout`](5b4c2fe1a8/src/display/text_layer.js (L396-L428)) method.

*Order of events, for parallel textLayer rendering:*
 1. Call [`render`](5b4c2fe1a8/src/display/text_layer.js (L155-L177)) of the textLayer for page A.
 2. Immediately call `render` of the textLayer for page B.
 3. The first text-chunk for pageA arrives, and it's parsed/layout which means updating the cached [fontSize/fontFamily](5b4c2fe1a8/src/display/text_layer.js (L409-L413)) for the textLayer of page A.
 4. The first text-chunk for pageB arrives, which means updating the cached fontSize/fontFamily *for the textLayer of page B* since this data is unique to each `TextLayer`-instance.
 5. The second text-chunk for pageA arrives, and we don't update the canvas-font since the cached fontSize/fontFamily still apply from step 3 above.

Where this potentially breaks down is between the last steps, since we're using just one temporary canvas for all measurements but have *individual* fontSize/fontFamily caches for each textLayer.
Hence it's possible that the canvas-font has actually changed, despite the cached values suggesting otherwise, and to address this we instead cache the fontSize/fontFamily globally through a new (static) helper method.

*Note:* Includes a basic unit-test, using dummy text-content, which fails on `master` and passes with this patch.

Finally, pun intended, ensure that temporary textLayer-data is cleared *before* the `render`-promise resolves to avoid any intermittent problems in the unit-tests.
2024-09-11 15:28:51 +02:00

253 lines
7.3 KiB
JavaScript

/* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { buildGetDocumentParams } from "./test_utils.js";
import { getDocument } from "../../src/display/api.js";
import { isNodeJS } from "../../src/shared/util.js";
import { TextLayer } from "../../src/display/text_layer.js";
describe("textLayer", function () {
it("creates textLayer from ReadableStream", async function () {
if (isNodeJS) {
pending("document.createElement is not supported in Node.js.");
}
const loadingTask = getDocument(buildGetDocumentParams("basicapi.pdf"));
const pdfDocument = await loadingTask.promise;
const page = await pdfDocument.getPage(1);
const textLayer = new TextLayer({
textContentSource: page.streamTextContent(),
container: document.createElement("div"),
viewport: page.getViewport({ scale: 1 }),
});
await textLayer.render();
expect(textLayer.textContentItemsStr).toEqual([
"Table Of Content",
"",
"Chapter 1",
" ",
"..........................................................",
" ",
"2",
"",
"Paragraph 1.1",
" ",
"......................................................",
" ",
"3",
"",
"page 1 / 3",
]);
await loadingTask.destroy();
});
it("creates textLayer from TextContent", async function () {
if (isNodeJS) {
pending("document.createElement is not supported in Node.js.");
}
const loadingTask = getDocument(buildGetDocumentParams("basicapi.pdf"));
const pdfDocument = await loadingTask.promise;
const page = await pdfDocument.getPage(1);
const textLayer = new TextLayer({
textContentSource: await page.getTextContent(),
container: document.createElement("div"),
viewport: page.getViewport({ scale: 1 }),
});
await textLayer.render();
expect(textLayer.textContentItemsStr).toEqual([
"Table Of Content",
"",
"Chapter 1",
" ",
"..........................................................",
" ",
"2",
"",
"Paragraph 1.1",
" ",
"......................................................",
" ",
"3",
"",
"page 1 / 3",
]);
await loadingTask.destroy();
});
it("creates textLayers in parallel, from ReadableStream", async function () {
if (isNodeJS) {
pending("document.createElement is not supported in Node.js.");
}
if (typeof ReadableStream.from !== "function") {
pending("ReadableStream.from is not supported.");
}
const getTransform = container => {
const transform = [];
for (const span of container.childNodes) {
const t = span.style.transform;
expect(t).toMatch(/^scaleX\([\d.]+\)$/);
transform.push(t);
}
return transform;
};
const loadingTask = getDocument(buildGetDocumentParams("basicapi.pdf"));
const pdfDocument = await loadingTask.promise;
const [page1, page2] = await Promise.all([
pdfDocument.getPage(1),
pdfDocument.getPage(2),
]);
// Create text-content streams with dummy content.
const items1 = [
{
str: "Chapter A",
dir: "ltr",
width: 100,
height: 20,
transform: [20, 0, 0, 20, 45, 744],
fontName: "g_d0_f1",
hasEOL: false,
},
{
str: "page 1",
dir: "ltr",
width: 50,
height: 20,
transform: [20, 0, 0, 20, 45, 744],
fontName: "g_d0_f1",
hasEOL: false,
},
];
const items2 = [
{
str: "Chapter B",
dir: "ltr",
width: 120,
height: 10,
transform: [10, 0, 0, 10, 492, 16],
fontName: "g_d0_f2",
hasEOL: false,
},
{
str: "page 2",
dir: "ltr",
width: 60,
height: 10,
transform: [10, 0, 0, 10, 492, 16],
fontName: "g_d0_f2",
hasEOL: false,
},
];
const styles = {
g_d0_f1: {
ascent: 0.75,
descent: -0.25,
fontFamily: "serif",
vertical: false,
},
g_d0_f2: {
ascent: 0.5,
descent: -0.5,
fontFamily: "sans-serif",
vertical: false,
},
};
const lang = "en";
// Render the textLayers serially, to have something to compare against.
const serialContainer1 = document.createElement("div"),
serialContainer2 = document.createElement("div");
const serialTextLayer1 = new TextLayer({
textContentSource: { items: items1, styles, lang },
container: serialContainer1,
viewport: page1.getViewport({ scale: 1 }),
});
await serialTextLayer1.render();
const serialTextLayer2 = new TextLayer({
textContentSource: { items: items2, styles, lang },
container: serialContainer2,
viewport: page2.getViewport({ scale: 1 }),
});
await serialTextLayer2.render();
const serialTransform1 = getTransform(serialContainer1),
serialTransform2 = getTransform(serialContainer2);
expect(serialTransform1.length).toEqual(2);
expect(serialTransform2.length).toEqual(2);
// Reset any global textLayer-state before rendering in parallel.
TextLayer.cleanup();
const container1 = document.createElement("div"),
container2 = document.createElement("div");
const waitCapability1 = Promise.withResolvers();
const streamGenerator1 = (async function* () {
for (const item of items1) {
yield { items: [item], styles, lang };
await waitCapability1.promise;
}
})();
const streamGenerator2 = (async function* () {
for (const item of items2) {
yield { items: [item], styles, lang };
}
})();
const textLayer1 = new TextLayer({
textContentSource: ReadableStream.from(streamGenerator1),
container: container1,
viewport: page1.getViewport({ scale: 1 }),
});
const textLayer1Promise = textLayer1.render();
const textLayer2 = new TextLayer({
textContentSource: ReadableStream.from(streamGenerator2),
container: container2,
viewport: page2.getViewport({ scale: 1 }),
});
await textLayer2.render();
// Ensure that the first textLayer has its rendering "paused" while
// the second textLayer renders.
waitCapability1.resolve();
await textLayer1Promise;
// Sanity check to make sure that all text was parsed.
expect(textLayer1.textContentItemsStr).toEqual(["Chapter A", "page 1"]);
expect(textLayer2.textContentItemsStr).toEqual(["Chapter B", "page 2"]);
// Ensure that the transforms are identical when parsing in series/parallel.
const transform1 = getTransform(container1),
transform2 = getTransform(container2);
expect(transform1).toEqual(serialTransform1);
expect(transform2).toEqual(serialTransform2);
await loadingTask.destroy();
});
});