1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

[api-minor] Remove the disableCombineTextItems option

*Please note:* This parameter has never been used within the PDF.js library/viewer itself, and it was only ever added for backwards compatibility reasons.

This parameter was added in PR 7475, over six years ago, to try to optionally maintain the previous *default* text-extraction behaviour.
However, as part of the general text-extraction improvements in PR 13257, almost two years ago, the `disableCombineTextItems` functionality was accidentally "broken" in various ways. Note how the only (very basic) unit-test was updated in a way that doesn't really make sense, since generally speaking you'd expect that using the option should result in *more* (or at least the same number of) text-items. Furthermore, there's also the recent issue 16209, where the option causes almost all textContent to be concatenated together.

Hence this patch proposes that we simply remove the `disableCombineTextItems` option, since it's essentially unused/untested functionality, as evidenced by the fact that it took almost two years for someone to notice that it's broken.
This commit is contained in:
Jonas Jenwald 2023-03-30 13:36:42 +02:00
parent 09da8026b6
commit 5063a6f2a9
6 changed files with 11 additions and 41 deletions

View file

@ -21,6 +21,7 @@ import {
ImageKind,
InvalidPDFException,
MissingPDFException,
objectSize,
OPS,
PasswordException,
PasswordResponses,
@ -2321,26 +2322,16 @@ describe("api", function () {
});
it("gets text content", async function () {
const defaultPromise = page.getTextContent();
const parametersPromise = page.getTextContent({
disableCombineTextItems: true,
});
const { items, styles } = await page.getTextContent();
const data = await Promise.all([defaultPromise, parametersPromise]);
expect(items.length).toEqual(15);
expect(objectSize(styles)).toEqual(5);
expect(!!data[0].items).toEqual(true);
expect(data[0].items.length).toEqual(15);
expect(!!data[0].styles).toEqual(true);
const page1 = mergeText(data[0].items);
expect(page1).toEqual(`Table Of Content
const text = mergeText(items);
expect(text).toEqual(`Table Of Content
Chapter 1 .......................................................... 2
Paragraph 1.1 ...................................................... 3
page 1 / 3`);
expect(!!data[1].items).toEqual(true);
expect(data[1].items.length).toEqual(6);
expect(!!data[1].styles).toEqual(true);
});
it("gets text content, with correct properties (issue 8276)", async function () {