mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-20 15:18:08 +02:00
[api-minor] Remove the `disableCombineTextItems` option
*Please note:* This parameter has never been used within the PDF.js library/viewer itself, and it was only ever added for backwards-compatibility reasons. This parameter was added in PR 7475, over six years ago, to try to optionally maintain the previous *default* text-extraction behaviour. However, as part of the general text-extraction improvements in PR 13257, almost two years ago, the `disableCombineTextItems` functionality was accidentally "broken" in various ways. Note how the only (very basic) unit-test was updated in a way that doesn't really make sense, since generally speaking you'd expect that using the option should result in *more* (or at least the same number of) text-items. Furthermore, there's also the recent issue 16209, where the option causes almost all textContent to be concatenated together. Hence, this patch proposes that we simply remove the `disableCombineTextItems` option, since it's essentially unused/untested functionality, as evidenced by the fact that it took almost two years for someone to notice that it's broken.
This commit is contained in:
parent
09da8026b6
commit
5063a6f2a9
6 changed files with 11 additions and 41 deletions
|
@ -21,6 +21,7 @@ import {
|
|||
ImageKind,
|
||||
InvalidPDFException,
|
||||
MissingPDFException,
|
||||
objectSize,
|
||||
OPS,
|
||||
PasswordException,
|
||||
PasswordResponses,
|
||||
|
@ -2321,26 +2322,16 @@ describe("api", function () {
|
|||
});
|
||||
|
||||
it("gets text content", async function () {
|
||||
const defaultPromise = page.getTextContent();
|
||||
const parametersPromise = page.getTextContent({
|
||||
disableCombineTextItems: true,
|
||||
});
|
||||
const { items, styles } = await page.getTextContent();
|
||||
|
||||
const data = await Promise.all([defaultPromise, parametersPromise]);
|
||||
expect(items.length).toEqual(15);
|
||||
expect(objectSize(styles)).toEqual(5);
|
||||
|
||||
expect(!!data[0].items).toEqual(true);
|
||||
expect(data[0].items.length).toEqual(15);
|
||||
expect(!!data[0].styles).toEqual(true);
|
||||
|
||||
const page1 = mergeText(data[0].items);
|
||||
expect(page1).toEqual(`Table Of Content
|
||||
const text = mergeText(items);
|
||||
expect(text).toEqual(`Table Of Content
|
||||
Chapter 1 .......................................................... 2
|
||||
Paragraph 1.1 ...................................................... 3
|
||||
page 1 / 3`);
|
||||
|
||||
expect(!!data[1].items).toEqual(true);
|
||||
expect(data[1].items.length).toEqual(6);
|
||||
expect(!!data[1].styles).toEqual(true);
|
||||
});
|
||||
|
||||
it("gets text content, with correct properties (issue 8276)", async function () {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue