mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-20 15:18:08 +02:00
[api-minor] Include the document /Lang attribute in the textContent-data
- These changes will allow a simpler way of implementing PR 17770.
- The /Lang attribute is fetched lazily, with the first `getTextContent` invocation. Given the existing worker-thread caching, this will thus only need to be done *once* per PDF document (and most PDFs don't include this data).
- This makes the /Lang attribute *directly available* in the `textLayer`, which has the following advantages:
  - We don't need to block, and thus delay, overall viewer initialization on fetching it (nor pass it around throughout the viewer).
  - Third-party users of the `textLayer` will automatically benefit from this, once we start actually using the /Lang attribute in PR 17770.

*Please note:* This also, importantly, means that the `text` reference-tests will then cover this code (which wouldn't otherwise have been the case).
This commit is contained in:
parent
c0b5d93ef4
commit
6d523c316c
6 changed files with 56 additions and 41 deletions
|
@@ -3128,10 +3128,11 @@ describe("api", function () {
|
|||
});
|
||||
|
||||
it("gets text content", async function () {
|
||||
const { items, styles } = await page.getTextContent();
|
||||
const { items, styles, lang } = await page.getTextContent();
|
||||
|
||||
expect(items.length).toEqual(15);
|
||||
expect(objectSize(styles)).toEqual(5);
|
||||
expect(lang).toEqual("en");
|
||||
|
||||
const text = mergeText(items);
|
||||
expect(text).toEqual(`Table Of Content
|
||||
|
@@ -3146,13 +3147,14 @@ page 1 / 3`);
|
|||
);
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items, styles } = await pdfPage.getTextContent({
|
||||
const { items, styles, lang } = await pdfPage.getTextContent({
|
||||
disableNormalization: true,
|
||||
});
|
||||
expect(items.length).toEqual(1);
|
||||
// Font name will be a random object id.
|
||||
const fontName = items[0].fontName;
|
||||
expect(Object.keys(styles)).toEqual([fontName]);
|
||||
expect(lang).toEqual(null);
|
||||
|
||||
expect(items[0]).toEqual({
|
||||
dir: "ltr",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue