1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

[api-minor] Include the document /Lang attribute in the textContent-data

- These changes will allow a simpler way of implementing PR 17770.

 - The /Lang attribute is fetched lazily, with the first `getTextContent` invocation. Given the existing worker-thread caching, this will thus only need to be done *once* per PDF document (and most PDFs don't include this data).

 - This makes the /Lang attribute *directly available* in the `textLayer`, which has the following advantages:
    - We don't need to block, and thus delay, overall viewer initialization on fetching it (nor pass it around throughout the viewer).

    - Third-party users of the `textLayer` will automatically benefit from this, once we start actually using the /Lang attribute in PR 17770.
      *Please note:* This also, importantly, means that the `text` reference-tests will then cover this code (which wouldn't otherwise have been the case).
This commit is contained in:
Jonas Jenwald 2024-04-15 12:30:09 +02:00
parent c0b5d93ef4
commit 6d523c316c
6 changed files with 56 additions and 41 deletions

View file

@ -3128,10 +3128,11 @@ describe("api", function () {
});
it("gets text content", async function () {
const { items, styles } = await page.getTextContent();
const { items, styles, lang } = await page.getTextContent();
expect(items.length).toEqual(15);
expect(objectSize(styles)).toEqual(5);
expect(lang).toEqual("en");
const text = mergeText(items);
expect(text).toEqual(`Table Of Content
@ -3146,13 +3147,14 @@ page 1 / 3`);
);
const pdfDoc = await loadingTask.promise;
const pdfPage = await pdfDoc.getPage(1);
const { items, styles } = await pdfPage.getTextContent({
const { items, styles, lang } = await pdfPage.getTextContent({
disableNormalization: true,
});
expect(items.length).toEqual(1);
// Font name will be a random object id.
const fontName = items[0].fontName;
expect(Object.keys(styles)).toEqual([fontName]);
expect(lang).toEqual(null);
expect(items[0]).toEqual({
dir: "ltr",