1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

[api-minor] Don't add out-of-page chars to the text content (bug 1755201)

- This aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1755201.
- If a glyph's position is not within the page view, the glyph is skipped.
This commit is contained in:
Calixte Denizet 2022-02-13 19:39:40 +01:00
parent 78246719f8
commit 18e3a98c2b
5 changed files with 58 additions and 13 deletions

View file

@ -0,0 +1 @@
https://bugzilla.mozilla.org/attachment.cgi?id=9263657

View file

@ -1,4 +1,11 @@
[
{ "id": "bug1755201",
"file": "pdfs/bug1755201.pdf",
"md5": "cece14097812d8a1f69e86a51e4a3804",
"rounds": 1,
"link": true,
"type": "other"
},
{ "id": "filled-background-range",
"file": "pdfs/filled-background.pdf",
"md5": "2e3120255d9c3e79b96d2543b12d2589",

View file

@ -2219,6 +2219,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
// Regression test for bug 1755201: text that lies outside of the page must
// not show up in the result of `getTextContent`.
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
  // The PDF for this test is a linked test-case, which cannot be used when
  // the suite runs under Node.js, so mark the spec as pending there.
  if (isNodeJS) {
    pending("Linked test-cases are not supported in Node.js.");
  }

  const documentParams = buildGetDocumentParams("bug1755201.pdf");
  const loadingTask = getDocument(documentParams);
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(6);

  // Flatten the text items into a single string so it can be searched.
  const textContent = await pdfPage.getTextContent();
  const text = mergeText(textContent.items);

  // "win aisle" is presumably a fragment of the out-of-page text from the
  // bug report; it must be absent from the extracted text.
  const containsOutOfPageText = /win aisle/.test(text);
  expect(containsOutOfPageText).toEqual(false);

  await loadingTask.destroy();
});
it("gets empty structure tree", async function () {
const tree = await page.getStructTree();