1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-19 14:48:08 +02:00

Merge pull request #18390 from alexcat3/fix-issue-18099

Handle toUnicode cMaps that omit leading zeros in hex encoded UTF-16 (issue 18099)
This commit is contained in:
Jonas Jenwald 2024-07-06 18:57:07 +02:00 committed by GitHub
commit 5ee61690f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 21 additions and 0 deletions

View file

@ -3852,6 +3852,11 @@ class PartialEvaluator {
map[charCode] = String.fromCodePoint(token);
return;
}
// Add back omitted leading zeros on odd length tokens
// (fixes issue #18099)
if (token.length % 2 !== 0) {
token = "\u0000" + token;
}
const str = [];
for (let k = 0; k < token.length; k += 2) {
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);

View file

@ -653,3 +653,4 @@
!bug1539074.1.pdf
!issue18305.pdf
!issue18360.pdf
!issue18099_reduced.pdf

Binary file not shown.

View file

@ -3419,6 +3419,21 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
// Regression spec for issue 18099: loads the reduced test PDF, extracts the
// text content of its first page and checks that it reads "Hello world!".
it("gets text content, correctly handling documents with toUnicode cmaps that omit leading zeros on hex-encoded UTF-16", async function () {
  const params = buildGetDocumentParams("issue18099_reduced.pdf");
  const task = getDocument(params);

  const doc = await task.promise;
  const firstPage = await doc.getPage(1);

  // NOTE(review): `disableNormalization` presumably keeps the extracted text
  // un-normalized so the comparison sees the decoded characters as-is —
  // confirm against the getTextContent API.
  const textContent = await firstPage.getTextContent({
    disableNormalization: true,
  });

  expect(mergeText(textContent.items)).toEqual("Hello world!");

  // Tear down the loading task once the assertion has been recorded.
  await task.destroy();
});
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
if (isNodeJS) {
pending("Linked test-cases are not supported in Node.js.");