1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Remove language codes from text strings.

Also take care to have an even number of bytes with UTF-16 strings.
This commit is contained in:
Calixte Denizet 2023-11-24 20:17:15 +01:00
parent 58316369e5
commit eb5f610d18
2 changed files with 47 additions and 2 deletions

View file

@ -99,11 +99,21 @@ describe("util", function () {
expect(stringToPDFString(str)).toEqual("string");
});
// Input: UTF-16BE byte-order mark (0xFE 0xFF), five complete two-byte code
// units spelling "strin", then a single dangling 0x00 byte. The expectation
// is that the dangling byte contributes nothing to the decoded text.
it("handles incomplete UTF-16 big-endian strings", function () {
  const truncated = "\xFE\xFF\x00\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00";
  const decoded = stringToPDFString(truncated);

  expect(decoded).toEqual("strin");
});
// Input: UTF-16LE byte-order mark (0xFF 0xFE) followed by six complete
// low-byte-first code units; the decoded text must be "string".
it("handles UTF-16 little-endian strings", function () {
  const littleEndian =
    "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67\x00";
  const decoded = stringToPDFString(littleEndian);

  expect(decoded).toEqual("string");
});
// Same payload as the complete little-endian case, but with the final 0x00
// removed, so the last code unit (0x67) has only one of its two bytes. The
// expectation is that this half code unit is left out of the result.
it("handles incomplete UTF-16 little-endian strings", function () {
  const truncated = "\xFF\xFE\x73\x00\x74\x00\x72\x00\x69\x00\x6E\x00\x67";
  const decoded = stringToPDFString(truncated);

  expect(decoded).toEqual("strin");
});
it("handles UTF-8 strings", function () {
const simpleStr = "\xEF\xBB\xBF\x73\x74\x72\x69\x6E\x67";
expect(stringToPDFString(simpleStr)).toEqual("string");
@ -134,6 +144,22 @@ describe("util", function () {
const str4 = "\xEF\xBB\xBF";
expect(stringToPDFString(str4)).toEqual("");
});
// Each sample embeds the language code "enUS" between two ESC (0x1B)
// characters in the middle of "hello world". Whatever the encoding of the
// sample, the decoded text is expected to contain neither the ESC delimiters
// nor the language code.
it("handles strings with language code", function () {
  const samples = [
    // ISO Latin 1
    "hello \x1benUS\x1bworld",
    // UTF-16BE
    "\xFE\xFF\x00h\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d",
    // UTF-16LE
    "\xFF\xFEh\x00e\x00l\x00l\x00o\x00 \x00\x1b\x00e\x00n\x00U\x00S\x00\x1b\x00w\x00o\x00r\x00l\x00d\x00",
  ];

  // Checked in the same order as listed: Latin 1, UTF-16BE, UTF-16LE.
  for (const sample of samples) {
    expect(stringToPDFString(sample)).toEqual("hello world");
  }
});
});
describe("ReadableStream", function () {