1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-20 15:18:08 +02:00

Fix encoding issues when printing/saving a form with non-ASCII characters

This commit is contained in:
Calixte Denizet 2020-08-27 16:04:17 +02:00
parent 187542da8d
commit 56424967f2
6 changed files with 508 additions and 57 deletions

View file

@ -32,10 +32,18 @@ import {
import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js";
import { Lexer, Parser } from "../../src/core/parser.js";
import { DOMCMapReaderFactory } from "../../src/display/display_utils.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { NodeCMapReaderFactory } from "../../src/display/node_utils.js";
import { PartialEvaluator } from "../../src/core/evaluator.js";
import { StringStream } from "../../src/core/stream.js";
import { WorkerTask } from "../../src/core/worker.js";
// Base URLs passed as `baseUrl` to the CMap reader factories used by these
// tests: `dom` for DOMCMapReaderFactory, `node` for NodeCMapReaderFactory.
const cMapUrl = {
dom: "../../external/bcmaps/",
node: "./external/bcmaps/",
};
describe("annotation", function () {
class PDFManagerMock {
constructor(params) {
@ -82,6 +90,30 @@ describe("annotation", function () {
pdfManagerMock = new PDFManagerMock({
docBaseUrl: null,
});
// Choose the CMap reader matching the current runtime: the Node.js reader
// when `isNodeJS` is set, otherwise the DOM one. Both read compressed CMaps
// from the corresponding `cMapUrl` base URL.
let CMapReaderFactory;
if (isNodeJS) {
CMapReaderFactory = new NodeCMapReaderFactory({
baseUrl: cMapUrl.node,
isCompressed: true,
});
} else {
CMapReaderFactory = new DOMCMapReaderFactory({
baseUrl: cMapUrl.dom,
isCompressed: true,
});
}
// Cache, keyed by CMap name, that is handed to the PartialEvaluator as
// `builtInCMapCache`. Each entry stores whatever `fetch()` returns
// (NOTE(review): presumably a promise for the CMap data - confirm).
const builtInCMapCache = new Map();
// "UniJIS-UTF16-H" is the Encoding set on the MSGothic Type0 test font.
builtInCMapCache.set(
"UniJIS-UTF16-H",
CMapReaderFactory.fetch({ name: "UniJIS-UTF16-H" })
);
// NOTE(review): presumably needed for the font's Adobe/Japan1 CIDSystemInfo
// (Unicode mapping) - confirm against the evaluator.
builtInCMapCache.set(
"Adobe-Japan1-UCS2",
CMapReaderFactory.fetch({ name: "Adobe-Japan1-UCS2" })
);
idFactoryMock = createIdFactory(/* pageIndex = */ 0);
partialEvaluator = new PartialEvaluator({
xref: new XRefMock(),
@ -89,7 +121,9 @@ describe("annotation", function () {
pageIndex: 0,
idFactory: createIdFactory(/* pageIndex = */ 0),
fontCache: new RefSetCache(),
builtInCMapCache,
});
done();
});
@ -1419,7 +1453,7 @@ describe("annotation", function () {
});
describe("TextWidgetAnnotation", function () {
let textWidgetDict, fontRefObj;
let textWidgetDict, helvRefObj, gothRefObj;
beforeEach(function (done) {
textWidgetDict = new Dict();
@ -1432,11 +1466,38 @@ describe("annotation", function () {
helvDict.set("Type", Name.get("Font"));
helvDict.set("Subtype", Name.get("Type1"));
const fontRef = Ref.get(314, 0);
fontRefObj = { ref: fontRef, data: helvDict };
const gothDict = new Dict();
gothDict.set("BaseFont", Name.get("MSGothic"));
gothDict.set("Type", Name.get("Font"));
gothDict.set("Subtype", Name.get("Type0"));
gothDict.set("Encoding", Name.get("UniJIS-UTF16-H"));
gothDict.set("Name", Name.get("MSGothic"));
const cidSysInfoDict = new Dict();
cidSysInfoDict.set("Ordering", "Japan1");
cidSysInfoDict.set("Registry", "Adobe");
cidSysInfoDict.set("Supplement", "5");
const fontDescriptorDict = new Dict();
fontDescriptorDict.set("FontName", Name.get("MSGothic"));
fontDescriptorDict.set("CapHeight", "680");
const gothDescendantDict = new Dict();
gothDescendantDict.set("BaseFont", Name.get("MSGothic"));
gothDescendantDict.set("CIDSystemInfo", cidSysInfoDict);
gothDescendantDict.set("Subtype", Name.get("CIDFontType2"));
gothDescendantDict.set("Type", Name.get("Font"));
gothDescendantDict.set("FontDescriptor", fontDescriptorDict);
gothDict.set("DescendantFonts", [gothDescendantDict]);
const helvRef = Ref.get(314, 0);
const gothRef = Ref.get(159, 0);
helvRefObj = { ref: helvRef, data: helvDict };
gothRefObj = { ref: gothRef, data: gothDict };
const resourceDict = new Dict();
const fontDict = new Dict();
fontDict.set("Helv", fontRef);
fontDict.set("Helv", helvRef);
resourceDict.set("Font", fontDict);
textWidgetDict.set("DA", "/Helv 5 Tf");
@ -1447,7 +1508,7 @@ describe("annotation", function () {
});
afterEach(function () {
textWidgetDict = fontRefObj = null;
textWidgetDict = helvRefObj = gothRefObj = null;
});
it("should handle unknown text alignment, maximum length and flags", function (done) {
@ -1614,7 +1675,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1644,6 +1705,46 @@ describe("annotation", function () {
}, done.fail);
});
it("should render regular text in Japanese for printing", function (done) {
  // Expose the font behind `gothRefObj` as /Goth in the widget's default
  // resources and select it (size 5) in the default appearance string.
  const fontResources = textWidgetDict.get("DR").get("Font");
  fontResources.set("Goth", gothRefObj.ref);
  textWidgetDict.set("DA", "/Goth 5 Tf");

  const widgetRef = Ref.get(271, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    gothRefObj,
  ]);
  const printTask = new WorkerTask("test print");
  partialEvaluator.xref = xrefMock;

  // Store a Japanese value for the annotation and ask for its appearance.
  const buildAppearance = annotation => {
    const storage = {
      [annotation.data.id]: { value: "こんにちは世界の" },
    };
    return annotation._getAppearance(partialEvaluator, printTask, storage);
  };

  // The value is expected as two bytes per character, without a BOM.
  const checkAppearance = appearance => {
    const encodedValue =
      "\x30\x53\x30\x93\x30\x6b\x30\x61" +
      "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
    const expectedAppearance =
      "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm" +
      ` 2.00 2.00 Td (${encodedValue}) Tj ET Q EMC`;
    expect(appearance).toEqual(expectedAppearance);
    done();
  };

  AnnotationFactory.create(xrefMock, widgetRef, pdfManagerMock, idFactoryMock)
    .then(buildAppearance, done.fail)
    .then(checkAppearance, done.fail);
});
it("should render regular text for printing using normal appearance", function (done) {
const textWidgetRef = Ref.get(271, 0);
@ -1658,7 +1759,7 @@ describe("annotation", function () {
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1699,7 +1800,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1729,13 +1830,53 @@ describe("annotation", function () {
}, done.fail);
});
it("should render auto-sized text in Japanese for printing", function (done) {
  // Font size 0 in the default appearance string; the expected output below
  // uses size 9 instead.
  const fontResources = textWidgetDict.get("DR").get("Font");
  fontResources.set("Goth", gothRefObj.ref);
  textWidgetDict.set("DA", "/Goth 0 Tf");

  const widgetRef = Ref.get(271, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    gothRefObj,
  ]);
  const printTask = new WorkerTask("test print");
  partialEvaluator.xref = xrefMock;

  // Store a Japanese value for the annotation and ask for its appearance.
  const buildAppearance = annotation => {
    const storage = {
      [annotation.data.id]: { value: "こんにちは世界の" },
    };
    return annotation._getAppearance(partialEvaluator, printTask, storage);
  };

  // The value is expected as two bytes per character, without a BOM.
  const checkAppearance = appearance => {
    const encodedValue =
      "\x30\x53\x30\x93\x30\x6b\x30\x61" +
      "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
    const expectedAppearance =
      "/Tx BMC q BT /Goth 9 Tf 1 0 0 1 0 0 Tm" +
      ` 2.00 2.00 Td (${encodedValue}) Tj ET Q EMC`;
    expect(appearance).toEqual(expectedAppearance);
    done();
  };

  AnnotationFactory.create(xrefMock, widgetRef, pdfManagerMock, idFactoryMock)
    .then(buildAppearance, done.fail)
    .then(checkAppearance, done.fail);
});
it("should not render a password for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.PASSWORD);
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1768,7 +1909,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1808,6 +1949,45 @@ describe("annotation", function () {
}, done.fail);
});
it("should render multiline text in Japanese for printing", function (done) {
  // Multiline field rendered with the /Goth font at size 5.
  textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
  const fontResources = textWidgetDict.get("DR").get("Font");
  fontResources.set("Goth", gothRefObj.ref);
  textWidgetDict.set("DA", "/Goth 5 Tf");

  const widgetRef = Ref.get(271, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    gothRefObj,
  ]);
  const printTask = new WorkerTask("test print");
  partialEvaluator.xref = xrefMock;

  // Store a Japanese value for the annotation and ask for its appearance.
  const buildAppearance = annotation => {
    const storage = {
      [annotation.data.id]: { value: "こんにちは世界の" },
    };
    return annotation._getAppearance(partialEvaluator, printTask, storage);
  };

  // The eight characters are expected on two lines: five, then three.
  const checkAppearance = appearance => {
    const expectedAppearance =
      "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 10 Tm " +
      "2.00 -5.00 Td (\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f) Tj\n" +
      "0.00 -5.00 Td (\x4e\x16\x75\x4c\x30\x6e) Tj ET Q EMC";
    expect(appearance).toEqual(expectedAppearance);
    done();
  };

  AnnotationFactory.create(xrefMock, widgetRef, pdfManagerMock, idFactoryMock)
    .then(buildAppearance, done.fail)
    .then(checkAppearance, done.fail);
});
it("should render multiline text with various EOL for printing", function (done) {
textWidgetDict.set("Ff", AnnotationFieldFlag.MULTILINE);
textWidgetDict.set("Rect", [0, 0, 128, 10]);
@ -1815,7 +1995,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1881,7 +2061,7 @@ describe("annotation", function () {
const textWidgetRef = Ref.get(271, 0);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
fontRefObj,
helvRefObj,
]);
const task = new WorkerTask("test print");
partialEvaluator.xref = xref;
@ -1914,9 +2094,55 @@ describe("annotation", function () {
}, done.fail);
});
it("should render comb with Japanese text for printing", function (done) {
  // Comb field: MaxLen 4 over a 32-unit wide rectangle, /Goth font at size 5.
  textWidgetDict.set("Ff", AnnotationFieldFlag.COMB);
  textWidgetDict.set("MaxLen", 4);
  const fontResources = textWidgetDict.get("DR").get("Font");
  fontResources.set("Goth", gothRefObj.ref);
  textWidgetDict.set("DA", "/Goth 5 Tf");
  textWidgetDict.set("Rect", [0, 0, 32, 10]);

  const widgetRef = Ref.get(271, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    gothRefObj,
  ]);
  const printTask = new WorkerTask("test print");
  partialEvaluator.xref = xrefMock;

  // Store a Japanese value for the annotation and ask for its appearance.
  const buildAppearance = annotation => {
    const storage = {
      [annotation.data.id]: { value: "こんにちは世界の" },
    };
    return annotation._getAppearance(partialEvaluator, printTask, storage);
  };

  // Each character is expected in its own Tj, separated by 8.00-unit moves.
  const checkAppearance = appearance => {
    const expectedAppearance =
      "/Tx BMC q BT /Goth 5 Tf 1 0 0 1 2 2 Tm" +
      " (\x30\x53) Tj 8.00 0 Td (\x30\x93) Tj 8.00 0 Td (\x30\x6b) Tj" +
      " 8.00 0 Td (\x30\x61) Tj 8.00 0 Td (\x30\x6f) Tj" +
      " 8.00 0 Td (\x4e\x16) Tj 8.00 0 Td (\x75\x4c) Tj" +
      " 8.00 0 Td (\x30\x6e) Tj ET Q EMC";
    expect(appearance).toEqual(expectedAppearance);
    done();
  };

  AnnotationFactory.create(xrefMock, widgetRef, pdfManagerMock, idFactoryMock)
    .then(buildAppearance, done.fail)
    .then(checkAppearance, done.fail);
});
it("should save text", function (done) {
const textWidgetRef = Ref.get(123, 0);
const xref = new XRefMock([{ ref: textWidgetRef, data: textWidgetDict }]);
const xref = new XRefMock([
{ ref: textWidgetRef, data: textWidgetDict },
helvRefObj,
]);
partialEvaluator.xref = xref;
const task = new WorkerTask("test save");
@ -1935,17 +2161,17 @@ describe("annotation", function () {
expect(data.length).toEqual(2);
const [oldData, newData] = data;
expect(oldData.ref).toEqual(Ref.get(123, 0));
expect(newData.ref).toEqual(Ref.get(1, 0));
expect(newData.ref).toEqual(Ref.get(2, 0));
oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)");
expect(oldData.data).toEqual(
"123 0 obj\n" +
"<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " +
"<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " +
"/V (hello world) /AP << /N 1 0 R>> /M (date)>>\nendobj\n"
"/V (hello world) /AP << /N 2 0 R>> /M (date)>>\nendobj\n"
);
expect(newData.data).toEqual(
"1 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
"2 0 obj\n<< /Length 77 /Subtype /Form /Resources " +
"<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
"/Tx BMC q BT /Helv 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (hello world) Tj " +
"ET Q EMC\nendstream\nendobj\n"
@ -2039,6 +2265,55 @@ describe("annotation", function () {
done();
}, done.fail);
});
it("should save Japanese text", function (done) {
  // Expose the font behind `gothRefObj` as /Goth and select it at size 5.
  const fontResources = textWidgetDict.get("DR").get("Font");
  fontResources.set("Goth", gothRefObj.ref);
  textWidgetDict.set("DA", "/Goth 5 Tf");

  const widgetRef = Ref.get(123, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    gothRefObj,
  ]);
  partialEvaluator.xref = xrefMock;
  const saveTask = new WorkerTask("test save");

  // Store a Japanese value for the annotation and save it.
  const saveAnnotation = annotation => {
    const storage = {
      [annotation.data.id]: { value: "こんにちは世界の" },
    };
    return annotation.save(partialEvaluator, saveTask, storage);
  };

  // Two entries are expected: the rewritten widget dictionary (object 123)
  // and a new appearance stream (object 2).
  const checkSavedData = data => {
    const encodedValue =
      "\x30\x53\x30\x93\x30\x6b\x30\x61" +
      "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
    expect(data.length).toEqual(2);

    const [widgetEntry, appearanceEntry] = data;
    expect(widgetEntry.ref).toEqual(Ref.get(123, 0));
    expect(appearanceEntry.ref).toEqual(Ref.get(2, 0));

    // Replace the modification date so the comparison is deterministic.
    widgetEntry.data = widgetEntry.data.replace(/\(D:[0-9]+\)/, "(date)");

    // In /V the value is prefixed with the \xfe\xff byte order mark.
    const expectedWidget =
      "123 0 obj\n" +
      "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Goth 5 Tf) /DR " +
      "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /Rect [0 0 32 10] " +
      `/V (\xfe\xff${encodedValue}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n`;
    expect(widgetEntry.data).toEqual(expectedWidget);

    // In the appearance stream the value carries no byte order mark.
    const expectedStream =
      "2 0 obj\n<< /Length 82 /Subtype /Form /Resources " +
      "<< /Font << /Helv 314 0 R /Goth 159 0 R>>>> /BBox [0 0 32 10]>> stream\n" +
      `/Tx BMC q BT /Goth 5 Tf 1 0 0 1 0 0 Tm 2.00 2.00 Td (${encodedValue}) Tj ` +
      "ET Q EMC\nendstream\nendobj\n";
    expect(appearanceEntry.data).toEqual(expectedStream);
    done();
  };

  AnnotationFactory.create(xrefMock, widgetRef, pdfManagerMock, idFactoryMock)
    .then(saveAnnotation, done.fail)
    .then(checkSavedData, done.fail);
});
});
describe("ButtonWidgetAnnotation", function () {

View file

@ -21,6 +21,7 @@ import {
escapeString,
getModificationDate,
isArrayBuffer,
isAscii,
isBool,
isNum,
isSameOrigin,
@ -29,6 +30,7 @@ import {
string32,
stringToBytes,
stringToPDFString,
stringToUTF16BEString,
} from "../../src/shared/util.js";
describe("util", function () {
@ -346,4 +348,26 @@ describe("util", function () {
expect(encodeToXmlString(str)).toEqual(str);
});
});
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    // One pure-ASCII sample, one pure-Japanese sample, and one mixing both.
    const asciiOnly = "hello world";
    const japaneseOnly = "こんにちは世界の";
    const mixed = "hello world in Japanese is こんにちは世界の";

    expect(isAscii(asciiOnly)).toEqual(true);
    expect(isAscii(japaneseOnly)).toEqual(false);
    expect(isAscii(mixed)).toEqual(false);
  });
});
describe("stringToUTF16BEString", function () {
  it("should encode a string in UTF16BE with a BOM", function () {
    // ASCII input: every character is preceded by a zero byte, after the
    // \xfe\xff byte order mark.
    const asciiEncoded = stringToUTF16BEString("hello world");
    expect(asciiEncoded).toEqual(
      "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
    );

    // Japanese input: two bytes per character, after the byte order mark.
    const japaneseEncoded = stringToUTF16BEString("こんにちは世界の");
    expect(japaneseEncoded).toEqual(
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
        "\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });
});
});