mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Fix encoding issues when printing/saving a form with non-ascii characters
This commit is contained in:
parent
187542da8d
commit
56424967f2
6 changed files with 508 additions and 57 deletions
|
@ -23,10 +23,12 @@ import {
|
|||
assert,
|
||||
escapeString,
|
||||
getModificationDate,
|
||||
isAscii,
|
||||
isString,
|
||||
OPS,
|
||||
shadow,
|
||||
stringToPDFString,
|
||||
stringToUTF16BEString,
|
||||
unreachable,
|
||||
Util,
|
||||
warn,
|
||||
|
@ -1222,7 +1224,7 @@ class WidgetAnnotation extends Annotation {
|
|||
appearance = newTransform.encryptString(appearance);
|
||||
}
|
||||
|
||||
dict.set("V", value);
|
||||
dict.set("V", isAscii(value) ? value : stringToUTF16BEString(value));
|
||||
dict.set("AP", AP);
|
||||
dict.set("M", `D:${getModificationDate()}`);
|
||||
|
||||
|
@ -1298,16 +1300,6 @@ class WidgetAnnotation extends Annotation {
|
|||
const defaultAppearance = this.data.defaultAppearance;
|
||||
const alignment = this.data.textAlignment;
|
||||
|
||||
if (this.data.comb) {
|
||||
return this._getCombAppearance(
|
||||
defaultAppearance,
|
||||
value,
|
||||
totalWidth,
|
||||
hPadding,
|
||||
vPadding
|
||||
);
|
||||
}
|
||||
|
||||
if (this.data.multiLine) {
|
||||
return this._getMultilineAppearance(
|
||||
defaultAppearance,
|
||||
|
@ -1322,18 +1314,34 @@ class WidgetAnnotation extends Annotation {
|
|||
);
|
||||
}
|
||||
|
||||
// TODO: need to handle chars which are not in the font.
|
||||
const encodedString = font.encodeString(value).join("");
|
||||
|
||||
if (this.data.comb) {
|
||||
return this._getCombAppearance(
|
||||
defaultAppearance,
|
||||
font,
|
||||
encodedString,
|
||||
totalWidth,
|
||||
hPadding,
|
||||
vPadding
|
||||
);
|
||||
}
|
||||
|
||||
if (alignment === 0 || alignment > 2) {
|
||||
// Left alignment: nothing to do
|
||||
return (
|
||||
"/Tx BMC q BT " +
|
||||
defaultAppearance +
|
||||
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(value)}) Tj` +
|
||||
` 1 0 0 1 ${hPadding} ${vPadding} Tm (${escapeString(
|
||||
encodedString
|
||||
)}) Tj` +
|
||||
" ET Q EMC"
|
||||
);
|
||||
}
|
||||
|
||||
const renderedText = this._renderText(
|
||||
value,
|
||||
encodedString,
|
||||
font,
|
||||
fontSize,
|
||||
totalWidth,
|
||||
|
@ -1373,10 +1381,21 @@ class WidgetAnnotation extends Annotation {
|
|||
|
||||
_computeFontSize(font, fontName, fontSize, height) {
|
||||
if (fontSize === null || fontSize === 0) {
|
||||
const em = font.charsToGlyphs("M")[0].width / 1000;
|
||||
// According to https://en.wikipedia.org/wiki/Em_(typography)
|
||||
// an average cap height should be 70% of 1em
|
||||
const capHeight = 0.7 * em;
|
||||
let capHeight;
|
||||
if (font.capHeight) {
|
||||
capHeight = font.capHeight;
|
||||
} else {
|
||||
const glyphs = font.charsToGlyphs(font.encodeString("M").join(""));
|
||||
if (glyphs.length === 1 && glyphs[0].width) {
|
||||
const em = glyphs[0].width / 1000;
|
||||
// According to https://en.wikipedia.org/wiki/Em_(typography)
|
||||
// an average cap height should be 70% of 1em
|
||||
capHeight = 0.7 * em;
|
||||
} else {
|
||||
capHeight = 0.7;
|
||||
}
|
||||
}
|
||||
|
||||
// 1.5 * capHeight * fontSize seems to be a good value for lineHeight
|
||||
fontSize = Math.max(1, Math.floor(height / (1.5 * capHeight)));
|
||||
|
||||
|
@ -1510,11 +1529,12 @@ class TextWidgetAnnotation extends WidgetAnnotation {
|
|||
this.data.maxLen !== null;
|
||||
}
|
||||
|
||||
_getCombAppearance(defaultAppearance, text, width, hPadding, vPadding) {
|
||||
_getCombAppearance(defaultAppearance, font, text, width, hPadding, vPadding) {
|
||||
const combWidth = (width / this.data.maxLen).toFixed(2);
|
||||
const buf = [];
|
||||
for (const character of text) {
|
||||
buf.push(`(${escapeString(character)}) Tj`);
|
||||
const positions = font.getCharPositions(text);
|
||||
for (const [start, end] of positions) {
|
||||
buf.push(`(${escapeString(text.substring(start, end))}) Tj`);
|
||||
}
|
||||
|
||||
const renderedComb = buf.join(` ${combWidth} 0 Td `);
|
||||
|
@ -1568,49 +1588,61 @@ class TextWidgetAnnotation extends WidgetAnnotation {
|
|||
}
|
||||
|
||||
_splitLine(line, font, fontSize, width) {
|
||||
if (line.length <= 1) {
|
||||
// TODO: need to handle chars which are not in the font.
|
||||
line = font.encodeString(line).join("");
|
||||
|
||||
const glyphs = font.charsToGlyphs(line);
|
||||
|
||||
if (glyphs.length <= 1) {
|
||||
// Nothing to split
|
||||
return [line];
|
||||
}
|
||||
|
||||
const positions = font.getCharPositions(line);
|
||||
const scale = fontSize / 1000;
|
||||
const whitespace = font.charsToGlyphs(" ")[0].width * scale;
|
||||
const chunks = [];
|
||||
|
||||
let lastSpacePos = -1,
|
||||
let lastSpacePosInStringStart = -1,
|
||||
lastSpacePosInStringEnd = -1,
|
||||
lastSpacePos = -1,
|
||||
startChunk = 0,
|
||||
currentWidth = 0;
|
||||
|
||||
for (let i = 0, ii = line.length; i < ii; i++) {
|
||||
const character = line.charAt(i);
|
||||
if (character === " ") {
|
||||
if (currentWidth + whitespace > width) {
|
||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
const [start, end] = positions[i];
|
||||
const glyph = glyphs[i];
|
||||
const glyphWidth = glyph.width * scale;
|
||||
if (glyph.unicode === " ") {
|
||||
if (currentWidth + glyphWidth > width) {
|
||||
// We can break here
|
||||
chunks.push(line.substring(startChunk, i));
|
||||
startChunk = i;
|
||||
currentWidth = whitespace;
|
||||
chunks.push(line.substring(startChunk, start));
|
||||
startChunk = start;
|
||||
currentWidth = glyphWidth;
|
||||
lastSpacePosInStringStart = -1;
|
||||
lastSpacePos = -1;
|
||||
} else {
|
||||
currentWidth += whitespace;
|
||||
currentWidth += glyphWidth;
|
||||
lastSpacePosInStringStart = start;
|
||||
lastSpacePosInStringEnd = end;
|
||||
lastSpacePos = i;
|
||||
}
|
||||
} else {
|
||||
const charWidth = font.charsToGlyphs(character)[0].width * scale;
|
||||
if (currentWidth + charWidth > width) {
|
||||
if (currentWidth + glyphWidth > width) {
|
||||
// We must break to the last white position (if available)
|
||||
if (lastSpacePos !== -1) {
|
||||
chunks.push(line.substring(startChunk, lastSpacePos + 1));
|
||||
startChunk = i = lastSpacePos + 1;
|
||||
lastSpacePos = -1;
|
||||
if (lastSpacePosInStringStart !== -1) {
|
||||
chunks.push(line.substring(startChunk, lastSpacePosInStringEnd));
|
||||
startChunk = lastSpacePosInStringEnd;
|
||||
i = lastSpacePos + 1;
|
||||
lastSpacePosInStringStart = -1;
|
||||
currentWidth = 0;
|
||||
} else {
|
||||
// Just break in the middle of the word
|
||||
chunks.push(line.substring(startChunk, i));
|
||||
startChunk = i;
|
||||
currentWidth = charWidth;
|
||||
chunks.push(line.substring(startChunk, start));
|
||||
startChunk = start;
|
||||
currentWidth = glyphWidth;
|
||||
}
|
||||
} else {
|
||||
currentWidth += charWidth;
|
||||
currentWidth += glyphWidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -338,6 +338,22 @@ class CMap {
|
|||
out.length = 1;
|
||||
}
|
||||
|
||||
getCharCodeLength(charCode) {
|
||||
const codespaceRanges = this.codespaceRanges;
|
||||
for (let n = 0, nn = codespaceRanges.length; n < nn; n++) {
|
||||
// Check each codespace range to see if it falls within.
|
||||
const codespaceRange = codespaceRanges[n];
|
||||
for (let k = 0, kk = codespaceRange.length; k < kk; ) {
|
||||
const low = codespaceRange[k++];
|
||||
const high = codespaceRange[k++];
|
||||
if (charCode >= low && charCode <= high) {
|
||||
return n + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
get length() {
|
||||
return this._map.length;
|
||||
}
|
||||
|
|
|
@ -590,6 +590,7 @@ var Font = (function FontClosure() {
|
|||
this.defaultWidth = properties.defaultWidth;
|
||||
this.composite = properties.composite;
|
||||
this.cMap = properties.cMap;
|
||||
this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS;
|
||||
this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
|
||||
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
|
||||
this.fontMatrix = properties.fontMatrix;
|
||||
|
@ -3351,9 +3352,93 @@ var Font = (function FontClosure() {
|
|||
return (charsCache[charsCacheKey] = glyphs);
|
||||
},
|
||||
|
||||
/**
|
||||
* Chars can have different sizes (depends on the encoding).
|
||||
* @param {String} a string encoded with font encoding.
|
||||
* @returns {Array<Array<number>>} the positions of each char in the string.
|
||||
*/
|
||||
getCharPositions(chars) {
|
||||
// This function doesn't use a cache because
|
||||
// it's called only when saving or printing.
|
||||
const positions = [];
|
||||
|
||||
if (this.cMap) {
|
||||
const c = Object.create(null);
|
||||
let i = 0;
|
||||
while (i < chars.length) {
|
||||
this.cMap.readCharCode(chars, i, c);
|
||||
const length = c.length;
|
||||
positions.push([i, i + length]);
|
||||
i += length;
|
||||
}
|
||||
} else {
|
||||
for (let i = 0, ii = chars.length; i < ii; ++i) {
|
||||
positions.push([i, i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
return positions;
|
||||
},
|
||||
|
||||
get glyphCacheValues() {
|
||||
return Object.values(this.glyphCache);
|
||||
},
|
||||
|
||||
/**
|
||||
* Encode a js string using font encoding.
|
||||
* The resulting array contains an encoded string at even positions
|
||||
* (can be empty) and a non-encoded one at odd positions.
|
||||
* @param {String} a js string.
|
||||
* @returns {Array<String>} an array of encoded strings or non-encoded ones.
|
||||
*/
|
||||
encodeString(str) {
|
||||
const buffers = [];
|
||||
const currentBuf = [];
|
||||
|
||||
// buffers will contain: encoded, non-encoded, encoded, ...
|
||||
// currentBuf is pushed in buffers each time there is a change.
|
||||
// So when buffers.length is odd then the last string is an encoded one
|
||||
// and currentBuf contains non-encoded chars.
|
||||
const hasCurrentBufErrors = () => buffers.length % 2 === 1;
|
||||
|
||||
for (let i = 0, ii = str.length; i < ii; i++) {
|
||||
const unicode = str.codePointAt(i);
|
||||
if (unicode > 0xd7ff && (unicode < 0xe000 || unicode > 0xfffd)) {
|
||||
// unicode is represented by two uint16
|
||||
i++;
|
||||
}
|
||||
if (this.toUnicode) {
|
||||
const char = String.fromCodePoint(unicode);
|
||||
const charCode = this.toUnicode.charCodeOf(char);
|
||||
if (charCode !== -1) {
|
||||
if (hasCurrentBufErrors()) {
|
||||
buffers.push(currentBuf.join(""));
|
||||
currentBuf.length = 0;
|
||||
}
|
||||
const charCodeLength = this.cMap
|
||||
? this.cMap.getCharCodeLength(charCode)
|
||||
: 1;
|
||||
for (let j = charCodeLength - 1; j >= 0; j--) {
|
||||
currentBuf.push(
|
||||
String.fromCharCode((charCode >> (8 * j)) & 0xff)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// unicode can't be encoded
|
||||
if (!hasCurrentBufErrors()) {
|
||||
buffers.push(currentBuf.join(""));
|
||||
currentBuf.length = 0;
|
||||
}
|
||||
currentBuf.push(String.fromCodePoint(unicode));
|
||||
}
|
||||
|
||||
buffers.push(currentBuf.join(""));
|
||||
|
||||
return buffers;
|
||||
},
|
||||
};
|
||||
|
||||
return Font;
|
||||
|
@ -3371,6 +3456,9 @@ var ErrorFont = (function ErrorFontClosure() {
|
|||
charsToGlyphs: function ErrorFont_charsToGlyphs() {
|
||||
return [];
|
||||
},
|
||||
encodeString: function ErrorFont_encodeString(chars) {
|
||||
return [chars];
|
||||
},
|
||||
exportData(extraProperties = false) {
|
||||
return { error: this.error };
|
||||
},
|
||||
|
|
|
@ -842,6 +842,20 @@ function escapeString(str) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Tell whether a string is made only of 7-bit ASCII characters.
 * @param {string} str - the string to check.
 * @returns {boolean} true when every character lies in \x00-\x7F
 *   (the empty string qualifies), false otherwise.
 */
function isAscii(str) {
  const asciiOnly = /^[\x00-\x7F]*$/;
  return asciiOnly.test(str);
}
|
||||
|
||||
/**
 * Serialize a js string as UTF-16 big-endian bytes, one output character
 * per byte, prefixed with the \xFE\xFF byte-order mark.
 * @param {string} str - the string to convert.
 * @returns {string} the byte-order mark followed by two characters
 *   (high byte, then low byte) for each UTF-16 code unit of `str`.
 */
function stringToUTF16BEString(str) {
  let out = "\xFE\xFF";
  for (let pos = 0; pos < str.length; pos++) {
    const unit = str.charCodeAt(pos);
    // High byte first: big-endian order.
    out += String.fromCharCode((unit >> 8) & 0xff, unit & 0xff);
  }
  return out;
}
|
||||
|
||||
/**
 * Decode a string whose characters each hold one UTF-8 byte into a
 * regular js string.
 * @param {string} str - the byte string to decode.
 * @returns {string} the decoded string.
 * @throws {URIError} when `decodeURIComponent` rejects the escaped input,
 *   e.g. for byte sequences that are not valid UTF-8.
 */
function stringToUTF8String(str) {
  // `escape` turns each byte into a %XX sequence that
  // `decodeURIComponent` then interprets as UTF-8.
  const percentEncoded = escape(str);
  return decodeURIComponent(percentEncoded);
}
|
||||
|
@ -1044,6 +1058,7 @@ export {
|
|||
getModificationDate,
|
||||
getVerbosityLevel,
|
||||
info,
|
||||
isAscii,
|
||||
isArrayBuffer,
|
||||
isArrayEqual,
|
||||
isBool,
|
||||
|
@ -1061,6 +1076,7 @@ export {
|
|||
string32,
|
||||
stringToBytes,
|
||||
stringToPDFString,
|
||||
stringToUTF16BEString,
|
||||
stringToUTF8String,
|
||||
utf8StringToString,
|
||||
warn,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue