mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Add (basic) UTF-8 support in the stringToPDFString
helper function (issue 14449)
This patch adds the UTF-8 support by looking for the UTF-8 BOM, i.e. `\xEF\xBB\xBF`, in order to determine the encoding.[1] The actual conversion is done using the `TextDecoder` interface, which should be available in all environments/browsers that we support; please see https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder#browser_compatibility
---
[1] Assuming that everything lacking a UTF-16 BOM would have to be UTF-8 encoded really doesn't seem correct.
This commit is contained in:
parent
ea57ef116e
commit
76444888fb
2 changed files with 37 additions and 7 deletions
|
@ -957,26 +957,39 @@ const PDFStringTranslateTable = [
|
|||
/**
 * Converts a raw PDF string into a JavaScript string, selecting the decoding
 * from the byte order mark (BOM) found at the start of `str`:
 *
 *  - `\xFE\xFF`     => UTF-16BE, decoded by pairing bytes (high byte first).
 *  - `\xFF\xFE`     => UTF-16LE, decoded by pairing bytes (low byte first).
 *  - `\xEF\xBB\xBF` => UTF-8, decoded with a strict (`fatal`) `TextDecoder`.
 *  - anything else  => each character is mapped through
 *                      `PDFStringTranslateTable`.
 *
 * If the UTF-8 decoding throws, a warning is emitted and the string is
 * converted with the `PDFStringTranslateTable` fallback instead.
 *
 * @param {string} str - The string to convert; presumably one byte per
 *   character (char codes 0-255) — confirm against callers.
 * @returns {string} The decoded string.
 */
function stringToPDFString(str) {
  const length = str.length;
  const strBuf = [];

  // UTF-16BE BOM
  if (str[0] === "\xFE" && str[1] === "\xFF") {
    // Skip the two BOM bytes, then combine each (high, low) byte pair.
    for (let i = 2; i < length; i += 2) {
      strBuf.push(
        String.fromCharCode((str.charCodeAt(i) << 8) | str.charCodeAt(i + 1))
      );
    }
    return strBuf.join("");
  }

  // UTF-16LE BOM
  if (str[0] === "\xFF" && str[1] === "\xFE") {
    // Skip the two BOM bytes, then combine each (low, high) byte pair.
    for (let i = 2; i < length; i += 2) {
      strBuf.push(
        String.fromCharCode((str.charCodeAt(i + 1) << 8) | str.charCodeAt(i))
      );
    }
    return strBuf.join("");
  }

  // UTF-8 BOM
  if (str[0] === "\xEF" && str[1] === "\xBB" && str[2] === "\xBF") {
    try {
      // `fatal: true` makes malformed input throw rather than being decoded
      // with replacement characters, so that the fallback below is used.
      const decoder = new TextDecoder("utf-8", { fatal: true });
      const buffer = stringToBytes(str);
      return decoder.decode(buffer);
    } catch (ex) {
      warn(`stringToPDFString: "${ex}".`);
    }
  }

  // ISO Latin 1
  for (let i = 0; i < length; ++i) {
    // A falsy table entry means the character is kept unchanged.
    const code = PDFStringTranslateTable[str.charCodeAt(i)];
    strBuf.push(code ? String.fromCharCode(code) : str.charAt(i));
  }
  return strBuf.join("");
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue