1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Correctly print documents containing characters with a Unicode code point greater than 0xFFFF (bug 1669097)

This commit is contained in:
Calixte Denizet 2024-01-21 23:00:43 +01:00
parent d549c2ef4c
commit 06601fd90c
6 changed files with 44 additions and 34 deletions

View file

@ -3826,7 +3826,7 @@ class FreeTextAnnotation extends MarkupAnnotation {
fontColor,
strokeAlpha
);
this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream);
this._streams.push(this.appearance);
} else {
warn(
"FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly."

View file

@ -386,6 +386,17 @@ const XMLEntities = {
/* ' */ 0x27: "'",
};
/**
 * Iterate over the Unicode code points of a string.
 *
 * A valid surrogate pair (two UTF-16 code units) is yielded as one code point
 * greater than 0xFFFF. Every other code unit, including lone surrogates and
 * U+FFFE/U+FFFF, occupies a single unit and is yielded on its own, so the
 * character that follows it is never skipped.
 *
 * @param {string} str - The string to iterate over.
 * @yields {number} The next code point of `str`.
 */
function* codePointIter(str) {
  // The built-in string iterator already walks the string by code point and
  // only consumes two code units for a well-formed surrogate pair.
  for (const char of str) {
    yield char.codePointAt(0);
  }
}
function encodeToXmlString(str) {
const buffer = [];
let start = 0;
@ -602,6 +613,7 @@ function getRotationMatrix(rotation, width, height) {
export {
arrayBuffersToBytes,
codePointIter,
collectActions,
encodeToXmlString,
escapePDFName,

View file

@ -13,13 +13,14 @@
* limitations under the License.
*/
import { Dict, Name } from "./primitives.js";
import {
codePointIter,
escapePDFName,
getRotationMatrix,
numberToString,
stringToUTF16HexString,
} from "./core_utils.js";
import { Dict, Name } from "./primitives.js";
import {
LINE_DESCENT_FACTOR,
LINE_FACTOR,
@ -251,35 +252,6 @@ class FakeUnicodeFont {
);
}
get toUnicodeRef() {
if (!FakeUnicodeFont._toUnicodeRef) {
const toUnicode = `/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS) /Supplement 0 >> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfrange
<0000> <FFFF> <0000>
endbfrange
endcmap CMapName currentdict /CMap defineresource pop end end`;
const toUnicodeStream = (FakeUnicodeFont.toUnicodeStream =
new StringStream(toUnicode));
const toUnicodeDict = new Dict(this.xref);
toUnicodeStream.dict = toUnicodeDict;
toUnicodeDict.set("Length", toUnicode.length);
FakeUnicodeFont._toUnicodeRef =
this.xref.getNewPersistentRef(toUnicodeStream);
}
return FakeUnicodeFont._toUnicodeRef;
}
get fontDescriptorRef() {
if (!FakeUnicodeFont._fontDescriptorRef) {
const fontDescriptor = new Dict(this.xref);
@ -350,7 +322,7 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
baseFont.set("Subtype", Name.get("Type0"));
baseFont.set("Encoding", Name.get("Identity-H"));
baseFont.set("DescendantFonts", [this.descendantFontRef]);
baseFont.set("ToUnicode", this.toUnicodeRef);
baseFont.set("ToUnicode", Name.get("Identity-H"));
return this.xref.getNewPersistentRef(baseFont);
}
@ -420,8 +392,8 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
// languages, like arabic, it'd be wrong because of ligatures.
const lineWidth = ctx.measureText(line).width;
maxWidth = Math.max(maxWidth, lineWidth);
for (const char of line.split("")) {
const code = char.charCodeAt(0);
for (const code of codePointIter(line)) {
const char = String.fromCodePoint(code);
let width = this.widths.get(code);
if (width === undefined) {
const metrics = ctx.measureText(char);