diff --git a/src/core/fonts.js b/src/core/fonts.js
index 0301d49ad..29f186fa3 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -3293,6 +3293,64 @@ class Font {
     return builder.toArray();
   }
 
+  /**
+   * Estimated width of the space character for this font.
+   *
+   * The glyph names "space", "minus", "one", "i" and "I" are tried in that
+   * order. Each is looked up in `this.widths` by name first; when the name
+   * is not a key there, a charcode is derived from the glyph's Unicode
+   * value (through `this.cMap` for composite fonts, then `this.toUnicode`,
+   * and finally the Unicode value itself) and used as the key instead.
+   *
+   * The result is handed to `shadow(this, "_spaceWidth", ...)`, which
+   * presumably caches it on the instance (confirm against `shadow`).
+   *
+   * @returns The width that was found, or `this.defaultWidth` when that
+   *   width is falsy or no candidate matched.
+   * @private
+   */
+  get _spaceWidth() {
+    // Glyphs whose width can stand in for a space, most preferred first.
+    const possibleSpaceReplacements = ["space", "minus", "one", "i", "I"];
+    // Fetched lazily and at most once: it is not needed when the first
+    // candidate is already keyed by name in `this.widths`.
+    let glyphsUnicodeMap;
+    let width;
+    for (const glyphName of possibleSpaceReplacements) {
+      // Prefer a width keyed directly by glyph name. Any such entry ends
+      // the search, even a falsy one (the default-width fallback applies).
+      if (glyphName in this.widths) {
+        width = this.widths[glyphName];
+        break;
+      }
+      glyphsUnicodeMap ??= getGlyphsUnicode();
+      const glyphUnicode = glyphsUnicodeMap[glyphName];
+      // Resolve a charcode through the CMap (composite fonts only) ...
+      let charcode = 0;
+      if (this.composite && this.cMap.contains(glyphUnicode)) {
+        charcode = this.cMap.lookup(glyphUnicode);
+
+        if (typeof charcode === "string") {
+          charcode = convertCidString(glyphUnicode, charcode);
+        }
+      }
+      // ... or, if that gave nothing, through the toUnicode map.
+      if (!charcode && this.toUnicode) {
+        charcode = this.toUnicode.charCodeOf(glyphUnicode);
+      }
+      // A zero or negative charcode is replaced by the Unicode value itself.
+      if (charcode <= 0) {
+        charcode = glyphUnicode;
+      }
+      // Only a non-zero width ends the search on this path.
+      width = this.widths[charcode];
+      if (width) {
+        break;
+      }
+    }
+    return shadow(this, "_spaceWidth", width || this.defaultWidth);
+  }
+
   /**
    * @private
    */
@@ -3338,6 +3396,13 @@ class Font {
         // .notdef glyphs should be invisible in non-embedded Type1 fonts, so
         // replace them with spaces.
         fontCharCode = 0x20;
+
+        if (glyphName === "") {
+          // Ensure that other relevant glyph properties are also updated
+          // (fixes issue18059.pdf).
+          width ||= this._spaceWidth;
+          unicode = String.fromCharCode(fontCharCode);
+        }
       }
       fontCharCode = mapSpecialUnicodeValues(fontCharCode);
     }
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 8629cd5bc..df69d2255 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -57,6 +57,7 @@
 !issue17679_2.pdf
 !issue18030.pdf
 !issue18042.pdf
+!issue18059.pdf
 !issue14953.pdf
 !issue15367.pdf
 !issue15372.pdf
diff --git a/test/pdfs/issue18059.pdf b/test/pdfs/issue18059.pdf
new file mode 100644
index 000000000..82416e266
Binary files /dev/null and b/test/pdfs/issue18059.pdf differ
diff --git a/test/test_manifest.json b/test/test_manifest.json
index 78c4d8f59..90ad056ec 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -2983,6 +2983,27 @@
     "rounds": 1,
     "type": "eq"
   },
+  {
+    "id": "issue11403-text",
+    "file": "pdfs/issue11403_reduced.pdf",
+    "md5": "08287b64f442cb7c329b97c4774aa1cd",
+    "rounds": 1,
+    "type": "text"
+  },
+  {
+    "id": "issue18059",
+    "file": "pdfs/issue18059.pdf",
+    "md5": "b70373894edfcd571a41caa1a0776b6f",
+    "rounds": 1,
+    "type": "eq"
+  },
+  {
+    "id": "issue18059-text",
+    "file": "pdfs/issue18059.pdf",
+    "md5": "b70373894edfcd571a41caa1a0776b6f",
+    "rounds": 1,
+    "type": "text"
+  },
   {
     "id": "issue11139",
     "file": "pdfs/issue11139.pdf",
@@ -6479,6 +6500,14 @@
     "type": "eq",
     "about": "Has a 4 bit per component image with mask and decode."
   },
+  {
+    "id": "issue2770-text",
+    "file": "pdfs/issue2770.pdf",
+    "md5": "36070d756d06eaa35c2227efb069fb1b",
+    "rounds": 1,
+    "link": true,
+    "type": "text"
+  },
   {
     "id": "issue2984",
     "file": "pdfs/issue2984.pdf",