1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-22 16:18:08 +02:00

Merge pull request #11186 from Snuffleupagus/issue-9655

Improve the heuristics, in `PartialEvaluator._buildSimpleFontToUnicode`, for glyphNames of the Cdd{d}/cdd{d} format (issue 9655)
This commit is contained in:
Tim van der Meij 2019-10-06 19:50:43 +02:00 committed by GitHub
commit cead77ef3a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 7 deletions

View file

@@ -1977,7 +1977,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
* @returns {ToUnicodeMap}
* @private
*/
-_buildSimpleFontToUnicode(properties) {
+_buildSimpleFontToUnicode(properties, forceGlyphs = false) {
assert(!properties.composite, 'Must be a simple font.');
let toUnicode = [], charcode, glyphName;
@@ -2017,14 +2017,31 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
code = parseInt(glyphName.substring(1), 16);
}
break;
-case 'C': // Cddd glyph
-case 'c': // cddd glyph
-if (glyphName.length >= 3) {
-code = +glyphName.substring(1);
+case 'C': // Cdd{d} glyph
+case 'c': // cdd{d} glyph
+if (glyphName.length >= 3 && glyphName.length <= 4) {
+const codeStr = glyphName.substring(1);
+if (forceGlyphs) {
+code = parseInt(codeStr, 16);
+break;
+}
+// Normally the Cdd{d}/cdd{d} glyphName format will contain
+// regular, i.e. base 10, charCodes (see issue4550.pdf)...
+code = +codeStr;
+// ... however some PDF generators violate that assumption by
+// containing glyph, i.e. base 16, codes instead.
+// In that case we need to re-parse the *entire* encoding to
+// prevent broken text-selection (fixes issue9655_reduced.pdf).
+if (Number.isNaN(code) &&
+Number.isInteger(parseInt(codeStr, 16))) {
+return this._buildSimpleFontToUnicode(properties,
+/* forceGlyphs */ true);
+}
}
break;
-default:
-// 'uniXXXX'/'uXXXX{XX}' glyphs
+default: // 'uniXXXX'/'uXXXX{XX}' glyphs
let unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
code = unicode;