1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-26 10:08:06 +02:00

Remove the invisible format marks from the text chunks

- it aims to fix issue #9186.
This commit is contained in:
Calixte Denizet 2022-01-23 23:04:18 +01:00
parent 88236e1163
commit e1d3a3b414
7 changed files with 99 additions and 14 deletions

View file

@ -2561,6 +2561,9 @@ class PartialEvaluator {
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const glyph = glyphs[i];
if (glyph.isInvisibleFormatMark) {
continue;
}
let charSpacing =
textState.charSpacing + (i + 1 === ii ? extraSpacing : 0);
@ -2601,7 +2604,7 @@ class PartialEvaluator {
// Must be called after compareWithLastPosition because
// the textContentItem could have been flushed.
const textChunk = ensureTextContentItem();
if (glyph.isDiacritic) {
if (glyph.isZeroWidthDiacritic) {
scaledDim = 0;
}

View file

@ -216,7 +216,8 @@ class Glyph {
const category = getCharUnicodeCategory(unicode);
this.isWhitespace = category.isWhitespace;
this.isDiacritic = category.isDiacritic;
this.isZeroWidthDiacritic = category.isZeroWidthDiacritic;
this.isInvisibleFormatMark = category.isInvisibleFormatMark;
}
matchesForCache(

View file

@ -1640,12 +1640,13 @@ function reverseIfRtl(chars) {
return buf.join("");
}
// Pattern used by getCharUnicodeCategory to classify text. Capture group 1 is
// whitespace (`\s`), group 2 is `\p{Mn}` (Unicode Nonspacing_Mark) and, in the
// three-alternative form, group 3 is `\p{Cf}` (Unicode Format).
// NOTE(review): alternation binds looser than the anchors, so `^` applies
// only to the first alternative and `$` only to the last one; the pattern is
// not anchored as a whole. Confirm that is intended for multi-character input.
const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})$", "u");
const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})|(\\p{Cf})$", "u");
/**
 * Classify a string by matching it against `SpecialCharRegExp` and reporting
 * which capture group took part in the match.
 *
 * @param {string} char - The text to classify. The Glyph constructor passes
 *   its `unicode` value here; presumably a single character, but that is not
 *   enforced in this function (TODO confirm).
 * @returns {Object} Boolean flags: `isWhitespace` (group 1), `isDiacritic`
 *   and `isZeroWidthDiacritic` (both derived from group 2), and
 *   `isInvisibleFormatMark` (group 3). All are false when nothing matches.
 */
function getCharUnicodeCategory(char) {
const groups = char.match(SpecialCharRegExp);
// `match` yields null when there is no match, and a group that did not
// participate is undefined; `!!(groups && groups[n])` maps both to false.
return {
isWhitespace: !!(groups && groups[1]),
isDiacritic: !!(groups && groups[2]),
isZeroWidthDiacritic: !!(groups && groups[2]),
isInvisibleFormatMark: !!(groups && groups[3]),
};
}