1
0
Fork 0
mirror of https://github.com/mozilla/pdf.js.git synced 2025-04-25 09:38:06 +02:00

Remove the invisible format marks from the text chunks

- it aims to fix issue #9186.
This commit is contained in:
Calixte Denizet 2022-01-23 23:04:18 +01:00
parent 88236e1163
commit e1d3a3b414
7 changed files with 99 additions and 14 deletions

View file

@ -1640,12 +1640,13 @@ function reverseIfRtl(chars) {
return buf.join("");
}
const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})$", "u");
const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})|(\\p{Cf})$", "u");
function getCharUnicodeCategory(char) {
const groups = char.match(SpecialCharRegExp);
return {
isWhitespace: !!(groups && groups[1]),
isDiacritic: !!(groups && groups[2]),
isZeroWidthDiacritic: !!(groups && groups[2]),
isInvisibleFormatMark: !!(groups && groups[3]),
};
}