mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Don't add an extra space after a Katakana or Hiragana character at the end of a line when searching
This commit is contained in:
parent
44bc315444
commit
ea1995991b
4 changed files with 27 additions and 1 deletions
|
@ -132,7 +132,11 @@ function normalize(text) {
|
|||
"\u3244-\u32bf" + // Circled ideograms/numbers.
|
||||
"\u32d0-\u32fe" + // Circled ideograms.
|
||||
"\uff00-\uffef"; // Halfwidth, fullwidth forms.
|
||||
const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\p{Ideographic}\\n)|(\\n)`;
|
||||
|
||||
// 3040-309F: Hiragana
|
||||
// 30A0-30FF: Katakana
|
||||
const CJK = "(?:\\p{Ideographic}|[\u3040-\u30FF])";
|
||||
const regexp = `([${replace}])|([${toNormalizeWithNFKC}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(${CJK}\\n)|(\\n)`;
|
||||
|
||||
if (syllablePositions.length === 0) {
|
||||
// Most of the syllables belong to Hangul so there is no need
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue