mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-22 16:18:08 +02:00
Tweak the Bidi-detection heuristics for very short RTL strings (issue 11656)
Very short strings can narrowly miss the existing Bidi-detection threshold, leading to incorrect text-selection and copying behaviour. In my testing, neither Adobe Reader nor PDFium seems to handle copying "correctly" for this document. Hence it's not entirely clear to me that we actually want to fix this, since tweaking these heuristics can *obviously* cause regressions elsewhere (and our test coverage for RTL-text isn't exactly great).
This commit is contained in:
parent
6a15973a1b
commit
5f77d3719b
5 changed files with 44 additions and 2 deletions
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -448,6 +448,7 @@
|
|||
!annotation-square-circle-without-appearance.pdf
|
||||
!annotation-stamp.pdf
|
||||
!issue14048.pdf
|
||||
!issue11656.pdf
|
||||
!annotation-fileattachment.pdf
|
||||
!annotation-text-widget.pdf
|
||||
!annotation-choice-widget.pdf
|
||||
|
|
BIN
test/pdfs/issue11656.pdf
Normal file
BIN
test/pdfs/issue11656.pdf
Normal file
Binary file not shown.
|
@ -5080,6 +5080,12 @@
|
|||
"lastPage": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue11656",
|
||||
"file": "pdfs/issue11656.pdf",
|
||||
"md5": "82d5d4f5978a4974707deb1ea98e62f2",
|
||||
"rounds": 1,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "vertical",
|
||||
"file": "pdfs/vertical.pdf",
|
||||
"md5": "8a74d33504701edcefeef2afd022765e",
|
||||
|
|
|
@ -16,6 +16,28 @@
|
|||
import { bidi } from "../../src/core/bidi.js";
|
||||
|
||||
describe("bidi", function () {
|
||||
it(
|
||||
"should mark text as LTR if there's only LTR-characters, " +
|
||||
"when the string is very short",
|
||||
function () {
|
||||
const str = "foo";
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual("foo");
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
}
|
||||
);
|
||||
|
||||
it("should mark text as LTR if there's only LTR-characters", function () {
|
||||
const str = "Lorem ipsum dolor sit amet, consectetur adipisicing elit.";
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual(
|
||||
"Lorem ipsum dolor sit amet, consectetur adipisicing elit."
|
||||
);
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
});
|
||||
|
||||
it("should mark text as RTL if more than 30% of text is RTL", function () {
|
||||
// 33% of test text are RTL characters
|
||||
const test = "\u0645\u0635\u0631 Egypt";
|
||||
|
@ -34,4 +56,16 @@ describe("bidi", function () {
|
|||
expect(bidiText.str).toEqual(result);
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
});
|
||||
|
||||
it(
|
||||
"should mark text as RTL if less than 30% of text is RTL, " +
|
||||
"when the string is very short (issue 11656)",
|
||||
function () {
|
||||
const str = "()\u05d1("; // 25% of the string is RTL characters.
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual("(\u05d1)(");
|
||||
expect(bidiText.dir).toEqual("rtl");
|
||||
}
|
||||
);
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue