mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-24 09:08:07 +02:00
[text selection] Add the whitespaces present in the PDF to the text chunks
- it aims to fix issue #14627; - the basic idea of the recent text refactoring was to only consider the rendered visible whitespaces. But sometimes the heuristics aren't correct, and although some whitespaces are in the text stream, they weren't in the text chunks because they were too small. Hence we added some exceptions: for example, we always add a whitespace when it is between two non-whitespace chars, but only when they are in the same Tj. So basically, this patch removes the constraint of having the chars in the same Tj (by using a circular buffer to save the last two chars) but doesn't add a space when the visible space is really too small (hence `NOT_A_SPACE_FACTOR`).
This commit is contained in:
parent
db4f3adc5e
commit
18e79e3c0b
6 changed files with 94 additions and 23 deletions
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -515,3 +515,4 @@
|
|||
!issue14497.pdf
|
||||
!issue14502.pdf
|
||||
!issue13211.pdf
|
||||
!issue14627.pdf
|
||||
|
|
BIN
test/pdfs/issue14627.pdf
Executable file
BIN
test/pdfs/issue14627.pdf
Executable file
Binary file not shown.
|
@ -6329,5 +6329,11 @@
|
|||
"md5": "d193853e8a123dc50eeea593a4150b60",
|
||||
"rounds": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue14627",
|
||||
"file": "pdfs/issue14627.pdf",
|
||||
"md5": "5d1bfcc3b3130bfa7e33e43990e2213a",
|
||||
"rounds": 1,
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1999,7 +1999,7 @@ describe("api", function () {
|
|||
const data = await Promise.all([defaultPromise, parametersPromise]);
|
||||
|
||||
expect(!!data[0].items).toEqual(true);
|
||||
expect(data[0].items.length).toEqual(11);
|
||||
expect(data[0].items.length).toEqual(15);
|
||||
expect(!!data[0].styles).toEqual(true);
|
||||
|
||||
const page1 = mergeText(data[0].items);
|
||||
|
|
|
@ -579,14 +579,14 @@ describe("pdf_find_controller", function () {
|
|||
},
|
||||
pageMatches: [
|
||||
[
|
||||
299, 337, 414, 476, 623, 797, 978, 984, 1010, 1058, 1079, 1144, 1152,
|
||||
1274, 1343, 1391, 1399, 1421, 1497, 1521, 1527, 1684, 1774, 1786,
|
||||
1857, 1879, 1909, 1946, 2064, 2074, 2161, 2178, 2213, 2227, 2272,
|
||||
2322, 2359, 2401, 2412, 2423, 2462, 2532, 2538, 2553, 2562, 2576,
|
||||
2602, 2613, 2638, 2668, 2792, 2805, 2836, 2848, 2859, 2896, 2902,
|
||||
2916, 2940, 2960, 3091, 3239, 3249, 3339, 3387, 3394, 3468, 3477,
|
||||
3485, 3502, 3690, 3696, 3711, 3758, 3789, 3865, 3977, 4052, 4058,
|
||||
4071,
|
||||
302, 340, 418, 481, 628, 802, 983, 989, 1015, 1063, 1084, 1149, 1157,
|
||||
1278, 1346, 1394, 1402, 1424, 1500, 1524, 1530, 1686, 1776, 1788,
|
||||
1859, 1881, 1911, 1948, 2066, 2076, 2163, 2180, 2215, 2229, 2274,
|
||||
2324, 2360, 2402, 2413, 2424, 2463, 2532, 2538, 2553, 2562, 2576,
|
||||
2602, 2613, 2638, 2668, 2792, 2805, 2836, 2847, 2858, 2895, 2901,
|
||||
2915, 2939, 2959, 3089, 3236, 3246, 3336, 3384, 3391, 3465, 3474,
|
||||
3482, 3499, 3687, 3693, 3708, 3755, 3786, 3862, 3974, 4049, 4055,
|
||||
4068,
|
||||
],
|
||||
],
|
||||
pageMatchesLength: [
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue