mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 09:38:06 +02:00
Represent cid chars using integers, not strings.
cid chars are 16-bit unsigned integers. Currently we convert them to single-char strings when inserting them into the CMap, and then convert them back to integers when extracting them from the CMap. This patch changes CMap so that cid chars stay in integer format throughout, saving both time and space. When loading the PDF from issue #4580, this change reduces peak RSS from ~600 to ~370 MiB. It also improves overall speed on that PDF by ~26%, going from 724 ms to 533 ms.
This commit is contained in:
parent
ad2ea78280
commit
adf58ed687
3 changed files with 46 additions and 37 deletions
|
@ -199,6 +199,10 @@ var CMap = (function CMapClosure() {
|
|||
// where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
|
||||
this.codespaceRanges = [[], [], [], []];
|
||||
this.numCodespaceRanges = 0;
|
||||
// Map entries have one of two forms.
|
||||
// - cid chars are 16-bit unsigned integers, stored as integers.
|
||||
// - bf chars are variable-length byte sequences, stored as strings, with
|
||||
// one byte per character.
|
||||
this._map = [];
|
||||
this.vertical = false;
|
||||
this.useCMap = null;
|
||||
|
@ -210,18 +214,23 @@ var CMap = (function CMapClosure() {
|
|||
this.numCodespaceRanges++;
|
||||
},
|
||||
|
||||
mapRange: function(low, high, dstLow) {
|
||||
var lastByte = dstLow.length - 1;
|
||||
mapCidRange: function(low, high, dstLow) {
|
||||
while (low <= high) {
|
||||
this._map[low] = dstLow;
|
||||
// Only the last byte has to be incremented.
|
||||
dstLow = dstLow.substr(0, lastByte) +
|
||||
String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
|
||||
++low;
|
||||
this._map[low++] = dstLow++;
|
||||
}
|
||||
},
|
||||
|
||||
mapRangeToArray: function(low, high, array) {
|
||||
mapBfRange: function(low, high, dstLow) {
|
||||
var lastByte = dstLow.length - 1;
|
||||
while (low <= high) {
|
||||
this._map[low++] = dstLow;
|
||||
// Only the last byte has to be incremented.
|
||||
dstLow = dstLow.substr(0, lastByte) +
|
||||
String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
|
||||
}
|
||||
},
|
||||
|
||||
mapBfRangeToArray: function(low, high, array) {
|
||||
var i = 0, ii = array.length;
|
||||
while (low <= high && i < ii) {
|
||||
this._map[low] = array[i++];
|
||||
|
@ -229,6 +238,7 @@ var CMap = (function CMapClosure() {
|
|||
}
|
||||
},
|
||||
|
||||
// This is used for both bf and cid chars.
|
||||
mapOne: function(src, dst) {
|
||||
this._map[src] = dst;
|
||||
},
|
||||
|
@ -302,7 +312,7 @@ var IdentityCMap = (function IdentityCMapClosure() {
|
|||
CMap.call(this);
|
||||
this.vertical = vertical;
|
||||
this.addCodespaceRange(n, 0, 0xffff);
|
||||
this.mapRange(0, 0xffff, '\u0000');
|
||||
this.mapCidRange(0, 0xffff, 0);
|
||||
}
|
||||
Util.inherit(IdentityCMap, CMap, {});
|
||||
|
||||
|
@ -522,7 +532,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
case 2: // cidchar
|
||||
stream.readHex(char, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
|
||||
cMap.mapOne(hexToInt(char, dataSize), code);
|
||||
for (i = 1; i < subitemsCount; i++) {
|
||||
incHex(char, dataSize);
|
||||
if (!sequence) {
|
||||
|
@ -530,7 +540,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
addHex(char, tmp, dataSize);
|
||||
}
|
||||
code = stream.readSigned() + (code + 1);
|
||||
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
|
||||
cMap.mapOne(hexToInt(char, dataSize), code);
|
||||
}
|
||||
break;
|
||||
case 3: // cidrange
|
||||
|
@ -538,8 +548,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
String.fromCharCode(code));
|
||||
cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
code);
|
||||
for (i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, dataSize);
|
||||
if (!sequence) {
|
||||
|
@ -551,8 +561,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
String.fromCharCode(code));
|
||||
cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
code);
|
||||
}
|
||||
break;
|
||||
case 4: // bfchar
|
||||
|
@ -578,9 +588,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
stream.readHexNumber(end, ucs2DataSize);
|
||||
addHex(end, start, ucs2DataSize);
|
||||
stream.readHex(charCode, dataSize);
|
||||
cMap.mapRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
cMap.mapBfRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
for (i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, ucs2DataSize);
|
||||
if (!sequence) {
|
||||
|
@ -592,9 +602,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
|||
stream.readHexNumber(end, ucs2DataSize);
|
||||
addHex(end, start, ucs2DataSize);
|
||||
stream.readHex(charCode, dataSize);
|
||||
cMap.mapRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
cMap.mapBfRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -675,7 +685,7 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
obj = lexer.getObj();
|
||||
if (isInt(obj) || isString(obj)) {
|
||||
var dstLow = isInt(obj) ? String.fromCharCode(obj) : obj;
|
||||
cMap.mapRange(low, high, dstLow);
|
||||
cMap.mapBfRange(low, high, dstLow);
|
||||
} else if (isCmd(obj, '[')) {
|
||||
obj = lexer.getObj();
|
||||
var array = [];
|
||||
|
@ -683,7 +693,7 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
array.push(obj);
|
||||
obj = lexer.getObj();
|
||||
}
|
||||
cMap.mapRangeToArray(low, high, array);
|
||||
cMap.mapBfRangeToArray(low, high, array);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -704,7 +714,7 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
var src = strToInt(obj);
|
||||
obj = lexer.getObj();
|
||||
expectInt(obj);
|
||||
var dst = String.fromCharCode(obj);
|
||||
var dst = obj;
|
||||
cMap.mapOne(src, dst);
|
||||
}
|
||||
}
|
||||
|
@ -725,8 +735,8 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
var high = strToInt(obj);
|
||||
obj = lexer.getObj();
|
||||
expectInt(obj);
|
||||
var dstLow = String.fromCharCode(obj);
|
||||
cMap.mapRange(low, high, dstLow);
|
||||
var dstLow = obj;
|
||||
cMap.mapCidRange(low, high, dstLow);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue