mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-25 09:38:06 +02:00
CMaps binary packing
This commit is contained in:
parent
e5cd75083f
commit
69efd9cb96
13 changed files with 1156 additions and 35 deletions
378
src/core/cmap.js
378
src/core/cmap.js
|
@ -15,7 +15,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
/* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer,
|
||||
isStream, StringStream */
|
||||
isStream, StringStream, PDFJS, assert */
|
||||
|
||||
'use strict';
|
||||
|
||||
|
@ -275,6 +275,314 @@ var IdentityCMap = (function IdentityCMapClosure() {
|
|||
return IdentityCMap;
|
||||
})();
|
||||
|
||||
var BinaryCMapReader = (function BinaryCMapReaderClosure() {
|
||||
function fetchBinaryData(url) {
|
||||
var nonBinaryRequest = PDFJS.disableWorker;
|
||||
var request = new XMLHttpRequest();
|
||||
request.open('GET', url, false);
|
||||
if (!nonBinaryRequest) {
|
||||
try {
|
||||
request.responseType = 'arraybuffer';
|
||||
nonBinaryRequest = request.responseType !== 'arraybuffer';
|
||||
} catch (e) {
|
||||
nonBinaryRequest = true;
|
||||
}
|
||||
}
|
||||
if (nonBinaryRequest && request.overrideMimeType) {
|
||||
request.overrideMimeType('text/plain; charset=x-user-defined');
|
||||
}
|
||||
request.send(null);
|
||||
if (request.status === 0 && /^https?:/i.test(url)) {
|
||||
error('Unable to get binary cMap at: ' + url);
|
||||
}
|
||||
if (nonBinaryRequest) {
|
||||
var data = Array.prototype.map.call(request.responseText, function (ch) {
|
||||
return ch.charCodeAt(0) & 255;
|
||||
});
|
||||
return new Uint8Array(data);
|
||||
}
|
||||
return new Uint8Array(request.response);
|
||||
}
|
||||
|
||||
function hexToInt(a, size) {
|
||||
var n = 0;
|
||||
for (var i = 0; i <= size; i++) {
|
||||
n = (n << 8) | a[i];
|
||||
}
|
||||
return n >>> 0;
|
||||
}
|
||||
|
||||
function hexToStr(a, size) {
|
||||
return String.fromCharCode.apply(null, a.subarray(0, size + 1));
|
||||
}
|
||||
|
||||
function addHex(a, b, size) {
|
||||
var c = 0;
|
||||
for (var i = size; i >= 0; i--) {
|
||||
c += a[i] + b[i];
|
||||
a[i] = c & 255;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
function incHex(a, size) {
|
||||
var c = 1;
|
||||
for (var i = size; i >= 0 && c > 0; i--) {
|
||||
c += a[i];
|
||||
a[i] = c & 255;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
var MAX_NUM_SIZE = 16;
|
||||
var MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 7 / 8)
|
||||
|
||||
function BinaryCMapStream(data) {
|
||||
this.buffer = data;
|
||||
this.pos = 0;
|
||||
this.end = data.length;
|
||||
this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
|
||||
}
|
||||
|
||||
BinaryCMapStream.prototype = {
|
||||
readByte: function () {
|
||||
if (this.pos >= this.end) {
|
||||
return -1;
|
||||
}
|
||||
return this.buffer[this.pos++];
|
||||
},
|
||||
readNumber: function () {
|
||||
var n = 0;
|
||||
var last;
|
||||
do {
|
||||
var b = this.readByte();
|
||||
if (b < 0) {
|
||||
error('unexpected EOF in bcmap');
|
||||
}
|
||||
last = !(b & 0x80);
|
||||
n = (n << 7) | (b & 0x7F);
|
||||
} while (!last);
|
||||
return n;
|
||||
},
|
||||
readSigned: function () {
|
||||
var n = this.readNumber();
|
||||
return (n & 1) ? ~(n >>> 1) : n >>> 1;
|
||||
},
|
||||
readHex: function (num, size) {
|
||||
num.set(this.buffer.subarray(this.pos,
|
||||
this.pos + size + 1));
|
||||
this.pos += size + 1;
|
||||
},
|
||||
readHexNumber: function (num, size) {
|
||||
var last;
|
||||
var stack = this.tmpBuf, sp = 0;
|
||||
do {
|
||||
var b = this.readByte();
|
||||
if (b < 0) {
|
||||
error('unexpected EOF in bcmap');
|
||||
}
|
||||
last = !(b & 0x80);
|
||||
stack[sp++] = b & 0x7F;
|
||||
} while (!last);
|
||||
var i = size, buffer = 0, bufferSize = 0;
|
||||
while (i >= 0) {
|
||||
while (bufferSize < 8 && stack.length > 0) {
|
||||
buffer = (stack[--sp] << bufferSize) | buffer;
|
||||
bufferSize += 7;
|
||||
}
|
||||
num[i] = buffer & 255;
|
||||
i--;
|
||||
buffer >>= 8;
|
||||
bufferSize -= 8;
|
||||
}
|
||||
},
|
||||
readHexSigned: function (num, size) {
|
||||
this.readHexNumber(num, size);
|
||||
var sign = num[size] & 1 ? 255 : 0;
|
||||
var c = 0;
|
||||
for (var i = 0; i <= size; i++) {
|
||||
c = ((c & 1) << 8) | num[i];
|
||||
num[i] = (c >> 1) ^ sign;
|
||||
}
|
||||
},
|
||||
readString: function () {
|
||||
var len = this.readNumber();
|
||||
var s = '';
|
||||
for (var i = 0; i < len; i++) {
|
||||
s += String.fromCharCode(this.readNumber());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
};
|
||||
|
||||
function processBinaryCMap(url, cMap, extend) {
|
||||
var data = fetchBinaryData(url);
|
||||
var stream = new BinaryCMapStream(data);
|
||||
|
||||
var header = stream.readByte();
|
||||
cMap.vertical = !!(header & 1);
|
||||
|
||||
var useCMap = null;
|
||||
var start = new Uint8Array(MAX_NUM_SIZE);
|
||||
var end = new Uint8Array(MAX_NUM_SIZE);
|
||||
var char = new Uint8Array(MAX_NUM_SIZE);
|
||||
var charCode = new Uint8Array(MAX_NUM_SIZE);
|
||||
var tmp = new Uint8Array(MAX_NUM_SIZE);
|
||||
var code;
|
||||
|
||||
var b;
|
||||
while ((b = stream.readByte()) >= 0) {
|
||||
var type = b >> 5;
|
||||
if (type === 7) { // metadata, e.g. comment or usecmap
|
||||
switch (b & 0x1F) {
|
||||
case 0:
|
||||
stream.readString(); // skipping comment
|
||||
break;
|
||||
case 1:
|
||||
useCMap = stream.readString();
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
var sequence = !!(b & 0x10);
|
||||
var dataSize = b & 15;
|
||||
|
||||
assert(dataSize + 1 <= MAX_NUM_SIZE);
|
||||
|
||||
var ucs2DataSize = 1;
|
||||
var subitemsCount = stream.readNumber();
|
||||
switch (type) {
|
||||
case 0: // codespacerange
|
||||
stream.readHex(start, dataSize);
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
|
||||
hexToInt(end, dataSize));
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, dataSize);
|
||||
stream.readHexNumber(start, dataSize);
|
||||
addHex(start, end, dataSize);
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
|
||||
hexToInt(end, dataSize));
|
||||
}
|
||||
break;
|
||||
case 1: // notdefrange
|
||||
stream.readHex(start, dataSize);
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
// undefined range, skipping
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, dataSize);
|
||||
stream.readHexNumber(start, dataSize);
|
||||
addHex(start, end, dataSize);
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
// nop
|
||||
}
|
||||
break;
|
||||
case 2: // cidchar
|
||||
stream.readHex(char, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(char, dataSize);
|
||||
if (!sequence) {
|
||||
stream.readHexNumber(tmp, dataSize);
|
||||
addHex(char, tmp, dataSize);
|
||||
}
|
||||
code = stream.readSigned() + (code + 1);
|
||||
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
|
||||
}
|
||||
break;
|
||||
case 3: // cidrange
|
||||
stream.readHex(start, dataSize);
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
String.fromCharCode(code));
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, dataSize);
|
||||
if (!sequence) {
|
||||
stream.readHexNumber(start, dataSize);
|
||||
addHex(start, end, dataSize);
|
||||
} else {
|
||||
start.set(end);
|
||||
}
|
||||
stream.readHexNumber(end, dataSize);
|
||||
addHex(end, start, dataSize);
|
||||
code = stream.readNumber();
|
||||
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
|
||||
String.fromCharCode(code));
|
||||
}
|
||||
break;
|
||||
case 4: // bfchar
|
||||
stream.readHex(char, ucs2DataSize);
|
||||
stream.readHex(charCode, dataSize);
|
||||
cMap.mapOne(hexToInt(char, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(char, ucs2DataSize);
|
||||
if (!sequence) {
|
||||
stream.readHexNumber(tmp, ucs2DataSize);
|
||||
addHex(char, tmp, ucs2DataSize);
|
||||
}
|
||||
incHex(charCode, dataSize);
|
||||
stream.readHexSigned(tmp, dataSize);
|
||||
addHex(charCode, tmp, dataSize);
|
||||
cMap.mapOne(hexToInt(char, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
}
|
||||
break;
|
||||
case 5: // bfrange
|
||||
stream.readHex(start, ucs2DataSize);
|
||||
stream.readHexNumber(end, ucs2DataSize);
|
||||
addHex(end, start, ucs2DataSize);
|
||||
stream.readHex(charCode, dataSize);
|
||||
cMap.mapRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
for (var i = 1; i < subitemsCount; i++) {
|
||||
incHex(end, ucs2DataSize);
|
||||
if (!sequence) {
|
||||
stream.readHexNumber(start, ucs2DataSize);
|
||||
addHex(start, end, ucs2DataSize);
|
||||
} else {
|
||||
start.set(end);
|
||||
}
|
||||
stream.readHexNumber(end, ucs2DataSize);
|
||||
addHex(end, start, ucs2DataSize);
|
||||
stream.readHex(charCode, dataSize);
|
||||
cMap.mapRange(hexToInt(start, ucs2DataSize),
|
||||
hexToInt(end, ucs2DataSize),
|
||||
hexToStr(charCode, dataSize));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error('Unknown type: ' + type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (useCMap) {
|
||||
extend(useCMap);
|
||||
}
|
||||
return cMap;
|
||||
}
|
||||
|
||||
function BinaryCMapReader() {}
|
||||
|
||||
BinaryCMapReader.prototype = {
|
||||
read: processBinaryCMap
|
||||
};
|
||||
|
||||
return BinaryCMapReader;
|
||||
})();
|
||||
|
||||
var CMapFactory = (function CMapFactoryClosure() {
|
||||
function strToInt(str) {
|
||||
var a = 0;
|
||||
|
@ -417,7 +725,7 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
}
|
||||
}
|
||||
|
||||
function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) {
|
||||
function parseCMap(cMap, lexer, builtInCMapParams, useCMap) {
|
||||
var previous;
|
||||
var embededUseCMap;
|
||||
objLoop: while (true) {
|
||||
|
@ -463,28 +771,41 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
useCMap = embededUseCMap;
|
||||
}
|
||||
if (useCMap) {
|
||||
cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl);
|
||||
// If there aren't any code space ranges defined clone all the parent ones
|
||||
// into this cMap.
|
||||
if (cMap.numCodespaceRanges === 0) {
|
||||
var useCodespaceRanges = cMap.useCMap.codespaceRanges;
|
||||
for (var i = 0; i < useCodespaceRanges.length; i++) {
|
||||
cMap.codespaceRanges[i] = useCodespaceRanges[i].slice();
|
||||
}
|
||||
cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
|
||||
}
|
||||
// Merge the map into the current one, making sure not to override
|
||||
// any previously defined entries.
|
||||
for (var key in cMap.useCMap.map) {
|
||||
if (key in cMap.map) {
|
||||
continue;
|
||||
}
|
||||
cMap.map[key] = cMap.useCMap.map[key];
|
||||
}
|
||||
extendCMap(cMap, builtInCMapParams, useCMap);
|
||||
}
|
||||
}
|
||||
|
||||
function createBuiltInCMap(name, builtInCMapUrl) {
|
||||
function extendCMap(cMap, builtInCMapParams, useCMap) {
|
||||
cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapParams);
|
||||
// If there aren't any code space ranges defined clone all the parent ones
|
||||
// into this cMap.
|
||||
if (cMap.numCodespaceRanges === 0) {
|
||||
var useCodespaceRanges = cMap.useCMap.codespaceRanges;
|
||||
for (var i = 0; i < useCodespaceRanges.length; i++) {
|
||||
cMap.codespaceRanges[i] = useCodespaceRanges[i].slice();
|
||||
}
|
||||
cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
|
||||
}
|
||||
// Merge the map into the current one, making sure not to override
|
||||
// any previously defined entries.
|
||||
for (var key in cMap.useCMap.map) {
|
||||
if (key in cMap.map) {
|
||||
continue;
|
||||
}
|
||||
cMap.map[key] = cMap.useCMap.map[key];
|
||||
}
|
||||
}
|
||||
|
||||
function parseBinaryCMap(name, builtInCMapParams) {
|
||||
var url = builtInCMapParams.url + name + '.bcmap';
|
||||
var cMap = new CMap(true);
|
||||
new BinaryCMapReader().read(url, cMap, function (useCMap) {
|
||||
extendCMap(cMap, builtInCMapParams, useCMap);
|
||||
});
|
||||
return cMap;
|
||||
}
|
||||
|
||||
function createBuiltInCMap(name, builtInCMapParams) {
|
||||
if (name === 'Identity-H') {
|
||||
return new IdentityCMap(false, 2);
|
||||
} else if (name === 'Identity-V') {
|
||||
|
@ -493,9 +814,14 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
if (BUILT_IN_CMAPS.indexOf(name) === -1) {
|
||||
error('Unknown cMap name: ' + name);
|
||||
}
|
||||
assert (builtInCMapParams, 'buildin cmap parameters are not provided');
|
||||
|
||||
if (builtInCMapParams.packed) {
|
||||
return parseBinaryCMap(name, builtInCMapParams);
|
||||
}
|
||||
|
||||
var request = new XMLHttpRequest();
|
||||
var url = builtInCMapUrl + name;
|
||||
var url = builtInCMapParams.url + name;
|
||||
request.open('GET', url, false);
|
||||
request.send(null);
|
||||
if (request.status === 0 && /^https?:/i.test(url)) {
|
||||
|
@ -503,19 +829,19 @@ var CMapFactory = (function CMapFactoryClosure() {
|
|||
}
|
||||
var cMap = new CMap(true);
|
||||
var lexer = new Lexer(new StringStream(request.responseText));
|
||||
parseCMap(cMap, lexer, builtInCMapUrl, null);
|
||||
parseCMap(cMap, lexer, builtInCMapParams, null);
|
||||
return cMap;
|
||||
}
|
||||
|
||||
return {
|
||||
create: function (encoding, builtInCMapUrl, useCMap) {
|
||||
create: function (encoding, builtInCMapParams, useCMap) {
|
||||
if (isName(encoding)) {
|
||||
return createBuiltInCMap(encoding.name, builtInCMapUrl);
|
||||
return createBuiltInCMap(encoding.name, builtInCMapParams);
|
||||
} else if (isStream(encoding)) {
|
||||
var cMap = new CMap();
|
||||
var lexer = new Lexer(encoding);
|
||||
try {
|
||||
parseCMap(cMap, lexer, builtInCMapUrl, useCMap);
|
||||
parseCMap(cMap, lexer, builtInCMapParams, useCMap);
|
||||
} catch (e) {
|
||||
warn('Invalid CMap data. ' + e);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue