From 028151d13a0a56fc6a5e53b5bc257523e393dc38 Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Wed, 27 Mar 2013 17:15:44 -0700 Subject: [PATCH] Restructure/rewrite of the Type1 font parser. --- src/fonts.js | 477 ++++++++++++++++++++--------------------- test/unit/font_spec.js | 93 +++++++- 2 files changed, 325 insertions(+), 245 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index 07fccd9d2..893a34fef 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -17,7 +17,7 @@ /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset, ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode, info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream, - stringToBytes, TextDecoder, TODO, warn */ + stringToBytes, TextDecoder, TODO, warn, Lexer */ 'use strict'; @@ -5057,8 +5057,11 @@ var Type1CharString = (function Type1CharStringClosure() { * Type1Parser encapsulate the needed code for parsing a Type1 font * program. Some of its logic depends on the Type2 charstrings * structure. + * Note: this doesn't really parse the font since that would require evaluation + * of PostScript, but it is possible in most cases to extract what we need + * without a full parse. */ -var Type1Parser = function type1Parser() { +var Type1Parser = (function Type1ParserClosure() { /* * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence * of Plaintext Bytes. The function took a key as a parameter which can be @@ -5081,271 +5084,258 @@ var Type1Parser = function type1Parser() { return decryptedString.slice(discardNumber); } - /* - * Returns an object containing a Subrs array and a CharStrings - * array extracted from and eexec encrypted block of data - */ - function readNumberArray(str, index) { - var start = index; - while (str[index++] != '[') - start++; - start++; - - var count = 0; - while (str[index++] != ']') - count++; - - str = str.substr(start, count); - - str = str.trim(); - // Remove adjacent spaces - str = str.replace(/\s+/g, ' '); - - var array = str.split(' '); - for (var i = 0, ii = array.length; i < ii; i++) - array[i] = parseFloat(array[i] || 0); - return array; + function isSpecial(c) { + return c === '/' || + c === '[' || c === ']' || + c === '{' || c === '}' || + c === '(' || c === ')'; } - function readNumber(str, index) { - while (str[index] == ' ') - index++; - - var start = index; - - var count = 0; - while (str[index++] != ' ') - count++; - - return parseFloat(str.substr(start, count) || 0); - } - - function readBoolean(str, index) { - while (str[index] == ' ') - index++; - - var start = index; - - var count = 0; - var length = str.length; - while (index < length && str[index++] != ' ') { - count++; + function Type1Parser(stream, encrypted) { + if (encrypted) { + stream = new Stream(decrypt(stream.getBytes(), EEXEC_ENCRYPT_KEY, 4)); } - - // Use 1 and 0 since that's what type2 charstrings use. - return str.substr(start, count) === 'true' ? 1 : 0; + this.stream = stream; } - - function isSeparator(c) { - return c == ' ' || c == '\n' || c == '\x0d'; - } - - this.extractFontProgram = function Type1Parser_extractFontProgram(stream) { - var eexec = decrypt(stream, EEXEC_ENCRYPT_KEY, 4); - var eexecStr = ''; - for (var i = 0, ii = eexec.length; i < ii; i++) - eexecStr += String.fromCharCode(eexec[i]); - - var glyphsSection = false, subrsSection = false; - var subrs = [], charstrings = []; - var program = { - subrs: [], - charstrings: [], - properties: { - 'privateData': { - 'lenIV': 4 + Type1Parser.prototype = { + readNumberArray: function Type1Parser_readNumberArray() { + this.getToken(); // read '[' or '{' (arrays can start with either) + var array = []; + while (true) { + var token = this.getToken(); + if (token === null || token === ']' || token === '}') { + break; } + array.push(parseFloat(token || 0)); } - }; + return array; + }, - var glyph = ''; - var token = ''; - var length = 0; + readNumber: function Type1Parser_readNumber() { + var token = this.getToken(); + return parseFloat(token || 0); + }, - var c = ''; - var count = eexecStr.length; - for (var i = 0; i < count; i++) { - var getToken = function getToken() { - while (i < count && isSeparator(eexecStr[i])) - ++i; + readInt: function Type1Parser_readInt() { + // Use '| 0' to prevent setting a double into length such as the double + // does not flow into the loop variable. + var token = this.getToken(); + return parseInt(token || 0, 10) | 0; + }, - var token = ''; - while (i < count && !isSeparator(eexecStr[i])) - token += eexecStr[i++]; + readBoolean: function Type1Parser_readBoolean() { + var token = this.getToken(); - return token; - }; - var c = eexecStr[i]; + // Use 1 and 0 since that's what type2 charstrings use. + return token === 'true' ? 1 : 0; + }, - if ((glyphsSection || subrsSection) && - (token == 'RD' || token == '-|')) { - i++; - var data = eexec.slice(i, i + length); - var lenIV = program.properties.privateData['lenIV']; - var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV); + getToken: function Type1Parser_getToken() { + // Eat whitespace and comments. + var comment = false; + var ch; + var stream = this.stream; + while (true) { + if ((ch = stream.lookChar()) === null) + return null; - if (glyphsSection) { - charstrings.push({ - glyph: glyph, - encoded: encoded - }); - } else { - subrs.push(encoded); - } - i += length; - token = ''; - } else if (isSeparator(c)) { - // Use '| 0' to prevent setting a double into length such as the double - // does not flow into the loop variable. - length = parseInt(token, 10) | 0; - token = ''; - } else { - token += c; - if (!glyphsSection) { - switch (token) { - case '/CharString': - glyphsSection = true; - break; - case '/Subrs': - ++i; - var num = parseInt(getToken(), 10); - getToken(); // read in 'array' - for (var j = 0; j < num; ++j) { - var t = getToken(); // read in 'dup' - if (t == 'ND' || t == '|-' || t == 'noaccess') - break; - var index = parseInt(getToken(), 10); - if (index > j) - j = index; - var length = parseInt(getToken(), 10); - getToken(); // read in 'RD' - var data = eexec.slice(i + 1, i + 1 + length); - var lenIV = program.properties.privateData['lenIV']; - var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV); - i = i + 1 + length; - t = getToken(); // read in 'NP' - if (t == 'noaccess') - getToken(); // read in 'put' - subrs[index] = encoded; - } - break; - case '/BlueValues': - case '/OtherBlues': - case '/FamilyBlues': - case '/FamilyOtherBlues': - var blueArray = readNumberArray(eexecStr, i + 1); - // *Blue* values may contain invalid data: disables reading of - // those values when hinting is disabled. - if (blueArray.length > 0 && (blueArray.length % 2) === 0 && - HINTING_ENABLED) { - program.properties.privateData[token.substring(1)] = blueArray; - } - break; - case '/StemSnapH': - case '/StemSnapV': - program.properties.privateData[token.substring(1)] = - readNumberArray(eexecStr, i + 1); - break; - case '/StdHW': - case '/StdVW': - program.properties.privateData[token.substring(1)] = - readNumberArray(eexecStr, i + 1)[0]; - break; - case '/BlueShift': - case '/lenIV': - case '/BlueFuzz': - case '/BlueScale': - case '/LanguageGroup': - case '/ExpansionFactor': - program.properties.privateData[token.substring(1)] = - readNumber(eexecStr, i + 1); - break; - case '/ForceBold': - program.properties.privateData[token.substring(1)] = - readBoolean(eexecStr, i + 1); - break; + if (comment) { + if (ch === '\x0a' || ch === '\x0d') { + comment = false; } - } else if (c == '/') { - token = glyph = ''; - while ((c = eexecStr[++i]) != ' ') - glyph += c; + } else if (ch === '%') { + comment = true; + } else if (!Lexer.isSpace(ch)) { + break; } + stream.skip(); } - } - - for (var i = 0; i < charstrings.length; i++) { - var glyph = charstrings[i].glyph; - var encoded = charstrings[i].encoded; - var charString = new Type1CharString(); - var error = charString.convert(encoded, subrs); - var output = charString.output; - if (error) { - // It seems when FreeType encounters an error while evaluating a glyph - // that it completely ignores the glyph so we'll mimic that behaviour - // here and put an endchar to make the validator happy. - output = [14]; + if (isSpecial(ch)) { + stream.skip(); + return ch; } - program.charstrings.push({ - glyph: glyph, - data: output, - seac: charString.seac, - lsb: charString.lsb, - width: charString.width - }); - } + var token = ''; + do { + token += ch; + stream.skip(); + ch = stream.lookChar(); + } while (ch !== null && !Lexer.isSpace(ch) && !isSpecial(ch)); + return token; + }, - return program; - }; + /* + * Returns an object containing a Subrs array and a CharStrings + * array extracted from and eexec encrypted block of data + */ + extractFontProgram: function Type1Parser_extractFontProgram() { + var stream = this.stream; - this.extractFontHeader = function Type1Parser_extractFontHeader(stream, - properties) { - var headerString = ''; - for (var i = 0, ii = stream.length; i < ii; i++) - headerString += String.fromCharCode(stream[i]); - - var token = ''; - var count = headerString.length; - for (var i = 0; i < count; i++) { - var getToken = function getToken() { - var character = headerString[i]; - while (i < count && (isSeparator(character) || character == '/')) - character = headerString[++i]; - - var token = ''; - while (i < count && !(isSeparator(character) || character == '/')) { - token += character; - character = headerString[++i]; + var subrs = [], charstrings = []; + var program = { + subrs: [], + charstrings: [], + properties: { + 'privateData': { + 'lenIV': 4 + } } - - return token; }; - - var c = headerString[i]; - if (isSeparator(c)) { + var token; + while ((token = this.getToken()) !== null) { + if (token !== '/') { + continue; + } + token = this.getToken(); switch (token) { - case '/FontMatrix': - var matrix = readNumberArray(headerString, i + 1); + case 'CharStrings': + // The number immediately following CharStrings must be greater or + // equal to the number of CharStrings. + this.getToken(); + this.getToken(); // read in 'dict' + this.getToken(); // read in 'dup' + this.getToken(); // read in 'begin' + while(true) { + token = this.getToken(); + if (token === null || token === 'end') { + break; + } + + if (token !== '/') { + continue; + } + var glyph = this.getToken(); + var length = this.readInt(); + this.getToken(); // read in 'RD' or '-|' + var data = stream.makeSubStream(stream.pos + 1, length); + var lenIV = program.properties.privateData['lenIV']; + var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY, + lenIV); + // Skip past the required space and binary data. + stream.skip(1 + length); + token = this.getToken(); // read in 'ND' or '|-' + if (token === 'noaccess') { + this.getToken(); // read in 'def' + } + charstrings.push({ + glyph: glyph, + encoded: encoded + }); + } + break; + case 'Subrs': + var num = this.readInt(); + this.getToken(); // read in 'array' + for (var j = 0; j < num; ++j) { + token = this.getToken(); // read in 'dup' + var index = this.readInt(); + if (index > j) + j = index; + var length = this.readInt(); + this.getToken(); // read in 'RD' or '-|' + var data = stream.makeSubStream(stream.pos + 1, length); + var lenIV = program.properties.privateData['lenIV']; + var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY, + lenIV); + // Skip past the required space and binary data. + stream.skip(1 + length); + token = this.getToken(); // read in 'NP' or '|' + if (token === 'noaccess') { + this.getToken(); // read in 'put' + } + subrs[index] = encoded; + } + break; + case 'BlueValues': + case 'OtherBlues': + case 'FamilyBlues': + case 'FamilyOtherBlues': + var blueArray = this.readNumberArray(); + // *Blue* values may contain invalid data: disables reading of + // those values when hinting is disabled. + if (blueArray.length > 0 && (blueArray.length % 2) === 0 && + HINTING_ENABLED) { + program.properties.privateData[token] = blueArray; + } + break; + case 'StemSnapH': + case 'StemSnapV': + program.properties.privateData[token] = this.readNumberArray(); + break; + case 'StdHW': + case 'StdVW': + program.properties.privateData[token] = + this.readNumberArray()[0]; + break; + case 'BlueShift': + case 'lenIV': + case 'BlueFuzz': + case 'BlueScale': + case 'LanguageGroup': + case 'ExpansionFactor': + program.properties.privateData[token] = this.readNumber(); + break; + case 'ForceBold': + program.properties.privateData[token] = this.readBoolean(); + break; + } + } + + for (var i = 0; i < charstrings.length; i++) { + var glyph = charstrings[i].glyph; + var encoded = charstrings[i].encoded; + var charString = new Type1CharString(); + var error = charString.convert(encoded, subrs); + var output = charString.output; + if (error) { + // It seems when FreeType encounters an error while evaluating a glyph + // that it completely ignores the glyph so we'll mimic that behaviour + // here and put an endchar to make the validator happy. + output = [14]; + } + program.charstrings.push({ + glyph: glyph, + data: output, + seac: charString.seac, + lsb: charString.lsb, + width: charString.width + }); + } + + return program; + }, + + extractFontHeader: function Type1Parser_extractFontHeader(properties) { + var token; + while ((token = this.getToken()) !== null) { + if (token !== '/') { + continue; + } + token = this.getToken(); + switch (token) { + case 'FontMatrix': + var matrix = this.readNumberArray(); properties.fontMatrix = matrix; break; - case '/Encoding': - var encodingArg = getToken(); + case 'Encoding': + var encodingArg = this.getToken(); var encoding; if (!/^\d+$/.test(encodingArg)) { // encoding name is specified encoding = Encodings[encodingArg]; } else { encoding = []; - var size = parseInt(encodingArg, 10); - getToken(); // read in 'array' + var size = parseInt(encodingArg, 10) | 0; + this.getToken(); // read in 'array' for (var j = 0; j < size; j++) { - var token = getToken(); - if (token == 'dup') { - var index = parseInt(getToken(), 10); - var glyph = getToken(); + var token = this.getToken(); + if (token === 'dup') { + var index = this.readInt(); + this.getToken(); // read in '/' + var glyph = this.getToken(); encoding[index] = glyph; - getToken(); // read the in 'put' + this.getToken(); // read the in 'put' } } } @@ -5355,13 +5345,12 @@ var Type1Parser = function type1Parser() { } break; } - token = ''; - } else { - token += c; } } }; -}; + + return Type1Parser; +})(); /** * The CFF class takes a Type1 file and wrap it into a @@ -5435,17 +5424,17 @@ var CFFStandardStrings = [ 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold' ]; -var type1Parser = new Type1Parser(); - // Type1Font is also a CIDFontType0. var Type1Font = function Type1Font(name, file, properties) { // Get the data block containing glyphs and subrs informations - var headerBlock = file.getBytes(properties.length1); - type1Parser.extractFontHeader(headerBlock, properties); + var headerBlock = new Stream(file.getBytes(properties.length1)); + var headerBlockParser = new Type1Parser(headerBlock); + headerBlockParser.extractFontHeader(properties); // Decrypt the data blocks and retrieve it's content - var eexecBlock = file.getBytes(properties.length2); - var data = type1Parser.extractFontProgram(eexecBlock); + var eexecBlock = new Stream(file.getBytes(properties.length2)); + var eexecBlockParser = new Type1Parser(eexecBlock, true); + var data = eexecBlockParser.extractFontProgram(); for (var info in data.properties) properties[info] = data.properties[info]; diff --git a/test/unit/font_spec.js b/test/unit/font_spec.js index 02a663936..01e1f3dec 100644 --- a/test/unit/font_spec.js +++ b/test/unit/font_spec.js @@ -1,7 +1,7 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ /* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings, - SEAC_ANALYSIS_ENABLED:true */ + SEAC_ANALYSIS_ENABLED:true, Type1Parser, StringStream */ 'use strict'; @@ -297,4 +297,95 @@ describe('font', function() { }); // TODO a lot more compiler tests }); + + describe('Type1Parser', function() { + + it('splits tokens', function() { + var stream = new StringStream('/BlueValues[-17 0]noaccess def'); + var parser = new Type1Parser(stream); + expect(parser.getToken()).toEqual('/'); + expect(parser.getToken()).toEqual('BlueValues'); + expect(parser.getToken()).toEqual('['); + expect(parser.getToken()).toEqual('-17'); + expect(parser.getToken()).toEqual('0'); + expect(parser.getToken()).toEqual(']'); + expect(parser.getToken()).toEqual('noaccess'); + expect(parser.getToken()).toEqual('def'); + expect(parser.getToken()).toEqual(null); + }); + it('handles glued tokens', function() { + var stream = new StringStream('dup/CharStrings'); + var parser = new Type1Parser(stream); + expect(parser.getToken()).toEqual('dup'); + expect(parser.getToken()).toEqual('/'); + expect(parser.getToken()).toEqual('CharStrings'); + }); + it('ignores whitespace', function() { + var stream = new StringStream('\nab c\t'); + var parser = new Type1Parser(stream); + expect(parser.getToken()).toEqual('ab'); + expect(parser.getToken()).toEqual('c'); + }); + it('parses numbers', function() { + var stream = new StringStream('123'); + var parser = new Type1Parser(stream); + expect(parser.readNumber()).toEqual(123); + }); + it('parses booleans', function() { + var stream = new StringStream('true false'); + var parser = new Type1Parser(stream); + expect(parser.readBoolean()).toEqual(1); + expect(parser.readBoolean()).toEqual(0); + }); + it('parses number arrays', function() { + var stream = new StringStream('[1 2]'); + var parser = new Type1Parser(stream); + expect(parser.readNumberArray()).toEqual([1, 2]); + // Variation on spacing. + var stream = new StringStream('[ 1 2 ]'); + parser = new Type1Parser(stream); + expect(parser.readNumberArray()).toEqual([1, 2]); + }); + it('skips comments', function() { + var stream = new StringStream( + '%!PS-AdobeFont-1.0: CMSY10 003.002\n' + + '%%Title: CMSY10\n' + + '%Version: 003.002\n' + + 'FontDirectory'); + var parser = new Type1Parser(stream); + expect(parser.getToken()).toEqual('FontDirectory'); + }); + it('parses font program', function() { + var stream = new StringStream( + '/ExpansionFactor 99\n' + + '/Subrs 1 array\n' + + 'dup 0 1 RD x noaccess put\n'+ + '/CharStrings 46 dict dup begin\n' + + '/.notdef 1 RD x ND' + '\n' + + 'end'); + var parser = new Type1Parser(stream); + var program = parser.extractFontProgram(); + expect(program.charstrings.length).toEqual(1); + expect(program.properties.privateData.ExpansionFactor).toEqual(99); + }); + it('parses font header font matrix', function() { + var stream = new StringStream( + '/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n'); + var parser = new Type1Parser(stream); + var props = {}; + var program = parser.extractFontHeader(props); + expect(props.fontMatrix).toEqual([0.001, 0, 0, 0.001, 0, 0]); + }); + it('parses font header encoding', function() { + var stream = new StringStream( + '/Encoding 256 array\n' + + '0 1 255 {1 index exch /.notdef put} for\n' + + 'dup 33 /arrowright put\n' + + 'readonly def\n'); + var parser = new Type1Parser(stream); + var props = {}; + var program = parser.extractFontHeader(props); + expect(props.baseEncoding[33]).toEqual('arrowright'); + }); + }); });