From eb6f7499dc8c556ef3f8f4e40d346f6bf194953a Mon Sep 17 00:00:00 2001
From: Vivien Nicolas <21@vingtetun.org>
Date: Fri, 17 Jun 2011 06:02:29 +0200
Subject: [PATCH] Rework some code to enhance performance (CIIM6/CIIM9 are
 rejected by the sanitizer now)

---
 PDFFont.js      | 338 ++++++++++++++++++++----------------------------
 PDFFontUtils.js |   3 +-
 2 files changed, 142 insertions(+), 199 deletions(-)

diff --git a/PDFFont.js b/PDFFont.js
index 4278ae29a..1bf437066 100644
--- a/PDFFont.js
+++ b/PDFFont.js
@@ -546,7 +546,14 @@ Font.prototype = {
  * to fonts in particular and needs to be share between them.
  */
 var FontsUtils = {
+  _bytesArray: new Uint8Array(4),
   integerToBytes: function fu_integerToBytes(aValue, aBytesCount) {
+    // If we want only one byte, take a fast path
+    if (aBytesCount == 1) {
+      this._bytesArray.set([aValue]);
+      return this._bytesArray[0];
+    }
+
     var bytes = [];
     for (var i = 0; i < aBytesCount; i++)
       bytes[i] = 0x00;
@@ -1087,61 +1094,6 @@ var Type1Parser = function() {
     return charString;
   };
 
-  /*
-   * The operand stack holds arbitrary PostScript objects that are the operands
-   * and results of PostScript operators being executed. The interpreter pushes
-   * objects on the operand stack when it encounters them as literal data in a
-   * program being executed. When an operator requires one or more operands, it
-   * obtains them by popping them off the top of the operand stack. When an
-   * operator returns one or more results, it does so by pushing them on the
-   * operand stack.
-   */
-   var operandStack = new Stack(40);
-
-   // Flag indicating if the topmost operand of the operandStack is an array
-   var operandIsArray = 0;
-
-  /*
-   * The dictionary stack holds only dictionary objects. The current set of
-   * dictionaries on the dictionary stack defines the environment for all
-   * implicit name searches, such as those that occur when the interpreter
-   * encounters an executable name. The role of the dictionary stack is
-   * introduced in Section 3.3, “Data Types and Objects,” and is further
-   * explained in Section 3.5, “Execution.” of the PostScript Language
-   * Reference.
-   */
-  var systemDict = new Dict(),
-      globalDict = new Dict(),
-      userDict   = new Dict();
-
-  var dictionaryStack = new Stack();
-  dictionaryStack.push(systemDict);
-  dictionaryStack.push(globalDict);
-  dictionaryStack.push(userDict);
-
-  /*
-   * The execution stack holds executable objects (mainly procedures and files)
-   * that are in intermediate stages of execution. At any point in the
-   * execution of a PostScript program, this stack represents the program’s
-   * call stack. Whenever the interpreter suspends execution of an object to
-   * execute some other object, it pushes the new object on the execution
-   * stack. When the interpreter finishes executing an object, it pops that
-   * object off the execution stack and resumes executing the suspended object
-   * beneath it.
-   */
-  var executionStack = new Stack();
-
-  /*
-   * Return the next token in the execution stack
-   */
-  function nextInStack() {
-    var currentProcedure = executionStack.peek();
-    var command = currentProcedure.shift();
-    if (!currentProcedure.length)
-      executionStack.pop();
-    return command;
-  };
-
   /**
    * Returns an object containing a Subrs array and a CharStrings array
    * extracted from and eexec encrypted block of data
@@ -1205,79 +1157,6 @@ var Type1Parser = function() {
       subrs: subrs,
       charstrings: glyphs
     }
-  };
-
-  /*
-   * Flatten the commands by interpreting the postscript code and replacing
-   * every 'callsubr', 'callothersubr' by the real commands.
-   * At the moment OtherSubrs are not fully supported and only otherSubrs 0-4
-   * as described in 'Using Subroutines' of 'Adobe Type 1 Font Format',
-   * chapter 8.
-   */
-  this.flattenCharstring = function(aCharstring, aSubrs) {
-    operandStack.clear();
-    executionStack.clear();
-    executionStack.push(aCharstring.slice());
-
-    var leftSidebearing = 0;
-    var lastPoint = 0;
-    while (true) {
-      var obj = nextInStack();
-      if (IsInt(obj) || IsBool(obj)) {
-        operandStack.push(obj);
-      } else {
-        switch (obj) {
-          case "callsubr":
-            var index = operandStack.pop();
-            executionStack.push(aSubrs[index].slice());
-            break;
-
-          case "callothersubr":
-            var index = operandStack.pop();
-            var count = operandStack.pop();
-            var data = operandStack.pop();
-            // XXX The callothersubr needs to support at least the 3 defaults
-            // otherSubrs of the spec
-            if (index != 3)
-              error("callothersubr for index: " + index);
-            operandStack.push(3);
-            operandStack.push("callothersubr");
-            break;
-
-          case "div":
-            var num2 = operandStack.pop();
-            var num1 = operandStack.pop();
-            operandStack.push(num2 / num1);
-            break;
-
-          case "pop":
-            operandStack.pop();
-            break;
-
-          case "return":
-            break;
-
-          case "hsbw":
-            var charWidthVector = operandStack.pop();
-            var leftSidebearing = operandStack.pop();
-            operandStack.push(charWidthVector);
-
-            if (leftSidebearing) {
-              operandStack.push(leftSidebearing);
-              operandStack.push("hmoveto");
-            }
-            break;
-
-          case "endchar":
-            operandStack.push("endchar");
-            return operandStack.clone();
-
-          default:
-            operandStack.push(obj);
-            break;
-        }
-      }
-    }
   }
 };
 
@@ -1339,10 +1218,11 @@ CFF.prototype = {
     return data;
   },
 
-  encodeNumber: function(aValue) {
+  encodeNumber: function(aValue, aIsCharstring) {
     var x = 0;
-    // XXX we don't really care about Type2 optimization here...
-    if (aValue >= -32768 && aValue <= 32767) {
+    if (aIsCharstring && aValue >= -107 && aValue <= 107) {
+      return [aValue + 139];
+    } else if (aValue >= -32768 && aValue <= 32767) {
       return [
         28,
         FontsUtils.integerToBytes(aValue >> 8, 1),
@@ -1389,6 +1269,128 @@ CFF.prototype = {
     return charstrings;
   },
 
+  /*
+   * Flatten the commands by interpreting the postscript code and replacing
+   * every 'callsubr', 'callothersubr' by the real commands.
+   * 
+   * TODO This function also do a string to command number transformation
+   * that can probably be avoided if the Type1 decodeCharstring code is smarter
+   */
+  commandsMap: {
+    "hstem": 1,
+    "vstem": 3,
+    "vmoveto": 4,
+    "rlineto": 5,
+    "hlineto": 6,
+    "vlineto": 7,
+    "rrcurveto": 8,
+    "endchar": 14,
+    "rmoveto": 21,
+    "hmoveto": 22,
+    "vhcurveto": 30,
+    "hvcurveto": 31,
+  },
+
+  flattenCharstring: function(aCharstring, aSubrs) {
+    var i = 0;
+    while (true) {
+      var obj = aCharstring[i];
+      if (IsString(obj)) {
+        switch (obj) {
+          case "callsubr":
+            var subr = aSubrs[aCharstring[i- 1]].slice();
+            if (subr.length > 1) {
+              subr = this.flattenCharstring(subr, aSubrs);
+              subr.pop();
+              aCharstring.splice(i - 1, 2, subr);
+            }
+            else
+              aCharstring.splice(i - 1, 2);
+
+            i -= 1;
+            break;
+
+          case "callothersubr":
+            var index = aCharstring[i - 1];
+            var count = aCharstring[i - 2];
+            var data = aCharstring[i - 3];
+
+            // XXX The callothersubr needs to support at least the 3 defaults
+            // otherSubrs of the spec
+            if (index != 3)
+              error("callothersubr for index: " + index + " (" + aCharstring + ")");
+
+            if (!data) {
+              aCharstring.splice(i - 2, 3, "pop", 3);
+              i -= 2;
+            } else {
+              // 5 to remove the arguments, the callothersubr call and the pop command
+              aCharstring.splice(i - 3, 5, 3);
+              i -= 3;
+            }
+            break;
+
+          case "div":
+            var num2 = aCharstring[i - 1];
+            var num1 = aCharstring[i - 2];
+            aCharstring.splice(i - 2, 3, num2 / num1);
+            i -= 2;
+            break;
+
+          case "pop":
+            aCharstring.splice(i - 2, 2);
+            i -= 1;
+            break;
+
+
+          case "hsbw":
+            var charWidthVector = aCharstring[i - 1];
+            var leftSidebearing = aCharstring[i - 2];
+            aCharstring.splice(i - 2, 3, charWidthVector, leftSidebearing, "hmoveto");
+            break;
+
+          case "endchar":
+          case "return":
+            // CharString is ready to be re-encode to commands number at this point
+            for (var j = 0; j < aCharstring.length; j++) {
+              var command = aCharstring[j];
+              if (IsNum(command)) {
+                var number = this.encodeNumber(command, true);
+                aCharstring.splice(j, 1);
+                for (var k = 0; k < number.length; k++)
+                  aCharstring.splice(j + k, 0, number[k]);
+                j+= number.length - 1;
+              } else if (IsString(command)) {
+                var command = this.commandsMap[command];
+                if (IsArray(command)) {
+                  aCharstring.splice(j - 1, 1, command[0], command[1]);
+                  j += 1;
+                } else {
+                  aCharstring[j] = command;
+                }
+              } else if (IsArray(command)) {
+                aCharstring.splice(j, 1);
+
+                // command has already been translated, just add them to the
+                // charstring directly
+                for (var k = 0; k < command.length; k++)
+                  aCharstring.splice(j + k, 0, command[k]);
+                j+= command.length - 1;
+              } else { // what else?
+                error("Error while flattening the Type1 charstring: " + aCharstring);
+              }
+            }
+            return aCharstring;
+
+          default:
+            break;
+        }
+      }
+      i++;
+    }
+    error("failing with i = " + i + " in charstring:" + aCharstring + "(" + aCharstring.length + ")");
+  },
+
   convertToCFF: function(aFontInfo) {
     var debug = false;
     function dump(aMsg) {
@@ -1398,39 +1400,24 @@ CFF.prototype = {
 
     var charstrings = this.getOrderedCharStrings(aFontInfo.charstrings);
 
+    // Starts the conversion of the Type1 charstrings to Type2
+    var start = Date.now();
     var charstringsCount = 0;
     var charstringsDataLength = 0;
     var glyphs = [];
-    var glyphsChecker = {};
-    var subrs = aFontInfo.subrs;
-
-    // FIXME This code is actually the only reason the dummy PS Interpreter
-    //       called Type1Parser continue to lives, basically the goal here is
-    //       to embed the OtherSubrs/Subrs into the charstring directly.
-    //       But since Type2 charstrings use a bias to index Subrs and can
-    //       theorically store twice the number of Type1 we could directly
-    //       save the OtherSubrs and Subrs in the Type2 table for Subrs
-    //       and avoid this 'flattening' slow method.
-    //
-    //       The other thinds done by this method is splitting the initial
-    //       'width lsb hswb' command of Type1 to something similar in Type2
-    //       that is: 'width dx moveto' but this can be done in the
-    //       decodeCharstring method directly (maybe one day it will be called
-    //       translateCharstring?)
-    var parser = new Type1Parser();
     for (var i = 0; i < charstrings.length; i++) {
       var charstring = charstrings[i].charstring.slice();
       var glyph = charstrings[i].glyph;
-      if (glyphsChecker[glyph])
-        error("glyphs already exists!");
-      glyphsChecker[glyph] = true;
 
-      var flattened = parser.flattenCharstring(charstring, subrs);
+      var flattened = this.flattenCharstring(charstring, aFontInfo.subrs);
       glyphs.push(flattened);
       charstringsCount++;
       charstringsDataLength += flattened.length;
     }
+
+    var end = Date.now();
     dump("There is " + charstringsCount + " glyphs (size: " + charstringsDataLength + ")");
+    dump("Time to flatten the strings is : " + (end -start));
 
     // Create a CFF font data
     var cff = new Uint8Array(kMaxFontFileSize);
@@ -1473,52 +1460,7 @@ CFF.prototype = {
       charset.push(bytes[1]);
     }
 
-    // Convert charstrings
-    var getNumFor = {
-      "hstem": 1,
-      "vstem": 3,
-      "vmoveto": 4,
-      "rlineto": 5,
-      "hlineto": 6,
-      "vlineto": 7,
-      "rrcurveto": 8,
-      "endchar": 14,
-      "rmoveto": 21,
-      "hmoveto": 22,
-      "vhcurveto": 30,
-      "hvcurveto": 31,
-    };
-
-    // FIXME Concatenating array with this algorithm (O²) is expensive and
-    //       can be avoided if the voodoo's dance of charstrings decoding
-    //       encoding is left for dead. Actually charstrings command number
-    //       are converted to a string and then back to a number with the
-    //       next few lines of code...
-    var r = [[0x40, 0x0E]];
-    for (var i = 0; i < glyphs.length; i++) {
-      var data = glyphs[i].slice();
-      var charstring = [];
-      for (var j = 0; j < data.length; j++) {
-        var c = data[j];
-        if (!IsNum(c)) {
-          var token = getNumFor[c];
-          if (!token)
-            error("Token " + c + " is not recognized in charstring " + data);
-          charstring.push(token);
-        } else {
-          try {
-            var bytes = this.encodeNumber(c);
-          } catch(e) {
-            log("Glyph " + i + " has a wrong value: " + c + " in charstring: " + data);
-            log("the default value is glyph " + charstrings[i].glyph + " and is supposed to be: " + charstrings[i].charstring);
-          }
-          charstring = charstring.concat(bytes);
-        }
-      }
-      r.push(charstring);
-    }
-
-    var charstringsIndex = this.createCFFIndexHeader(r, true);
+    var charstringsIndex = this.createCFFIndexHeader([[0x40, 0x0E]].concat(glyphs), true);
     charstringsIndex = charstringsIndex.join(" ").split(" "); // XXX why?
 
     //Top Dict Index
diff --git a/PDFFontUtils.js b/PDFFontUtils.js
index e242121db..086648fe2 100644
--- a/PDFFontUtils.js
+++ b/PDFFontUtils.js
@@ -310,7 +310,8 @@ var Type2Parser = function(aFilePath) {
     // Read the Global Subr Index that comes just after the Strings Index
     // (cf. "The Compact Font Format Specification" Chapter 16)
     dump("Reading Global Subr Index");
-    var subrs = readFontIndexData(aStream);
+    var subrs = readFontIndexData(aStream, true);
+    dump(subrs);
 
     // Reading Private Dict
     var private = font.get("Private");