Construct the correct field name and strip out classes when searching

The classes were stripped out when creating the field name, but that led to a wrong name. Since class components in a path are irrelevant, they're now just ignored when searching for a node in the datasets.
2025-04-23 08:38:06 +02:00 · 2023-09-07 15:52:58 +02:00 · 2023-09-07 15:52:58 +02:00 · a8a50c567a
commit a8a50c567a
parent cf5a1d60a6
3 changed files with 18 additions and 6 deletions
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -1284,12 +1284,7 @@ class Annotation {
      }

      if (loopDict.has("T")) {
-        const t = stringToPDFString(loopDict.get("T"));
-        if (!t.startsWith("#")) {
-          // If it starts with a # then it's a class which is not a concept for
-          // datasets elements (https://www.pdfa.org/norm-refs/XFA-3_3.pdf#page=96).
-          fieldName.unshift(t);
-        }
+        fieldName.unshift(stringToPDFString(loopDict.get("T")));
      }
    }
    return fieldName.join(".");
--- a/src/core/xml_parser.js
+++ b/src/core/xml_parser.js
@@ -354,6 +354,11 @@ class SimpleDOMNode {
    }

    const component = paths[pos];
+    if (component.name.startsWith("#") && pos < paths.length - 1) {
+      // If it starts with a # then it's a class which is not a concept for
+      // datasets elements (https://www.pdfa.org/norm-refs/XFA-3_3.pdf#page=96).
+      return this.searchNode(paths, pos + 1);
+    }
    const stack = [];
    let node = this;