From ef1ad675c2bd15f157b9c1ad3d7301657802ad69 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 6 May 2025 15:11:33 +0200 Subject: [PATCH 1/2] Unify method return values in the `ObjectLoader` class Given that all the methods are already asynchronous we can just use `await` more throughout this code, rather than having to explicitly return function-calls and `undefined`. Note also how none of the `ObjectLoader.prototype.load` call-sites use the return value. --- src/core/object_loader.js | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/core/object_loader.js b/src/core/object_loader.js index 6f75e4561..28763b25a 100644 --- a/src/core/object_loader.js +++ b/src/core/object_loader.js @@ -54,17 +54,18 @@ function addChildren(node, nodesToVisit) { * entire PDF document object graph to be traversed. */ class ObjectLoader { + refSet = null; + constructor(dict, keys, xref) { this.dict = dict; this.keys = keys; this.xref = xref; - this.refSet = null; } async load() { // Don't walk the graph if all the data is already loaded. if (this.xref.stream.isDataLoaded) { - return undefined; + return; } const { keys, dict } = this; @@ -78,10 +79,12 @@ class ObjectLoader { nodesToVisit.push(rawValue); } } - return this._walk(nodesToVisit); + await this.#walk(nodesToVisit); + + this.refSet = null; // Everything is loaded, clear the cache. } - async _walk(nodesToVisit) { + async #walk(nodesToVisit) { const nodesToRevisit = []; const pendingRequests = []; // DFS walk of the object graph. @@ -99,11 +102,10 @@ class ObjectLoader { currentNode = this.xref.fetch(currentNode); } catch (ex) { if (!(ex instanceof MissingDataException)) { - warn(`ObjectLoader._walk - requesting all data: "${ex}".`); - this.refSet = null; + warn(`ObjectLoader.#walk - requesting all data: "${ex}".`); - const { manager } = this.xref.stream; - return manager.requestAllChunks(); + await this.xref.stream.manager.requestAllChunks(); + return; } nodesToRevisit.push(currentNode); pendingRequests.push({ begin: ex.begin, end: ex.end }); @@ -139,11 +141,8 @@ class ObjectLoader { this.refSet.remove(node); } } - return this._walk(nodesToRevisit); + await this.#walk(nodesToRevisit); } - // Everything is loaded. - this.refSet = null; - return undefined; } } From 62009ffa701a69f2ff179682169dae53d1d336b8 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 6 May 2025 15:28:36 +0200 Subject: [PATCH 2/2] Simplify how the `ObjectLoader` is used The `ObjectLoader.prototype.load` method has a fast-path, which avoids any lookup/parsing if the entire PDF document is already loaded. However, we still need to create an `ObjectLoader`-instance which seems unnecessary in that case. Hence we introduce a *static* `ObjectLoader.load` method, which will help avoid creating `ObjectLoader`-instances needlessly and also (slightly) shortens the call-sites. To ensure that the new method will be used, we extend the `no-restricted-syntax` ESLint rule to "forbid" direct usage of `new ObjectLoader()`. --- eslint.config.mjs | 5 +++++ src/core/annotation.js | 15 ++++++--------- src/core/document.js | 9 +++------ src/core/object_loader.js | 18 +++++++++++------- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/eslint.config.mjs b/eslint.config.mjs index ffb146870..7dd65dd6e 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -303,6 +303,11 @@ export default [ selector: "NewExpression[callee.name='Name']", message: "Use `Name.get()` rather than `new Name()`.", }, + { + selector: "NewExpression[callee.name='ObjectLoader']", + message: + "Use `ObjectLoader.load()` rather than `new ObjectLoader()`.", + }, { selector: "NewExpression[callee.name='Ref']", message: "Use `Ref.get()` rather than `new Ref()`.", diff --git a/src/core/annotation.js b/src/core/annotation.js index 3bdf9b788..b6106fbee 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1158,15 +1158,12 @@ class Annotation { } } - loadResources(keys, appearance) { - return appearance.dict.getAsync("Resources").then(resources => { - if (!resources) { - return undefined; - } - - const objectLoader = new ObjectLoader(resources, keys, resources.xref); - return objectLoader.load().then(() => resources); - }); + async loadResources(keys, appearance) { + const resources = await appearance.dict.getAsync("Resources"); + if (resources) { + await ObjectLoader.load(resources, keys, resources.xref); + } + return resources; } async getOperatorList(evaluator, task, intent, annotationStorage) { diff --git a/src/core/document.js b/src/core/document.js index b51ddad40..0eb0f96d7 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -417,8 +417,7 @@ class Page { // TODO: add async `_getInheritableProperty` and remove this. await (this.resourcesPromise ??= this.pdfManager.ensure(this, "resources")); - const objectLoader = new ObjectLoader(this.resources, keys, this.xref); - await objectLoader.load(); + await ObjectLoader.load(this.resources, keys, this.xref); } async #getMergedResources(streamDict, keys) { @@ -430,8 +429,7 @@ class Page { if (!(localResources instanceof Dict && localResources.size)) { return this.resources; } - const objectLoader = new ObjectLoader(localResources, keys, this.xref); - await objectLoader.load(); + await ObjectLoader.load(localResources, keys, this.xref); return Dict.merge({ xref: this.xref, @@ -1254,8 +1252,7 @@ class PDFDocument { if (!(resources instanceof Dict)) { return; } - const objectLoader = new ObjectLoader(resources, ["Font"], this.xref); - await objectLoader.load(); + await ObjectLoader.load(resources, ["Font"], this.xref); const fontRes = resources.get("Font"); if (!(fontRes instanceof Dict)) { diff --git a/src/core/object_loader.js b/src/core/object_loader.js index 28763b25a..f36959239 100644 --- a/src/core/object_loader.js +++ b/src/core/object_loader.js @@ -54,7 +54,7 @@ function addChildren(node, nodesToVisit) { * entire PDF document object graph to be traversed. */ class ObjectLoader { - refSet = null; + refSet = new RefSet(); constructor(dict, keys, xref) { this.dict = dict; @@ -63,13 +63,7 @@ class ObjectLoader { } async load() { - // Don't walk the graph if all the data is already loaded. - if (this.xref.stream.isDataLoaded) { - return; - } - const { keys, dict } = this; - this.refSet = new RefSet(); // Setup the initial nodes to visit. const nodesToVisit = []; for (const key of keys) { @@ -144,6 +138,16 @@ class ObjectLoader { await this.#walk(nodesToRevisit); } } + + static async load(obj, keys, xref) { + // Don't walk the graph if all the data is already loaded. + if (xref.stream.isDataLoaded) { + return; + } + // eslint-disable-next-line no-restricted-syntax + const objLoader = new ObjectLoader(obj, keys, xref); + await objLoader.load(); + } } export { ObjectLoader };