From 0105237af60f72a3a671c02a3050ba3736d269ce Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 20 May 2025 13:23:38 +0200 Subject: [PATCH 1/2] Move a few helper functions/classes out of the `src/display/api.js` file Given that this file represents the official API, it's difficult to avoid it becoming fairly large as we add new functionality. However, it also contains a couple of smaller (and internal) helpers that we can move into a new utils-file. Also, we inline the `DEFAULT_RANGE_CHUNK_SIZE` constant since it's only used *once* and its value has never been changed in over a decade. --- src/display/api.js | 152 ++------------------------- src/display/api_utils.js | 167 ++++++++++++++++++++++++++++++ src/pdf.js | 2 +- test/unit/message_handler_spec.js | 2 +- test/unit/pdf_spec.js | 2 +- 5 files changed, 179 insertions(+), 146 deletions(-) create mode 100644 src/display/api_utils.js diff --git a/src/display/api.js b/src/display/api.js index 7aa43e5de..aec5e98a8 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -18,7 +18,6 @@ */ import { - _isValidExplicitDest, AbortException, AnnotationMode, assert, @@ -29,7 +28,6 @@ import { RenderingIntentFlag, setVerbosityLevel, shadow, - stringToBytes, unreachable, warn, } from "../shared/util.js"; @@ -47,6 +45,13 @@ import { StatTimer, } from "./display_utils.js"; import { FontFaceObject, FontLoader } from "./font_loader.js"; +import { + getDataProp, + getFactoryUrlProp, + getUrlProp, + isRefProxy, + LoopbackPort, +} from "./api_utils.js"; import { MessageHandler, wrapReason } from "../shared/message_handler.js"; import { NodeCanvasFactory, @@ -71,7 +76,6 @@ import { PDFNodeStream } from "display-node_stream"; import { TextLayer } from "./text_layer.js"; import { XfaText } from "./xfa_text.js"; -const DEFAULT_RANGE_CHUNK_SIZE = 65536; // 2^16 = 65536 const RENDERING_CANCELLED_TIMEOUT = 100; // ms /** @@ -111,7 +115,7 @@ const RENDERING_CANCELLED_TIMEOUT = 100; // ms * @property {PDFDataRangeTransport} [range] - Allows for using a custom range * transport implementation. * @property {number} [rangeChunkSize] - Specify maximum number of bytes fetched - * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE}. + * per range request. The default value is 65536 (= 2^16). * @property {PDFWorker} [worker] - The worker that will be used for loading and * parsing the PDF data. * @property {number} [verbosity] - Controls the logging level; the constants @@ -255,7 +259,7 @@ function getDocument(src = {}) { const rangeChunkSize = Number.isInteger(src.rangeChunkSize) && src.rangeChunkSize > 0 ? src.rangeChunkSize - : DEFAULT_RANGE_CHUNK_SIZE; + : 2 ** 16; let worker = src.worker instanceof PDFWorker ? src.worker : null; const verbosity = src.verbosity; // Ignore "data:"-URLs, since they can't be used to recover valid absolute @@ -507,94 +511,6 @@ function getDocument(src = {}) { return task; } -function getUrlProp(val) { - if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { - return null; // The 'url' is unused with `PDFDataRangeTransport`. - } - if (val instanceof URL) { - return val.href; - } - if (typeof val === "string") { - if ( - typeof PDFJSDev !== "undefined" && - PDFJSDev.test("GENERIC") && - isNodeJS - ) { - return val; // Use the url as-is in Node.js environments. - } - - // The full path is required in the 'url' field. - const url = URL.parse(val, window.location); - if (url) { - return url.href; - } - } - throw new Error( - "Invalid PDF url data: " + - "either string or URL-object is expected in the url property." - ); -} - -function getDataProp(val) { - // Converting string or array-like data to Uint8Array. - if ( - typeof PDFJSDev !== "undefined" && - PDFJSDev.test("GENERIC") && - isNodeJS && - typeof Buffer !== "undefined" && // eslint-disable-line no-undef - val instanceof Buffer // eslint-disable-line no-undef - ) { - throw new Error( - "Please provide binary data as `Uint8Array`, rather than `Buffer`." - ); - } - if (val instanceof Uint8Array && val.byteLength === val.buffer.byteLength) { - // Use the data as-is when it's already a Uint8Array that completely - // "utilizes" its underlying ArrayBuffer, to prevent any possible - // issues when transferring it to the worker-thread. - return val; - } - if (typeof val === "string") { - return stringToBytes(val); - } - if ( - val instanceof ArrayBuffer || - ArrayBuffer.isView(val) || - (typeof val === "object" && !isNaN(val?.length)) - ) { - return new Uint8Array(val); - } - throw new Error( - "Invalid PDF binary data: either TypedArray, " + - "string, or array-like object is expected in the data property." - ); -} - -function getFactoryUrlProp(val) { - if (typeof val !== "string") { - return null; - } - if (val.endsWith("/")) { - return val; - } - throw new Error(`Invalid factory url: "${val}" must include trailing slash.`); -} - -const isRefProxy = v => - typeof v === "object" && - Number.isInteger(v?.num) && - v.num >= 0 && - Number.isInteger(v?.gen) && - v.gen >= 0; - -const isNameProxy = v => typeof v === "object" && typeof v?.name === "string"; - -const isValidExplicitDest = _isValidExplicitDest.bind( - null, - /* validRef = */ isRefProxy, - /* validName = */ isNameProxy -); - /** * @typedef {Object} OnProgressParameters * @property {number} loaded - Currently loaded number of bytes. @@ -2012,54 +1928,6 @@ class PDFPageProxy { } } -class LoopbackPort { - #listeners = new Map(); - - #deferred = Promise.resolve(); - - postMessage(obj, transfer) { - const event = { - data: structuredClone(obj, transfer ? { transfer } : null), - }; - - this.#deferred.then(() => { - for (const [listener] of this.#listeners) { - listener.call(this, event); - } - }); - } - - addEventListener(name, listener, options = null) { - let rmAbort = null; - if (options?.signal instanceof AbortSignal) { - const { signal } = options; - if (signal.aborted) { - warn("LoopbackPort - cannot use an `aborted` signal."); - return; - } - const onAbort = () => this.removeEventListener(name, listener); - rmAbort = () => signal.removeEventListener("abort", onAbort); - - signal.addEventListener("abort", onAbort); - } - this.#listeners.set(listener, rmAbort); - } - - removeEventListener(name, listener) { - const rmAbort = this.#listeners.get(listener); - rmAbort?.(); - - this.#listeners.delete(listener); - } - - terminate() { - for (const [, rmAbort] of this.#listeners) { - rmAbort?.(); - } - this.#listeners.clear(); - } -} - /** * @typedef {Object} PDFWorkerParameters * @property {string} [name] - The name of the worker. @@ -3511,8 +3379,6 @@ const build = export { build, getDocument, - isValidExplicitDest, - LoopbackPort, PDFDataRangeTransport, PDFDocumentLoadingTask, PDFDocumentProxy, diff --git a/src/display/api_utils.js b/src/display/api_utils.js new file mode 100644 index 000000000..bc63f4a20 --- /dev/null +++ b/src/display/api_utils.js @@ -0,0 +1,167 @@ +/* Copyright 2012 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { + _isValidExplicitDest, + isNodeJS, + stringToBytes, + warn, +} from "../shared/util.js"; + +function getUrlProp(val) { + if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { + return null; // The 'url' is unused with `PDFDataRangeTransport`. + } + if (val instanceof URL) { + return val.href; + } + if (typeof val === "string") { + if ( + typeof PDFJSDev !== "undefined" && + PDFJSDev.test("GENERIC") && + isNodeJS + ) { + return val; // Use the url as-is in Node.js environments. + } + + // The full path is required in the 'url' field. + const url = URL.parse(val, window.location); + if (url) { + return url.href; + } + } + throw new Error( + "Invalid PDF url data: " + + "either string or URL-object is expected in the url property." + ); +} + +function getDataProp(val) { + // Converting string or array-like data to Uint8Array. + if ( + typeof PDFJSDev !== "undefined" && + PDFJSDev.test("GENERIC") && + isNodeJS && + typeof Buffer !== "undefined" && // eslint-disable-line no-undef + val instanceof Buffer // eslint-disable-line no-undef + ) { + throw new Error( + "Please provide binary data as `Uint8Array`, rather than `Buffer`." + ); + } + if (val instanceof Uint8Array && val.byteLength === val.buffer.byteLength) { + // Use the data as-is when it's already a Uint8Array that completely + // "utilizes" its underlying ArrayBuffer, to prevent any possible + // issues when transferring it to the worker-thread. + return val; + } + if (typeof val === "string") { + return stringToBytes(val); + } + if ( + val instanceof ArrayBuffer || + ArrayBuffer.isView(val) || + (typeof val === "object" && !isNaN(val?.length)) + ) { + return new Uint8Array(val); + } + throw new Error( + "Invalid PDF binary data: either TypedArray, " + + "string, or array-like object is expected in the data property." + ); +} + +function getFactoryUrlProp(val) { + if (typeof val !== "string") { + return null; + } + if (val.endsWith("/")) { + return val; + } + throw new Error(`Invalid factory url: "${val}" must include trailing slash.`); +} + +const isRefProxy = v => + typeof v === "object" && + Number.isInteger(v?.num) && + v.num >= 0 && + Number.isInteger(v?.gen) && + v.gen >= 0; + +const isNameProxy = v => typeof v === "object" && typeof v?.name === "string"; + +const isValidExplicitDest = _isValidExplicitDest.bind( + null, + /* validRef = */ isRefProxy, + /* validName = */ isNameProxy +); + +class LoopbackPort { + #listeners = new Map(); + + #deferred = Promise.resolve(); + + postMessage(obj, transfer) { + const event = { + data: structuredClone(obj, transfer ? { transfer } : null), + }; + + this.#deferred.then(() => { + for (const [listener] of this.#listeners) { + listener.call(this, event); + } + }); + } + + addEventListener(name, listener, options = null) { + let rmAbort = null; + if (options?.signal instanceof AbortSignal) { + const { signal } = options; + if (signal.aborted) { + warn("LoopbackPort - cannot use an `aborted` signal."); + return; + } + const onAbort = () => this.removeEventListener(name, listener); + rmAbort = () => signal.removeEventListener("abort", onAbort); + + signal.addEventListener("abort", onAbort); + } + this.#listeners.set(listener, rmAbort); + } + + removeEventListener(name, listener) { + const rmAbort = this.#listeners.get(listener); + rmAbort?.(); + + this.#listeners.delete(listener); + } + + terminate() { + for (const [, rmAbort] of this.#listeners) { + rmAbort?.(); + } + this.#listeners.clear(); + } +} + +export { + getDataProp, + getFactoryUrlProp, + getUrlProp, + isNameProxy, + isRefProxy, + isValidExplicitDest, + LoopbackPort, +}; diff --git a/src/pdf.js b/src/pdf.js index d707b01ce..3aef82f1a 100644 --- a/src/pdf.js +++ b/src/pdf.js @@ -47,7 +47,6 @@ import { import { build, getDocument, - isValidExplicitDest, PDFDataRangeTransport, PDFWorker, version, @@ -76,6 +75,7 @@ import { DOMSVGFactory } from "./display/svg_factory.js"; import { DrawLayer } from "./display/draw_layer.js"; import { GlobalWorkerOptions } from "./display/worker_options.js"; import { HighlightOutliner } from "./display/editor/drawers/highlight.js"; +import { isValidExplicitDest } from "./display/api_utils.js"; import { SignatureExtractor } from "./display/editor/drawers/signaturedraw.js"; import { TextLayer } from "./display/text_layer.js"; import { TouchManager } from "./display/touch_manager.js"; diff --git a/test/unit/message_handler_spec.js b/test/unit/message_handler_spec.js index 35ddacb71..b20fbd009 100644 --- a/test/unit/message_handler_spec.js +++ b/test/unit/message_handler_spec.js @@ -17,7 +17,7 @@ import { AbortException, UnknownErrorException, } from "../../src/shared/util.js"; -import { LoopbackPort } from "../../src/display/api.js"; +import { LoopbackPort } from "../../src/display/api_utils.js"; import { MessageHandler } from "../../src/shared/message_handler.js"; describe("message_handler", function () { diff --git a/test/unit/pdf_spec.js b/test/unit/pdf_spec.js index b594eb83d..26b8a5171 100644 --- a/test/unit/pdf_spec.js +++ b/test/unit/pdf_spec.js @@ -38,7 +38,6 @@ import { import { build, getDocument, - isValidExplicitDest, PDFDataRangeTransport, PDFWorker, version, @@ -66,6 +65,7 @@ import { ColorPicker } from "../../src/display/editor/color_picker.js"; import { DOMSVGFactory } from "../../src/display/svg_factory.js"; import { DrawLayer } from "../../src/display/draw_layer.js"; import { GlobalWorkerOptions } from "../../src/display/worker_options.js"; +import { isValidExplicitDest } from "../../src/display/api_utils.js"; import { SignatureExtractor } from "../../src/display/editor/drawers/signaturedraw.js"; import { TextLayer } from "../../src/display/text_layer.js"; import { TouchManager } from "../../src/display/touch_manager.js"; From e91b480c090d0560f975fea6eed33c5f85d915a7 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 20 May 2025 13:38:05 +0200 Subject: [PATCH 2/2] Move the `PDFObjects` class to its own file This isn't directly part of the official API, and having this class in its own file could help avoid future changes (e.g. issue 18148) affecting the size of the `src/display/api.js` file unnecessarily. --- src/display/api.js | 110 +------------------------------- src/display/pdf_objects.js | 125 +++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 109 deletions(-) create mode 100644 src/display/pdf_objects.js diff --git a/src/display/api.js b/src/display/api.js index aec5e98a8..2c17e277a 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -73,6 +73,7 @@ import { PDFDataTransportStream } from "./transport_stream.js"; import { PDFFetchStream } from "display-fetch_stream"; import { PDFNetworkStream } from "display-network"; import { PDFNodeStream } from "display-node_stream"; +import { PDFObjects } from "./pdf_objects.js"; import { TextLayer } from "./text_layer.js"; import { XfaText } from "./xfa_text.js"; @@ -3010,115 +3011,6 @@ class WorkerTransport { } } -const INITIAL_DATA = Symbol("INITIAL_DATA"); - -/** - * A PDF document and page is built of many objects. E.g. there are objects for - * fonts, images, rendering code, etc. These objects may get processed inside of - * a worker. This class implements some basic methods to manage these objects. - */ -class PDFObjects { - #objs = Object.create(null); - - /** - * Ensures there is an object defined for `objId`. - * - * @param {string} objId - * @returns {Object} - */ - #ensureObj(objId) { - return (this.#objs[objId] ||= { - ...Promise.withResolvers(), - data: INITIAL_DATA, - }); - } - - /** - * If called *without* callback, this returns the data of `objId` but the - * object needs to be resolved. If it isn't, this method throws. - * - * If called *with* a callback, the callback is called with the data of the - * object once the object is resolved. That means, if you call this method - * and the object is already resolved, the callback gets called right away. - * - * @param {string} objId - * @param {function} [callback] - * @returns {any} - */ - get(objId, callback = null) { - // If there is a callback, then the get can be async and the object is - // not required to be resolved right now. - if (callback) { - const obj = this.#ensureObj(objId); - obj.promise.then(() => callback(obj.data)); - return null; - } - // If there isn't a callback, the user expects to get the resolved data - // directly. - const obj = this.#objs[objId]; - // If there isn't an object yet or the object isn't resolved, then the - // data isn't ready yet! - if (!obj || obj.data === INITIAL_DATA) { - throw new Error(`Requesting object that isn't resolved yet ${objId}.`); - } - return obj.data; - } - - /** - * @param {string} objId - * @returns {boolean} - */ - has(objId) { - const obj = this.#objs[objId]; - return !!obj && obj.data !== INITIAL_DATA; - } - - /** - * @param {string} objId - * @returns {boolean} - */ - delete(objId) { - const obj = this.#objs[objId]; - if (!obj || obj.data === INITIAL_DATA) { - // Only allow removing the object *after* it's been resolved. - return false; - } - delete this.#objs[objId]; - return true; - } - - /** - * Resolves the object `objId` with optional `data`. - * - * @param {string} objId - * @param {any} [data] - */ - resolve(objId, data = null) { - const obj = this.#ensureObj(objId); - obj.data = data; - obj.resolve(); - } - - clear() { - for (const objId in this.#objs) { - const { data } = this.#objs[objId]; - data?.bitmap?.close(); // Release any `ImageBitmap` data. - } - this.#objs = Object.create(null); - } - - *[Symbol.iterator]() { - for (const objId in this.#objs) { - const { data } = this.#objs[objId]; - - if (data === INITIAL_DATA) { - continue; - } - yield [objId, data]; - } - } -} - /** * Allows controlling of the rendering tasks. */ diff --git a/src/display/pdf_objects.js b/src/display/pdf_objects.js new file mode 100644 index 000000000..7e801c1c9 --- /dev/null +++ b/src/display/pdf_objects.js @@ -0,0 +1,125 @@ +/* Copyright 2012 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const INITIAL_DATA = Symbol("INITIAL_DATA"); + +/** + * A PDF document and page is built of many objects. E.g. there are objects for + * fonts, images, rendering code, etc. These objects may get processed inside of + * a worker. This class implements some basic methods to manage these objects. + */ +class PDFObjects { + #objs = Object.create(null); + + /** + * Ensures there is an object defined for `objId`. + * + * @param {string} objId + * @returns {Object} + */ + #ensureObj(objId) { + return (this.#objs[objId] ||= { + ...Promise.withResolvers(), + data: INITIAL_DATA, + }); + } + + /** + * If called *without* callback, this returns the data of `objId` but the + * object needs to be resolved. If it isn't, this method throws. + * + * If called *with* a callback, the callback is called with the data of the + * object once the object is resolved. That means, if you call this method + * and the object is already resolved, the callback gets called right away. + * + * @param {string} objId + * @param {function} [callback] + * @returns {any} + */ + get(objId, callback = null) { + // If there is a callback, then the get can be async and the object is + // not required to be resolved right now. + if (callback) { + const obj = this.#ensureObj(objId); + obj.promise.then(() => callback(obj.data)); + return null; + } + // If there isn't a callback, the user expects to get the resolved data + // directly. + const obj = this.#objs[objId]; + // If there isn't an object yet or the object isn't resolved, then the + // data isn't ready yet! + if (!obj || obj.data === INITIAL_DATA) { + throw new Error(`Requesting object that isn't resolved yet ${objId}.`); + } + return obj.data; + } + + /** + * @param {string} objId + * @returns {boolean} + */ + has(objId) { + const obj = this.#objs[objId]; + return !!obj && obj.data !== INITIAL_DATA; + } + + /** + * @param {string} objId + * @returns {boolean} + */ + delete(objId) { + const obj = this.#objs[objId]; + if (!obj || obj.data === INITIAL_DATA) { + // Only allow removing the object *after* it's been resolved. + return false; + } + delete this.#objs[objId]; + return true; + } + + /** + * Resolves the object `objId` with optional `data`. + * + * @param {string} objId + * @param {any} [data] + */ + resolve(objId, data = null) { + const obj = this.#ensureObj(objId); + obj.data = data; + obj.resolve(); + } + + clear() { + for (const objId in this.#objs) { + const { data } = this.#objs[objId]; + data?.bitmap?.close(); // Release any `ImageBitmap` data. + } + this.#objs = Object.create(null); + } + + *[Symbol.iterator]() { + for (const objId in this.#objs) { + const { data } = this.#objs[objId]; + + if (data === INITIAL_DATA) { + continue; + } + yield [objId, data]; + } + } +} + +export { PDFObjects };