diff --git a/src/core/catalog.js b/src/core/catalog.js index 9e2d08474..9db150c8f 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -76,7 +76,7 @@ function fetchRemoteDest(action) { dest = dest.name; } if (typeof dest === "string") { - return stringToPDFString(dest); + return stringToPDFString(dest, /* keepEscapeSequence = */ true); } else if (isValidExplicitDest(dest)) { return JSON.stringify(dest); } @@ -674,7 +674,8 @@ class Catalog { for (const [key, value] of obj.getAll()) { const dest = fetchDest(value); if (dest) { - dests[stringToPDFString(key)] = dest; + dests[stringToPDFString(key, /* keepEscapeSequence = */ true)] = + dest; } } } else if (obj instanceof Dict) { @@ -682,7 +683,8 @@ class Catalog { const dest = fetchDest(value); if (dest) { // Always let the NameTree take precedence. - dests[stringToPDFString(key)] ||= dest; + dests[stringToPDFString(key, /* keepEscapeSequence = */ true)] ||= + dest; } } } @@ -1046,7 +1048,8 @@ class Catalog { for (const [key, value] of nameTree.getAll()) { const fs = new FileSpec(value, this.xref); attachments ??= Object.create(null); - attachments[stringToPDFString(key)] = fs.serializable; + attachments[stringToPDFString(key, /* keepEscapeSequence = */ true)] = + fs.serializable; } } return shadow(this, "attachments", attachments); @@ -1060,7 +1063,10 @@ class Catalog { const nameTree = new NameTree(obj.getRaw("XFAImages"), this.xref); for (const [key, value] of nameTree.getAll()) { xfaImages ??= new Dict(this.xref); - xfaImages.set(stringToPDFString(key), value); + xfaImages.set( + stringToPDFString(key, /* keepEscapeSequence = */ true), + value + ); } } return shadow(this, "xfaImages", xfaImages); @@ -1084,7 +1090,10 @@ class Catalog { } else if (typeof js !== "string") { return; } - js = stringToPDFString(js).replaceAll("\x00", ""); + js = stringToPDFString(js, /* keepEscapeSequence = */ true).replaceAll( + "\x00", + "" + ); // Skip empty entries, similar to the `_collectJS` function. if (js) { (javaScript ||= new Map()).set(name, js); @@ -1094,7 +1103,10 @@ class Catalog { if (obj instanceof Dict && obj.has("JavaScript")) { const nameTree = new NameTree(obj.getRaw("JavaScript"), this.xref); for (const [key, value] of nameTree.getAll()) { - appendIfJavaScriptDict(stringToPDFString(key), value); + appendIfJavaScriptDict( + stringToPDFString(key, /* keepEscapeSequence = */ true), + value + ); } } // Append OpenAction "JavaScript" actions, if any, to the JavaScript map. @@ -1633,7 +1645,10 @@ class Catalog { const name = target.get("N"); if (isName(relationship, "C") && typeof name === "string") { - attachment = docAttachments[stringToPDFString(name)]; + attachment = + docAttachments[ + stringToPDFString(name, /* keepEscapeSequence = */ true) + ]; } } @@ -1699,7 +1714,11 @@ class Catalog { js = jsAction; } - const jsURL = js && recoverJsURL(stringToPDFString(js)); + const jsURL = + js && + recoverJsURL( + stringToPDFString(js, /* keepEscapeSequence = */ true) + ); if (jsURL) { url = jsURL.url; resultObj.newWindow = jsURL.newWindow; @@ -1735,7 +1754,10 @@ class Catalog { dest = dest.name; } if (typeof dest === "string") { - resultObj.dest = stringToPDFString(dest); + resultObj.dest = stringToPDFString( + dest, + /* keepEscapeSequence = */ true + ); } else if (isValidExplicitDest(dest)) { resultObj.dest = dest; } diff --git a/src/core/core_utils.js b/src/core/core_utils.js index eef79cddd..b91cb375c 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -424,7 +424,10 @@ function _collectJS(entry, xref, list, parents) { } else if (typeof js === "string") { code = js; } - code &&= stringToPDFString(code).replaceAll("\x00", ""); + code &&= stringToPDFString( + code, + /* keepEscapeSequence = */ true + ).replaceAll("\x00", ""); if (code) { list.push(code); } diff --git a/src/core/file_spec.js b/src/core/file_spec.js index 003ae64f0..d331af04e 100644 --- a/src/core/file_spec.js +++ b/src/core/file_spec.js @@ -77,7 +77,7 @@ class FileSpec { const item = pickPlatformItem(this.root); if (item && typeof item === "string") { - filename = stringToPDFString(item) + filename = stringToPDFString(item, /* keepEscapeSequence = */ true) .replaceAll("\\\\", "\\") .replaceAll("\\/", "/") .replaceAll("\\", "/"); diff --git a/src/shared/util.js b/src/shared/util.js index 0ced6e137..cc1456a44 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -1022,9 +1022,9 @@ const PDFStringTranslateTable = [ 0x131, 0x142, 0x153, 0x161, 0x17e, 0, 0x20ac, ]; -function stringToPDFString(str) { +function stringToPDFString(str, keepEscapeSequence = false) { // See section 7.9.2.2 Text String Type. - // The string can contain some language codes bracketed with 0x0b, + // The string can contain some language codes bracketed with 0x1b, // so we must remove them. if (str[0] >= "\xEF") { let encoding; @@ -1047,7 +1047,7 @@ function stringToPDFString(str) { const decoder = new TextDecoder(encoding, { fatal: true }); const buffer = stringToBytes(str); const decoded = decoder.decode(buffer); - if (!decoded.includes("\x1b")) { + if (keepEscapeSequence || !decoded.includes("\x1b")) { return decoded; } return decoded.replaceAll(/\x1b[^\x1b]*(?:\x1b|$)/g, ""); @@ -1060,7 +1060,7 @@ function stringToPDFString(str) { const strBuf = []; for (let i = 0, ii = str.length; i < ii; i++) { const charCode = str.charCodeAt(i); - if (charCode === 0x1b) { + if (!keepEscapeSequence && charCode === 0x1b) { // eslint-disable-next-line no-empty while (++i < ii && str.charCodeAt(i) !== 0x1b) {} continue; diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 37f2453c8..e9c84f7f3 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1421,6 +1421,28 @@ describe("api", function () { await loadingTask.destroy(); }); + it("gets a destination containing Unicode escape sequence (\x1b), from /Dests dictionary with keys using PDFDocEncoding", async function () { + if (isNodeJS) { + pending("Linked test-cases are not supported in Node.js."); + } + const loadingTask = getDocument(buildGetDocumentParams("issue19835.pdf")); + const pdfDoc = await loadingTask.promise; + + const page3 = await pdfDoc.getPage(3); + const annots = await page3.getAnnotations(); + + const annot = annots.find(x => x.id === "55R"); + // Sanity check to make sure that we found the "correct" annotation. + expect(annot.dest).toEqual( + "\u02d9\u0064\u002a\u0010\u000e\u0061\u00d6\u0002\u005b\u00b7\u201a\u0022\u00c5\u00da\u017e\u00bb\u00d5\u0062\u02dd\u00d1" + ); + + const dest = await pdfDoc.getDestination(annot.dest); + expect(dest).toEqual([28, { name: "XYZ" }, 34.0799999, 73.5199999, 0]); + + await loadingTask.destroy(); + }); + it("gets non-string destination", async function () { let numberPromise = pdfDocument.getDestination(4.3); let booleanPromise = pdfDocument.getDestination(true);