Prefer /Resources from the /Contents stream-dict, if available

In rare cases /Resources are found in the /Contents stream-dict as well as in the /Page dict; when that happens, we need to prefer the stream-dict ones. See `issue18894.pdf`.
This commit is contained in:
Jonas Jenwald 2025-04-11 12:35:53 +02:00
parent 53c5a53668
commit 7a94fafd30
3 changed files with 58 additions and 20 deletions

View File

@ -49,6 +49,8 @@ import {
lookupNormalRect, lookupNormalRect,
lookupRect, lookupRect,
numberToString, numberToString,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE, stringToAsciiOrUTF16BE,
stringToUTF16String, stringToUTF16String,
} from "./core_utils.js"; } from "./core_utils.js";
@ -1196,7 +1198,7 @@ class Annotation {
const appearanceDict = appearance.dict; const appearanceDict = appearance.dict;
const resources = await this.loadResources( const resources = await this.loadResources(
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"], RESOURCES_KEYS_OPERATOR_LIST,
appearance appearance
); );
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]); const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
@ -1257,7 +1259,7 @@ class Annotation {
} }
const resources = await this.loadResources( const resources = await this.loadResources(
["ExtGState", "Font", "Properties", "XObject"], RESOURCES_KEYS_TEXT_CONTENT,
this.appearance this.appearance
); );

View File

@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0]; const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
// Names of the /Resources entries that are loaded up-front when an
// operator list is generated (shared by the page and annotation code).
const RESOURCES_KEYS_OPERATOR_LIST = [
  "ColorSpace", "ExtGState", "Font", "Pattern",
  "Properties", "Shading", "XObject",
];
// Names of the /Resources entries that are loaded up-front when text
// content is extracted (shared by the page and annotation code).
const RESOURCES_KEYS_TEXT_CONTENT = [
  "ExtGState", "Font", "Properties", "XObject",
];
function getLookupTableFactory(initializer) { function getLookupTableFactory(initializer) {
let lookup; let lookup;
return function () { return function () {
@ -745,6 +762,8 @@ export {
readUint16, readUint16,
readUint32, readUint32,
recoverJsURL, recoverJsURL,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE, stringToAsciiOrUTF16BE,
stringToUTF16HexString, stringToUTF16HexString,
stringToUTF16String, stringToUTF16String,

View File

@ -45,6 +45,8 @@ import {
lookupNormalRect, lookupNormalRect,
MissingDataException, MissingDataException,
PDF_VERSION_REGEXP, PDF_VERSION_REGEXP,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
validateCSSFont, validateCSSFont,
XRefEntryException, XRefEntryException,
XRefParseException, XRefParseException,
@ -419,6 +421,25 @@ class Page {
await objectLoader.load(); await objectLoader.load();
} }
async #getMergedResources(streamDict, keys) {
// In rare cases /Resources are also found in the /Contents stream-dict,
// in addition to in the /Page dict, hence we need to prefer those when
// available (see issue18894.pdf).
const localResources = streamDict?.get("Resources");
if (!(localResources instanceof Dict)) {
return this.resources;
}
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
await objectLoader.load();
return Dict.merge({
xref: this.xref,
dictArray: [localResources, this.resources],
mergeSubDicts: true,
});
}
async getOperatorList({ async getOperatorList({
handler, handler,
sink, sink,
@ -429,15 +450,7 @@ class Page {
modifiedIds = null, modifiedIds = null,
}) { }) {
const contentStreamPromise = this.getContentStream(); const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([ const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
"ColorSpace",
"ExtGState",
"Font",
"Pattern",
"Properties",
"Shading",
"XObject",
]);
const partialEvaluator = new PartialEvaluator({ const partialEvaluator = new PartialEvaluator({
xref: this.xref, xref: this.xref,
@ -525,11 +538,15 @@ class Page {
contentStreamPromise, contentStreamPromise,
resourcesPromise, resourcesPromise,
]).then(async ([contentStream]) => { ]).then(async ([contentStream]) => {
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_OPERATOR_LIST
);
const opList = new OperatorList(intent, sink); const opList = new OperatorList(intent, sink);
handler.send("StartRenderPage", { handler.send("StartRenderPage", {
transparency: partialEvaluator.hasBlendModes( transparency: partialEvaluator.hasBlendModes(
this.resources, resources,
this.nonBlendModesSet this.nonBlendModesSet
), ),
pageIndex: this.pageIndex, pageIndex: this.pageIndex,
@ -539,7 +556,7 @@ class Page {
await partialEvaluator.getOperatorList({ await partialEvaluator.getOperatorList({
stream: contentStream, stream: contentStream,
task, task,
resources: this.resources, resources,
operatorList: opList, operatorList: opList,
}); });
return opList; return opList;
@ -642,12 +659,7 @@ class Page {
sink, sink,
}) { }) {
const contentStreamPromise = this.getContentStream(); const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([ const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
"ExtGState",
"Font",
"Properties",
"XObject",
]);
const langPromise = this.pdfManager.ensureCatalog("lang"); const langPromise = this.pdfManager.ensureCatalog("lang");
const [contentStream, , lang] = await Promise.all([ const [contentStream, , lang] = await Promise.all([
@ -655,6 +667,11 @@ class Page {
resourcesPromise, resourcesPromise,
langPromise, langPromise,
]); ]);
const resources = await this.#getMergedResources(
contentStream.dict,
RESOURCES_KEYS_TEXT_CONTENT
);
const partialEvaluator = new PartialEvaluator({ const partialEvaluator = new PartialEvaluator({
xref: this.xref, xref: this.xref,
handler, handler,
@ -672,7 +689,7 @@ class Page {
return partialEvaluator.getTextContent({ return partialEvaluator.getTextContent({
stream: contentStream, stream: contentStream,
task, task,
resources: this.resources, resources,
includeMarkedContent, includeMarkedContent,
disableNormalization, disableNormalization,
sink, sink,