Prefer /Resources from the /Contents stream-dict, if available
In rare cases, /Resources are found in the /Contents stream-dict in addition to the /Page dict; when that happens, the entries from the /Contents stream-dict need to take precedence. See `issue18894.pdf`.
This commit is contained in:
parent
53c5a53668
commit
7a94fafd30
@ -49,6 +49,8 @@ import {
|
|||||||
lookupNormalRect,
|
lookupNormalRect,
|
||||||
lookupRect,
|
lookupRect,
|
||||||
numberToString,
|
numberToString,
|
||||||
|
RESOURCES_KEYS_OPERATOR_LIST,
|
||||||
|
RESOURCES_KEYS_TEXT_CONTENT,
|
||||||
stringToAsciiOrUTF16BE,
|
stringToAsciiOrUTF16BE,
|
||||||
stringToUTF16String,
|
stringToUTF16String,
|
||||||
} from "./core_utils.js";
|
} from "./core_utils.js";
|
||||||
@ -1196,7 +1198,7 @@ class Annotation {
|
|||||||
|
|
||||||
const appearanceDict = appearance.dict;
|
const appearanceDict = appearance.dict;
|
||||||
const resources = await this.loadResources(
|
const resources = await this.loadResources(
|
||||||
["ExtGState", "ColorSpace", "Pattern", "Shading", "XObject", "Font"],
|
RESOURCES_KEYS_OPERATOR_LIST,
|
||||||
appearance
|
appearance
|
||||||
);
|
);
|
||||||
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
|
const bbox = lookupRect(appearanceDict.getArray("BBox"), [0, 0, 1, 1]);
|
||||||
@ -1257,7 +1259,7 @@ class Annotation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const resources = await this.loadResources(
|
const resources = await this.loadResources(
|
||||||
["ExtGState", "Font", "Properties", "XObject"],
|
RESOURCES_KEYS_TEXT_CONTENT,
|
||||||
this.appearance
|
this.appearance
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@ -32,6 +32,23 @@ const MIN_INT_32 = -(2 ** 31);
|
|||||||
|
|
||||||
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
|
const IDENTITY_MATRIX = [1, 0, 0, 1, 0, 0];
|
||||||
|
|
||||||
|
const RESOURCES_KEYS_OPERATOR_LIST = [
|
||||||
|
"ColorSpace",
|
||||||
|
"ExtGState",
|
||||||
|
"Font",
|
||||||
|
"Pattern",
|
||||||
|
"Properties",
|
||||||
|
"Shading",
|
||||||
|
"XObject",
|
||||||
|
];
|
||||||
|
|
||||||
|
const RESOURCES_KEYS_TEXT_CONTENT = [
|
||||||
|
"ExtGState",
|
||||||
|
"Font",
|
||||||
|
"Properties",
|
||||||
|
"XObject",
|
||||||
|
];
|
||||||
|
|
||||||
function getLookupTableFactory(initializer) {
|
function getLookupTableFactory(initializer) {
|
||||||
let lookup;
|
let lookup;
|
||||||
return function () {
|
return function () {
|
||||||
@ -745,6 +762,8 @@ export {
|
|||||||
readUint16,
|
readUint16,
|
||||||
readUint32,
|
readUint32,
|
||||||
recoverJsURL,
|
recoverJsURL,
|
||||||
|
RESOURCES_KEYS_OPERATOR_LIST,
|
||||||
|
RESOURCES_KEYS_TEXT_CONTENT,
|
||||||
stringToAsciiOrUTF16BE,
|
stringToAsciiOrUTF16BE,
|
||||||
stringToUTF16HexString,
|
stringToUTF16HexString,
|
||||||
stringToUTF16String,
|
stringToUTF16String,
|
||||||
|
|||||||
@ -45,6 +45,8 @@ import {
|
|||||||
lookupNormalRect,
|
lookupNormalRect,
|
||||||
MissingDataException,
|
MissingDataException,
|
||||||
PDF_VERSION_REGEXP,
|
PDF_VERSION_REGEXP,
|
||||||
|
RESOURCES_KEYS_OPERATOR_LIST,
|
||||||
|
RESOURCES_KEYS_TEXT_CONTENT,
|
||||||
validateCSSFont,
|
validateCSSFont,
|
||||||
XRefEntryException,
|
XRefEntryException,
|
||||||
XRefParseException,
|
XRefParseException,
|
||||||
@ -419,6 +421,25 @@ class Page {
|
|||||||
await objectLoader.load();
|
await objectLoader.load();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async #getMergedResources(streamDict, keys) {
|
||||||
|
// In rare cases /Resources are also found in the /Contents stream-dict,
|
||||||
|
// in addition to in the /Page dict, hence we need to prefer those when
|
||||||
|
// available (see issue18894.pdf).
|
||||||
|
const localResources = streamDict?.get("Resources");
|
||||||
|
|
||||||
|
if (!(localResources instanceof Dict)) {
|
||||||
|
return this.resources;
|
||||||
|
}
|
||||||
|
const objectLoader = new ObjectLoader(localResources, keys, this.xref);
|
||||||
|
await objectLoader.load();
|
||||||
|
|
||||||
|
return Dict.merge({
|
||||||
|
xref: this.xref,
|
||||||
|
dictArray: [localResources, this.resources],
|
||||||
|
mergeSubDicts: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
async getOperatorList({
|
async getOperatorList({
|
||||||
handler,
|
handler,
|
||||||
sink,
|
sink,
|
||||||
@ -429,15 +450,7 @@ class Page {
|
|||||||
modifiedIds = null,
|
modifiedIds = null,
|
||||||
}) {
|
}) {
|
||||||
const contentStreamPromise = this.getContentStream();
|
const contentStreamPromise = this.getContentStream();
|
||||||
const resourcesPromise = this.loadResources([
|
const resourcesPromise = this.loadResources(RESOURCES_KEYS_OPERATOR_LIST);
|
||||||
"ColorSpace",
|
|
||||||
"ExtGState",
|
|
||||||
"Font",
|
|
||||||
"Pattern",
|
|
||||||
"Properties",
|
|
||||||
"Shading",
|
|
||||||
"XObject",
|
|
||||||
]);
|
|
||||||
|
|
||||||
const partialEvaluator = new PartialEvaluator({
|
const partialEvaluator = new PartialEvaluator({
|
||||||
xref: this.xref,
|
xref: this.xref,
|
||||||
@ -525,11 +538,15 @@ class Page {
|
|||||||
contentStreamPromise,
|
contentStreamPromise,
|
||||||
resourcesPromise,
|
resourcesPromise,
|
||||||
]).then(async ([contentStream]) => {
|
]).then(async ([contentStream]) => {
|
||||||
|
const resources = await this.#getMergedResources(
|
||||||
|
contentStream.dict,
|
||||||
|
RESOURCES_KEYS_OPERATOR_LIST
|
||||||
|
);
|
||||||
const opList = new OperatorList(intent, sink);
|
const opList = new OperatorList(intent, sink);
|
||||||
|
|
||||||
handler.send("StartRenderPage", {
|
handler.send("StartRenderPage", {
|
||||||
transparency: partialEvaluator.hasBlendModes(
|
transparency: partialEvaluator.hasBlendModes(
|
||||||
this.resources,
|
resources,
|
||||||
this.nonBlendModesSet
|
this.nonBlendModesSet
|
||||||
),
|
),
|
||||||
pageIndex: this.pageIndex,
|
pageIndex: this.pageIndex,
|
||||||
@ -539,7 +556,7 @@ class Page {
|
|||||||
await partialEvaluator.getOperatorList({
|
await partialEvaluator.getOperatorList({
|
||||||
stream: contentStream,
|
stream: contentStream,
|
||||||
task,
|
task,
|
||||||
resources: this.resources,
|
resources,
|
||||||
operatorList: opList,
|
operatorList: opList,
|
||||||
});
|
});
|
||||||
return opList;
|
return opList;
|
||||||
@ -642,12 +659,7 @@ class Page {
|
|||||||
sink,
|
sink,
|
||||||
}) {
|
}) {
|
||||||
const contentStreamPromise = this.getContentStream();
|
const contentStreamPromise = this.getContentStream();
|
||||||
const resourcesPromise = this.loadResources([
|
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
|
||||||
"ExtGState",
|
|
||||||
"Font",
|
|
||||||
"Properties",
|
|
||||||
"XObject",
|
|
||||||
]);
|
|
||||||
const langPromise = this.pdfManager.ensureCatalog("lang");
|
const langPromise = this.pdfManager.ensureCatalog("lang");
|
||||||
|
|
||||||
const [contentStream, , lang] = await Promise.all([
|
const [contentStream, , lang] = await Promise.all([
|
||||||
@ -655,6 +667,11 @@ class Page {
|
|||||||
resourcesPromise,
|
resourcesPromise,
|
||||||
langPromise,
|
langPromise,
|
||||||
]);
|
]);
|
||||||
|
const resources = await this.#getMergedResources(
|
||||||
|
contentStream.dict,
|
||||||
|
RESOURCES_KEYS_TEXT_CONTENT
|
||||||
|
);
|
||||||
|
|
||||||
const partialEvaluator = new PartialEvaluator({
|
const partialEvaluator = new PartialEvaluator({
|
||||||
xref: this.xref,
|
xref: this.xref,
|
||||||
handler,
|
handler,
|
||||||
@ -672,7 +689,7 @@ class Page {
|
|||||||
return partialEvaluator.getTextContent({
|
return partialEvaluator.getTextContent({
|
||||||
stream: contentStream,
|
stream: contentStream,
|
||||||
task,
|
task,
|
||||||
resources: this.resources,
|
resources,
|
||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
disableNormalization,
|
disableNormalization,
|
||||||
sink,
|
sink,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user