Merge branch 'rcs/assume-all-pages-in-top-level-when-likely' into rcs/assume-all-pages-in-top-level-when-likely-master

This commit is contained in:
Richard Smith (smir) 2024-08-27 10:35:52 +01:00
commit 50300a1d47

View File

@ -122,6 +122,8 @@ function fetchRemoteDest(action) {
return null; return null;
} }
const MIN_PAGES_TO_ASSUME_ALL_TOP_LEVEL = 20;
class Catalog { class Catalog {
constructor(pdfManager, xref) { constructor(pdfManager, xref) {
this.pdfManager = pdfManager; this.pdfManager = pdfManager;
@ -1190,6 +1192,14 @@ class Catalog {
pageDictCache = this.pageDictCache; pageDictCache = this.pageDictCache;
let currentPageIndex = 0; let currentPageIndex = 0;
const getIsPage = async obj => {
let type = obj.getRaw("Type");
if (type instanceof Ref) {
type = await xref.fetchAsync(type);
}
return isName(type, "Page") || !obj.has("Kids");
};
while (nodesToVisit.length) { while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop(); const currentNode = nodesToVisit.pop();
@ -1209,11 +1219,8 @@ class Catalog {
const obj = await (pageDictCache.get(currentNode) || const obj = await (pageDictCache.get(currentNode) ||
xref.fetchAsync(currentNode)); xref.fetchAsync(currentNode));
if (obj instanceof Dict) { if (obj instanceof Dict) {
let type = obj.getRaw("Type"); const isPage = await getIsPage(obj);
if (type instanceof Ref) { if (isPage) {
type = await xref.fetchAsync(type);
}
if (isName(type, "Page") || !obj.has("Kids")) {
// Cache the Page reference, since it can *greatly* improve // Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents // performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree. // where all nodes are found at *one* level of the tree.
@ -1288,6 +1295,22 @@ class Catalog {
// Always check all `Kids` nodes, to avoid getting stuck in an empty // Always check all `Kids` nodes, to avoid getting stuck in an empty
// node further down in the tree (see issue5644.pdf, issue8088.pdf), // node further down in the tree (see issue5644.pdf, issue8088.pdf),
// and to ensure that we actually find the correct `Page` dict. // and to ensure that we actually find the correct `Page` dict.
// EXCEPT if it looks like this is likely an "everything in the top level
// /Pages" PDF and there's enough pages to be worth not reading everything
// See if the n'th item is a page
if (
currentNode === this.toplevelPagesDict &&
count >= MIN_PAGES_TO_ASSUME_ALL_TOP_LEVEL &&
count === kids.length
) {
const maybePage = await xref.fetchAsync(kids[pageIndex]);
const isPage = await getIsPage(maybePage);
if (isPage) {
return [maybePage, kids[pageIndex]];
}
}
for (let last = kids.length - 1; last >= 0; last--) { for (let last = kids.length - 1; last >= 0; last--) {
const lastKid = kids[last]; const lastKid = kids[last];
nodesToVisit.push(lastKid); nodesToVisit.push(lastKid);