pdf.js/web/struct_tree_layer_builder.js
Jonas Jenwald 4b8628637a Improve the StructTreeLayerBuilder.render method
In hindsight it occurred to me that there's a couple of smaller issues with this method after it's made asynchronous (in PR 18658).

 - If the `render`-method is invoked back-to-back the existing caching doesn't guarantee that re-parsing won't occur, which we can address by introducing a new (private) promise.

 - If there's any errors fetching and/or parsing the structTree-data, we'd attempt to parse it again on re-rendering despite that being pointless.
2024-09-04 11:53:51 +02:00

194 lines
4.7 KiB
JavaScript

/* Copyright 2021 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { removeNullCharacters } from "./ui_utils.js";
const PDF_ROLE_TO_HTML_ROLE = {
// Document level structure types
Document: null, // There's a "document" role, but it doesn't make sense here.
DocumentFragment: null,
// Grouping level structure types
Part: "group",
Sect: "group", // XXX: There's a "section" role, but it's abstract.
Div: "group",
Aside: "note",
NonStruct: "none",
// Block level structure types
P: null,
// H<n>,
H: "heading",
Title: null,
FENote: "note",
// Sub-block level structure type
Sub: "group",
// General inline level structure types
Lbl: null,
Span: null,
Em: null,
Strong: null,
Link: "link",
Annot: "note",
Form: "form",
// Ruby and Warichu structure types
Ruby: null,
RB: null,
RT: null,
RP: null,
Warichu: null,
WT: null,
WP: null,
// List standard structure types
L: "list",
LI: "listitem",
LBody: null,
// Table standard structure types
Table: "table",
TR: "row",
TH: "columnheader",
TD: "cell",
THead: "columnheader",
TBody: null,
TFoot: null,
// Standard structure type Caption
Caption: null,
// Standard structure type Figure
Figure: "figure",
// Standard structure type Formula
Formula: null,
// standard structure type Artifact
Artifact: null,
};
const HEADING_PATTERN = /^H(\d+)$/;
class StructTreeLayerBuilder {
#promise;
#treeDom = null;
#treePromise;
#elementAttributes = new Map();
constructor(pdfPage) {
this.#promise = pdfPage.getStructTree();
}
async render() {
if (this.#treePromise) {
return this.#treePromise;
}
const { promise, resolve, reject } = Promise.withResolvers();
this.#treePromise = promise;
try {
this.#treeDom = this.#walk(await this.#promise);
} catch (ex) {
reject(ex);
}
this.#promise = null;
this.#treeDom?.classList.add("structTree");
resolve(this.#treeDom);
return promise;
}
async getAriaAttributes(annotationId) {
await this.render();
return this.#elementAttributes.get(annotationId);
}
hide() {
if (this.#treeDom && !this.#treeDom.hidden) {
this.#treeDom.hidden = true;
}
}
show() {
if (this.#treeDom?.hidden) {
this.#treeDom.hidden = false;
}
}
#setAttributes(structElement, htmlElement) {
const { alt, id, lang } = structElement;
if (alt !== undefined) {
// Don't add the label in the struct tree layer but on the annotation
// in the annotation layer.
let added = false;
const label = removeNullCharacters(alt);
for (const child of structElement.children) {
if (child.type === "annotation") {
let attrs = this.#elementAttributes.get(child.id);
if (!attrs) {
attrs = new Map();
this.#elementAttributes.set(child.id, attrs);
}
attrs.set("aria-label", label);
added = true;
}
}
if (!added) {
htmlElement.setAttribute("aria-label", label);
}
}
if (id !== undefined) {
htmlElement.setAttribute("aria-owns", id);
}
if (lang !== undefined) {
htmlElement.setAttribute(
"lang",
removeNullCharacters(lang, /* replaceInvisible = */ true)
);
}
}
#walk(node) {
if (!node) {
return null;
}
const element = document.createElement("span");
if ("role" in node) {
const { role } = node;
const match = role.match(HEADING_PATTERN);
if (match) {
element.setAttribute("role", "heading");
element.setAttribute("aria-level", match[1]);
} else if (PDF_ROLE_TO_HTML_ROLE[role]) {
element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]);
}
}
this.#setAttributes(node, element);
if (node.children) {
if (node.children.length === 1 && "id" in node.children[0]) {
// Often there is only one content node so just set the values on the
// parent node to avoid creating an extra span.
this.#setAttributes(node.children[0], element);
} else {
for (const kid of node.children) {
element.append(this.#walk(kid));
}
}
}
return element;
}
}
export { StructTreeLayerBuilder };