Extract Lang attribute for marked contents
This commit is contained in:
parent
de7179fd74
commit
e4569c5d22
@ -2317,9 +2317,19 @@ class PartialEvaluator {
|
||||
return;
|
||||
}
|
||||
// Other marked content types aren't supported yet.
|
||||
let props = null;
|
||||
if (args[1] instanceof Dict) {
|
||||
const lang = args[1].get("Lang");
|
||||
if (typeof lang === "string") {
|
||||
props = Object.create(null);
|
||||
props.lang = stringToPDFString(lang);
|
||||
}
|
||||
}
|
||||
|
||||
args = [
|
||||
args[0].name,
|
||||
args[1] instanceof Dict ? args[1].get("MCID") : null,
|
||||
props,
|
||||
];
|
||||
|
||||
break;
|
||||
@ -3505,8 +3515,13 @@ class PartialEvaluator {
|
||||
markedContentData.level++;
|
||||
|
||||
let mcid = null;
|
||||
let itemLang = null;
|
||||
if (args[1] instanceof Dict) {
|
||||
mcid = args[1].get("MCID");
|
||||
const langString = args[1].get("Lang");
|
||||
if (typeof langString === "string") {
|
||||
itemLang = stringToPDFString(langString);
|
||||
}
|
||||
}
|
||||
textContent.items.push({
|
||||
type: "beginMarkedContentProps",
|
||||
@ -3514,6 +3529,7 @@ class PartialEvaluator {
|
||||
? `${self.idFactory.getPageObjId()}_mc${mcid}`
|
||||
: null,
|
||||
tag: args[0] instanceof Name ? args[0].name : null,
|
||||
lang: itemLang,
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1166,6 +1166,8 @@ class PDFDocumentProxy {
|
||||
* 'beginMarkedContentProps', or 'endMarkedContent'.
|
||||
* @property {string} id - The marked content identifier. Only used for type
|
||||
* 'beginMarkedContentProps'.
|
||||
* @property {string|null} tag - The marked content tag.
|
||||
* @property {string|null} lang - The lang attribute for the marked content.
|
||||
*/
|
||||
|
||||
/**
|
||||
|
||||
@ -293,6 +293,9 @@ class TextLayer {
|
||||
if (item.id) {
|
||||
this.#container.setAttribute("id", `${item.id}`);
|
||||
}
|
||||
if (item.lang) {
|
||||
this.#container.setAttribute("lang", item.lang);
|
||||
}
|
||||
parent.append(this.#container);
|
||||
} else if (item.type === "endMarkedContent") {
|
||||
this.#container = this.#container.parentNode;
|
||||
|
||||
@ -4488,6 +4488,27 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Checks that the `Lang` entry of a marked-content property dictionary is
// exposed as `lang` on the third argument of `beginMarkedContentProps`.
it("gets operatorList, with marked content lang", async function () {
  const loadingTask = getDocument(
    buildGetDocumentParams("marked_content_lang.pdf")
  );
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(1);

  // Only the page's own operators are inspected, so annotations are disabled.
  const opList = await pdfPage.getOperatorList({
    annotationMode: AnnotationMode.DISABLE,
  });

  // First marked-content sequence of the fixture.
  expect(opList.fnArray[0]).toEqual(OPS.beginMarkedContentProps);
  expect(opList.argsArray[0][0]).toEqual("P");
  expect(opList.argsArray[0][2]?.lang).toEqual("en-US");

  // Second marked-content sequence of the fixture.
  expect(opList.fnArray[10]).toEqual(OPS.beginMarkedContentProps);
  expect(opList.argsArray[10][0]).toEqual("P");
  expect(opList.argsArray[10][2]?.lang).toEqual("es-ES");

  // Release the document, as the neighbouring tests do.
  await loadingTask.destroy();
});
|
||||
|
||||
it("gets operatorList, with page resources containing corrupt /CCITTFaxDecode data", async function () {
|
||||
const loadingTask = getDocument(
|
||||
buildGetDocumentParams("poppler-90-0-fuzzed.pdf")
|
||||
|
||||
@ -250,4 +250,26 @@ describe("textLayer", function () {
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
// Renders a text layer with marked content enabled and checks that the
// marked-content element carries the `lang` attribute from the text content.
it("handles lang attribute for marked content", async function () {
  const loadingTask = getDocument(
    buildGetDocumentParams("marked_content_lang.pdf")
  );
  const pdfDocument = await loadingTask.promise;
  const page = await pdfDocument.getPage(1);

  const container = document.createElement("div");
  const textLayer = new TextLayer({
    // Marked content must be requested explicitly for this test.
    textContentSource: page.streamTextContent({
      includeMarkedContent: true,
    }),
    container,
    viewport: page.getViewport({ scale: 1 }),
  });
  await textLayer.render();

  // Look up the marked-content element by the id it receives in the fixture.
  const span = container.querySelector("#p17R_mc1");
  expect(span.getAttribute("lang")).toEqual("es-ES");
  expect(span.textContent).toEqual("Esto es español");

  // Release the document, as the preceding test in this spec does.
  await loadingTask.destroy();
});
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user