Merge 9f576beee83291b61f1b54e3681671ceca256fd0 into 4aca13e77aa2f2c4b5a133aa43fe27bbdf86ad21
This commit is contained in:
commit
21c3a13774
@ -2317,9 +2317,19 @@ class PartialEvaluator {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Other marked content types aren't supported yet.
|
// Other marked content types aren't supported yet.
|
||||||
|
let props = null;
|
||||||
|
if (args[1] instanceof Dict) {
|
||||||
|
const lang = args[1].get("Lang");
|
||||||
|
if (typeof lang === "string") {
|
||||||
|
props = Object.create(null);
|
||||||
|
props.lang = stringToPDFString(lang);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
args[0].name,
|
args[0].name,
|
||||||
args[1] instanceof Dict ? args[1].get("MCID") : null,
|
args[1] instanceof Dict ? args[1].get("MCID") : null,
|
||||||
|
props,
|
||||||
];
|
];
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -3505,8 +3515,13 @@ class PartialEvaluator {
|
|||||||
markedContentData.level++;
|
markedContentData.level++;
|
||||||
|
|
||||||
let mcid = null;
|
let mcid = null;
|
||||||
|
let itemLang = null;
|
||||||
if (args[1] instanceof Dict) {
|
if (args[1] instanceof Dict) {
|
||||||
mcid = args[1].get("MCID");
|
mcid = args[1].get("MCID");
|
||||||
|
const langString = args[1].get("Lang");
|
||||||
|
if (typeof langString === "string") {
|
||||||
|
itemLang = stringToPDFString(langString);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
type: "beginMarkedContentProps",
|
type: "beginMarkedContentProps",
|
||||||
@ -3514,6 +3529,7 @@ class PartialEvaluator {
|
|||||||
? `${self.idFactory.getPageObjId()}_mc${mcid}`
|
? `${self.idFactory.getPageObjId()}_mc${mcid}`
|
||||||
: null,
|
: null,
|
||||||
tag: args[0] instanceof Name ? args[0].name : null,
|
tag: args[0] instanceof Name ? args[0].name : null,
|
||||||
|
lang: itemLang,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@ -1184,6 +1184,8 @@ class PDFDocumentProxy {
|
|||||||
* 'beginMarkedContentProps', or 'endMarkedContent'.
|
* 'beginMarkedContentProps', or 'endMarkedContent'.
|
||||||
* @property {string} id - The marked content identifier. Only used for type
|
* @property {string} id - The marked content identifier. Only used for type
|
||||||
* 'beginMarkedContentProps'.
|
* 'beginMarkedContentProps'.
|
||||||
|
* @property {string|null} tag - The marked content tag.
|
||||||
|
* @property {string|null} lang - The lang attribute for the marked content.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -293,6 +293,9 @@ class TextLayer {
|
|||||||
if (item.id) {
|
if (item.id) {
|
||||||
this.#container.setAttribute("id", `${item.id}`);
|
this.#container.setAttribute("id", `${item.id}`);
|
||||||
}
|
}
|
||||||
|
if (item.lang) {
|
||||||
|
this.#container.setAttribute("lang", item.lang);
|
||||||
|
}
|
||||||
parent.append(this.#container);
|
parent.append(this.#container);
|
||||||
} else if (item.type === "endMarkedContent") {
|
} else if (item.type === "endMarkedContent") {
|
||||||
this.#container = this.#container.parentNode;
|
this.#container = this.#container.parentNode;
|
||||||
|
|||||||
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -754,6 +754,7 @@
|
|||||||
!bug1937438_af_from_latex.pdf
|
!bug1937438_af_from_latex.pdf
|
||||||
!bug1937438_from_word.pdf
|
!bug1937438_from_word.pdf
|
||||||
!bug1937438_mml_from_latex.pdf
|
!bug1937438_mml_from_latex.pdf
|
||||||
|
!marked_content_lang.pdf
|
||||||
!bug1997343.pdf
|
!bug1997343.pdf
|
||||||
!doc_1_3_pages.pdf
|
!doc_1_3_pages.pdf
|
||||||
!doc_2_3_pages.pdf
|
!doc_2_3_pages.pdf
|
||||||
|
|||||||
BIN
test/pdfs/marked_content_lang.pdf
Normal file
BIN
test/pdfs/marked_content_lang.pdf
Normal file
Binary file not shown.
@ -4488,6 +4488,23 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets operatorList, with marked content lang", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("marked_content_lang.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const opList = await pdfPage.getOperatorList({
|
||||||
|
annotationMode: AnnotationMode.DISABLE,
|
||||||
|
});
|
||||||
|
expect(opList.fnArray[0]).toEqual(OPS.beginMarkedContentProps);
|
||||||
|
expect(opList.argsArray[0][0]).toEqual("P");
|
||||||
|
expect(opList.argsArray[0][2]?.lang).toEqual("en-US");
|
||||||
|
expect(opList.fnArray[10]).toEqual(OPS.beginMarkedContentProps);
|
||||||
|
expect(opList.argsArray[10][0]).toEqual("P");
|
||||||
|
expect(opList.argsArray[10][2]?.lang).toEqual("es-ES");
|
||||||
|
});
|
||||||
|
|
||||||
it("gets operatorList, with page resources containing corrupt /CCITTFaxDecode data", async function () {
|
it("gets operatorList, with page resources containing corrupt /CCITTFaxDecode data", async function () {
|
||||||
const loadingTask = getDocument(
|
const loadingTask = getDocument(
|
||||||
buildGetDocumentParams("poppler-90-0-fuzzed.pdf")
|
buildGetDocumentParams("poppler-90-0-fuzzed.pdf")
|
||||||
|
|||||||
@ -250,4 +250,29 @@ describe("textLayer", function () {
|
|||||||
|
|
||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("handles lang attribute for marked content", async function () {
|
||||||
|
if (isNodeJS) {
|
||||||
|
pending("document.createElement is not supported in Node.js.");
|
||||||
|
}
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("marked_content_lang.pdf")
|
||||||
|
);
|
||||||
|
const pdfDocument = await loadingTask.promise;
|
||||||
|
const page = await pdfDocument.getPage(1);
|
||||||
|
|
||||||
|
const container = document.createElement("div");
|
||||||
|
const textLayer = new TextLayer({
|
||||||
|
textContentSource: page.streamTextContent({
|
||||||
|
includeMarkedContent: true,
|
||||||
|
}),
|
||||||
|
container,
|
||||||
|
viewport: page.getViewport({ scale: 1 }),
|
||||||
|
});
|
||||||
|
await textLayer.render();
|
||||||
|
|
||||||
|
const span = container.querySelector("#p17R_mc1");
|
||||||
|
expect(span.getAttribute("lang")).toEqual("es-ES");
|
||||||
|
expect(span.textContent).toEqual("Esto es español");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user