Don't set the MathML namespace for attributes in MathML tags (bug 1997343)

Also, an XML file is UTF-8 encoded by default, so decode the embedded file accordingly.
This commit is contained in:
Calixte Denizet 2025-10-30 15:44:05 +01:00
parent 27bb5fb173
commit 6db23139be
5 changed files with 50 additions and 6 deletions

View File

@ -13,7 +13,12 @@
* limitations under the License.
*/
import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js";
import {
AnnotationPrefix,
stringToPDFString,
stringToUTF8String,
warn,
} from "../shared/util.js";
import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js";
import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js";
import { BaseStream } from "./base_stream.js";
@ -610,7 +615,8 @@ class StructElementNode {
if (!isName(fileStream.dict.get("Subtype"), "application/mathml+xml")) {
continue;
}
return fileStream.getString();
// The default encoding for xml files is UTF-8.
return stringToUTF8String(fileStream.getString());
}
const A = this.dict.get("A");
if (A instanceof Dict) {

View File

@ -346,6 +346,46 @@ describe("accessibility", () => {
});
});
describe("MathML with some attributes in AF entry from LaTeX", () => {
  let pages;

  // Each spec works on freshly loaded pages for the test document and
  // closes them once it is done.
  beforeEach(async () => {
    pages = await loadAndWait("bug1997343.pdf", ".textLayer");
  });

  afterEach(async () => {
    await closePages(pages);
  });

  it("must check that the MathML is correctly inserted", async () => {
    // Markup expected inside the struct-tree span, attributes included.
    const expectedMathML =
      '<math display="block"> <msup> <mi>𝑛</mi> <mi>𝑝</mi> </msup> <mo lspace="0.278em" rspace="0.278em">=</mo> <mi>𝑛</mi> <mspace width="1.000em"></mspace> <mi> mod </mi> <mspace width="0.167em"></mspace> <mspace width="0.167em"></mspace> <mi>𝑝</mi> </math>';

    /**
     * Compare the MathML found in one page with the expected markup, or
     * mark the spec as pending when `Sanitizer` is not defined in the page.
     */
    const checkPage = async ([browserName, page]) => {
      const hasSanitizer = await page.evaluate(() => {
        try {
          // eslint-disable-next-line no-undef
          return typeof Sanitizer !== "undefined";
        } catch {
          return false;
        }
      });

      if (!hasSanitizer) {
        pending(`Sanitizer API (in ${browserName}) is not supported`);
        return;
      }

      const insertedMathML = await page.$eval(
        "span.structTree span[aria-owns='p21R_mc64']",
        el => el?.innerHTML ?? ""
      );
      expect(insertedMathML)
        .withContext(`In ${browserName}`)
        .toEqual(expectedMathML);
    };

    await Promise.all(pages.map(entry => checkPage(entry)));
  });
});
describe("MathML tags in the struct tree", () => {
let pages;

View File

@ -753,3 +753,4 @@
!bug1937438_af_from_latex.pdf
!bug1937438_from_word.pdf
!bug1937438_mml_from_latex.pdf
!bug1997343.pdf

BIN
test/pdfs/bug1997343.pdf Executable file

Binary file not shown.

View File

@ -155,10 +155,7 @@ class MathMLSanitizer {
"accentunder",
"columnspan",
"rowspan",
].map(name => ({
name,
namespace: MathMLNamespace,
})),
],
comments: false,
})
: null