Get the text under highlight/squiggly/underline/strikethrough annotations (bug 1885505)

It also adds an invisible element containing that text to the annotation layer,
so that screen readers can read it.
This commit is contained in:
Calixte Denizet 2025-06-18 21:05:53 +02:00
parent 7d3b2a6d3f
commit 3bdc5d54fe
10 changed files with 449 additions and 10 deletions

View File

@ -1247,6 +1247,10 @@ class Annotation {
return null; return null;
} }
/**
 * Whether the page text located under this annotation should be extracted
 * for it. The base implementation returns `false`; annotation types that
 * are drawn over text override this getter.
 * @type {boolean}
 */
get overlaysTextContent() {
return false;
}
get hasTextContent() { get hasTextContent() {
return false; return false;
} }
@ -4711,6 +4715,10 @@ class HighlightAnnotation extends MarkupAnnotation {
} }
} }
/**
 * A highlight is drawn over page text, so report that the text under it
 * should be collected.
 * @type {boolean}
 */
get overlaysTextContent() {
return true;
}
static createNewDict(annotation, xref, { apRef, ap }) { static createNewDict(annotation, xref, { apRef, ap }) {
const { color, oldAnnotation, opacity, rect, rotation, user, quadPoints } = const { color, oldAnnotation, opacity, rect, rotation, user, quadPoints } =
annotation; annotation;
@ -4835,6 +4843,10 @@ class UnderlineAnnotation extends MarkupAnnotation {
this.data.popupRef = null; this.data.popupRef = null;
} }
} }
/**
 * An underline is drawn over page text, so report that the text under it
 * should be collected.
 * @type {boolean}
 */
get overlaysTextContent() {
return true;
}
} }
class SquigglyAnnotation extends MarkupAnnotation { class SquigglyAnnotation extends MarkupAnnotation {
@ -4879,6 +4891,10 @@ class SquigglyAnnotation extends MarkupAnnotation {
this.data.popupRef = null; this.data.popupRef = null;
} }
} }
/**
 * A squiggly underline is drawn over page text, so report that the text
 * under it should be collected.
 * @type {boolean}
 */
get overlaysTextContent() {
return true;
}
} }
class StrikeOutAnnotation extends MarkupAnnotation { class StrikeOutAnnotation extends MarkupAnnotation {
@ -4918,6 +4934,10 @@ class StrikeOutAnnotation extends MarkupAnnotation {
this.data.popupRef = null; this.data.popupRef = null;
} }
} }
/**
 * A strike-out is drawn over page text, so report that the text under it
 * should be collected.
 * @type {boolean}
 */
get overlaysTextContent() {
return true;
}
} }
class StampAnnotation extends MarkupAnnotation { class StampAnnotation extends MarkupAnnotation {

View File

@ -66,6 +66,7 @@ import { calculateMD5 } from "./calculate_md5.js";
import { Catalog } from "./catalog.js"; import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js"; import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js"; import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js"; import { Linearization } from "./parser.js";
import { NullStream } from "./stream.js"; import { NullStream } from "./stream.js";
import { ObjectLoader } from "./object_loader.js"; import { ObjectLoader } from "./object_loader.js";
@ -632,6 +633,7 @@ class Page {
includeMarkedContent, includeMarkedContent,
disableNormalization, disableNormalization,
sink, sink,
intersector = null,
}) { }) {
const contentStreamPromise = this.getContentStream(); const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT); const resourcesPromise = this.loadResources(RESOURCES_KEYS_TEXT_CONTENT);
@ -658,6 +660,7 @@ class Page {
sink, sink,
viewBox: this.view, viewBox: this.view,
lang, lang,
intersector,
}); });
} }
@ -707,6 +710,8 @@ class Page {
intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY), intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
intentPrint = !!(intent & RenderingIntentFlag.PRINT); intentPrint = !!(intent & RenderingIntentFlag.PRINT);
const highlightedAnnotations = [];
for (const annotation of annotations) { for (const annotation of annotations) {
// Get the annotation even if it's hidden because // Get the annotation even if it's hidden because
// JS can change its display. // JS can change its display.
@ -732,9 +737,29 @@ class Page {
); );
}) })
); );
} else if (annotation.overlaysTextContent && isVisible) {
highlightedAnnotations.push(annotation);
} }
} }
if (highlightedAnnotations.length > 0) {
const intersector = new Intersector(highlightedAnnotations);
textContentPromises.push(
this.extractTextContent({
handler,
task,
includeMarkedContent: false,
disableNormalization: false,
sink: null,
viewBox: this.view,
lang: null,
intersector,
}).then(() => {
intersector.setText();
})
);
}
await Promise.all(textContentPromises); await Promise.all(textContentPromises);
return annotationsData; return annotationsData;
} }

View File

@ -2362,6 +2362,7 @@ class PartialEvaluator {
disableNormalization = false, disableNormalization = false,
keepWhiteSpace = false, keepWhiteSpace = false,
prevRefs = null, prevRefs = null,
intersector = null,
}) { }) {
const objId = stream.dict?.objId; const objId = stream.dict?.objId;
const seenRefs = new RefSet(prevRefs); const seenRefs = new RefSet(prevRefs);
@ -2506,6 +2507,7 @@ class PartialEvaluator {
transform = textContentItem.prevTransform, transform = textContentItem.prevTransform,
fontName = textContentItem.fontName, fontName = textContentItem.fontName,
}) { }) {
intersector?.addExtraChar(" ");
textContent.items.push({ textContent.items.push({
str: " ", str: " ",
dir: "ltr", dir: "ltr",
@ -2964,9 +2966,21 @@ class PartialEvaluator {
if (!font.vertical) { if (!font.vertical) {
scaledDim *= textState.textHScale; scaledDim *= textState.textHScale;
intersector?.addGlyph(
getCurrentTextTransform(),
scaledDim,
0,
glyph.unicode
);
textState.translateTextMatrix(scaledDim, 0); textState.translateTextMatrix(scaledDim, 0);
textChunk.width += scaledDim; textChunk.width += scaledDim;
} else { } else {
intersector?.addGlyph(
getCurrentTextTransform(),
0,
scaledDim,
glyph.unicode
);
textState.translateTextMatrix(0, scaledDim); textState.translateTextMatrix(0, scaledDim);
scaledDim = Math.abs(scaledDim); scaledDim = Math.abs(scaledDim);
textChunk.height += scaledDim; textChunk.height += scaledDim;
@ -2985,8 +2999,12 @@ class PartialEvaluator {
// alignment issues between the textLayer and the canvas if the text // alignment issues between the textLayer and the canvas if the text
// contains e.g. tabs (fixes issue6612.pdf). // contains e.g. tabs (fixes issue6612.pdf).
textChunk.str.push(" "); textChunk.str.push(" ");
intersector?.addExtraChar(" ");
} }
if (!intersector) {
textChunk.str.push(glyphUnicode); textChunk.str.push(glyphUnicode);
}
if (charSpacing) { if (charSpacing) {
if (!font.vertical) { if (!font.vertical) {
@ -3002,6 +3020,7 @@ class PartialEvaluator {
} }
function appendEOL() { function appendEOL() {
intersector?.addExtraChar("\n");
resetLastChars(); resetLastChars();
if (textContentItem.initialized) { if (textContentItem.initialized) {
textContentItem.hasEOL = true; textContentItem.hasEOL = true;
@ -3027,6 +3046,7 @@ class PartialEvaluator {
if (textContentItem.initialized) { if (textContentItem.initialized) {
resetLastChars(); resetLastChars();
textContentItem.str.push(" "); textContentItem.str.push(" ");
intersector?.addExtraChar(" ");
} }
return false; return false;
} }
@ -3078,7 +3098,7 @@ class PartialEvaluator {
if (batch && length < TEXT_CHUNK_BATCH_SIZE) { if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
return; return;
} }
sink.enqueue(textContent, length); sink?.enqueue(textContent, length);
textContent.items = []; textContent.items = [];
textContent.styles = Object.create(null); textContent.styles = Object.create(null);
} }
@ -3088,7 +3108,7 @@ class PartialEvaluator {
return new Promise(function promiseBody(resolve, reject) { return new Promise(function promiseBody(resolve, reject) {
const next = function (promise) { const next = function (promise) {
enqueueChunk(/* batch = */ true); enqueueChunk(/* batch = */ true);
Promise.all([promise, sink.ready]).then(function () { Promise.all([promise, sink?.ready]).then(function () {
try { try {
promiseBody(resolve, reject); promiseBody(resolve, reject);
} catch (ex) { } catch (ex) {
@ -3341,7 +3361,7 @@ class PartialEvaluator {
}, },
get desiredSize() { get desiredSize() {
return sink.desiredSize; return sink.desiredSize ?? 0;
}, },
get ready() { get ready() {
@ -3359,7 +3379,7 @@ class PartialEvaluator {
: resources, : resources,
stateManager: xObjStateManager, stateManager: xObjStateManager,
includeMarkedContent, includeMarkedContent,
sink: sinkWrapper, sink: sink && sinkWrapper,
seenStyles, seenStyles,
viewBox, viewBox,
lang, lang,
@ -3499,7 +3519,7 @@ class PartialEvaluator {
} }
break; break;
} // switch } // switch
if (textContent.items.length >= sink.desiredSize) { if (textContent.items.length >= (sink?.desiredSize ?? 1)) {
// Wait for ready, if we reach highWaterMark. // Wait for ready, if we reach highWaterMark.
stop = true; stop = true;
break; break;

203
src/core/intersector.js Normal file
View File

@ -0,0 +1,203 @@
/* Copyright 2025 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Accumulates the glyphs whose reference point falls inside the quads of a
 * single annotation, and finally stores the resulting string on
 * `annotation.data.overlaidText`.
 *
 * Each quad occupies 8 consecutive numbers in `annotation.data.quadPoints`.
 * Only four of them are read: index 0 (left), 2 (right), 5 (bottom) and
 * 1 (top).
 * NOTE(review): this assumes the quads were normalized into that order by
 * the code which produced `quadPoints` - confirm against the caller.
 */
class SingleIntersector {
  #annotation;

  #quadPoints;

  // Bounding box enclosing every quad of the annotation.
  #minX = Infinity;

  #maxX = -Infinity;

  #minY = Infinity;

  #maxY = -Infinity;

  // Pieces of text collected so far (glyphs and flushed extra chars).
  #text = [];

  // Extra chars (spaces, EOLs) seen since the last accepted glyph; they are
  // only kept if another glyph gets accepted afterwards.
  #extraChars = [];

  // Offset in #quadPoints of the quad which contained the previous glyph,
  // or -1 when there is none.
  #lastIntersectingQuadIndex = -1;

  #canTakeExtraChars = false;

  /**
   * @param {Object} annotation - Must expose `data.quadPoints`.
   */
  constructor(annotation) {
    this.#annotation = annotation;
    this.#quadPoints = annotation.data.quadPoints;

    const quads = this.#quadPoints;
    for (let offset = 0; offset < quads.length; offset += 8) {
      this.#minX = Math.min(this.#minX, quads[offset]);
      this.#maxX = Math.max(this.#maxX, quads[offset + 2]);
      this.#minY = Math.min(this.#minY, quads[offset + 5]);
      this.#maxY = Math.max(this.#maxY, quads[offset + 1]);
    }
  }

  /**
   * Check if the bounding box of this intersector and the one of `other`
   * have a common area (boxes which only share an edge don't overlap).
   * @param {SingleIntersector} other
   * @returns {boolean}
   */
  overlaps(other) {
    if (this.#minX >= other.#maxX || this.#maxX <= other.#minX) {
      return false;
    }
    if (this.#minY >= other.#maxY || this.#maxY <= other.#minY) {
      return false;
    }
    return true;
  }

  /**
   * Check if (x, y) is strictly inside the quad starting at `offset`.
   * @param {number} offset - Index of the first number of the quad.
   * @param {number} x
   * @param {number} y
   * @returns {boolean}
   */
  #isInsideQuad(offset, x, y) {
    const quads = this.#quadPoints;
    if (quads[offset] >= x || quads[offset + 2] <= x) {
      return false;
    }
    if (quads[offset + 5] >= y || quads[offset + 1] <= y) {
      return false;
    }
    return true;
  }

  /**
   * Check if the given point intersects with the annotation's quad points.
   * The point (x, y) is supposed to be the center of the glyph.
   * @param {number} x
   * @param {number} y
   * @returns {boolean}
   */
  #intersects(x, y) {
    // Cheap rejection using the global bounding box.
    if (this.#minX >= x || this.#maxX <= x) {
      return false;
    }
    if (this.#minY >= y || this.#maxY <= y) {
      return false;
    }

    const quads = this.#quadPoints;
    if (quads.length === 8) {
      // With a single quad, the bounding box is the quad itself.
      return true;
    }

    // Consecutive glyphs are likely to be in the same quad, hence try the
    // last matching one before scanning all of them.
    const cachedOffset = this.#lastIntersectingQuadIndex;
    if (cachedOffset >= 0) {
      if (this.#isInsideQuad(cachedOffset, x, y)) {
        return true;
      }
      this.#lastIntersectingQuadIndex = -1;
    }

    for (let offset = 0; offset < quads.length; offset += 8) {
      if (this.#isInsideQuad(offset, x, y)) {
        this.#lastIntersectingQuadIndex = offset;
        return true;
      }
    }
    return false;
  }

  /**
   * Record `glyph` when (x, y) is inside one of the quads.
   * Pending extra chars are flushed before an accepted glyph and are
   * discarded when the glyph is rejected.
   * @param {number} x
   * @param {number} y
   * @param {string} glyph
   * @returns {boolean} true when the glyph has been recorded.
   */
  addGlyph(x, y, glyph) {
    if (this.#intersects(x, y)) {
      const pending = this.#extraChars;
      if (pending.length > 0) {
        this.#text.push(pending.join(""));
        pending.length = 0;
      }
      this.#text.push(glyph);
      this.#canTakeExtraChars = true;
      return true;
    }

    this.disableExtraChars();
    return false;
  }

  /**
   * Queue a char which isn't a glyph (e.g. a space or an EOL); it's ignored
   * unless the previous glyph was accepted.
   * @param {string} char
   */
  addExtraChar(char) {
    if (!this.#canTakeExtraChars) {
      return;
    }
    this.#extraChars.push(char);
  }

  /**
   * Stop accepting extra chars and drop the pending ones.
   */
  disableExtraChars() {
    if (this.#canTakeExtraChars) {
      this.#canTakeExtraChars = false;
      this.#extraChars.length = 0;
    }
  }

  /**
   * Store the collected text in `annotation.data.overlaidText`. Extra chars
   * still pending (i.e. not followed by an accepted glyph) aren't included.
   */
  setText() {
    const overlaidText = this.#text.join("");
    this.#annotation.data.overlaidText = overlaidText;
  }
}
/**
 * Dispatches glyphs and extra chars to one SingleIntersector per annotation
 * having quad points.
 */
class Intersector {
  // Maps each SingleIntersector to the Set of the intersectors registered
  // *after* it whose bounding box overlaps its own (or null when none).
  #intersectors = new Map();

  /**
   * @param {Iterable<Object>} annotations - The ones without
   *   `data.quadPoints` are ignored.
   */
  constructor(annotations) {
    for (const annotation of annotations) {
      if (!annotation.data.quadPoints) {
        continue;
      }
      const intersector = new SingleIntersector(annotation);
      for (const [other, overlapping] of this.#intersectors) {
        if (!other.overlaps(intersector)) {
          continue;
        }
        if (overlapping) {
          overlapping.add(intersector);
        } else {
          this.#intersectors.set(other, new Set([intersector]));
        }
      }
      this.#intersectors.set(intersector, null);
    }
  }

  /**
   * Offer a glyph to the intersectors. The tested point is
   * (transform[4] + width / 2, transform[5] + height / 2).
   * @param {Array<number>} transform - Only elements 4 and 5 are read.
   * @param {number} width
   * @param {number} height
   * @param {string} glyph
   */
  addGlyph(transform, width, height, glyph) {
    const x = transform[4] + width / 2;
    const y = transform[5] + height / 2;

    // Set of the intersectors overlapping the first one which accepted the
    // glyph; once known, the others can't contain the point and are only
    // told to drop their pending extra chars.
    let candidates = null;
    for (const [intersector, overlapping] of this.#intersectors) {
      if (!candidates) {
        if (intersector.addGlyph(x, y, glyph)) {
          candidates = overlapping;
        }
      } else if (candidates.has(intersector)) {
        intersector.addGlyph(x, y, glyph);
      } else {
        intersector.disableExtraChars();
      }
    }
  }

  /**
   * Forward a char which isn't a glyph (space, EOL, ...) to every
   * intersector.
   * @param {string} char
   */
  addExtraChar(char) {
    for (const [intersector] of this.#intersectors) {
      intersector.addExtraChar(char);
    }
  }

  /**
   * Ask every intersector to store its collected text on its annotation.
   */
  setText() {
    for (const [intersector] of this.#intersectors) {
      intersector.setText();
    }
  }
}
export { Intersector };

View File

@ -555,6 +555,7 @@ class AnnotationElement {
svg.classList.add("quadrilateralsContainer"); svg.classList.add("quadrilateralsContainer");
svg.setAttribute("width", 0); svg.setAttribute("width", 0);
svg.setAttribute("height", 0); svg.setAttribute("height", 0);
svg.role = "none";
const defs = svgFactory.createElement("defs"); const defs = svgFactory.createElement("defs");
svg.append(defs); svg.append(defs);
const clipPath = svgFactory.createElement("clipPath"); const clipPath = svgFactory.createElement("clipPath");
@ -2912,13 +2913,23 @@ class HighlightAnnotationElement extends AnnotationElement {
} }
render() { render() {
if (!this.data.popupRef && this.hasPopupData) { const {
data: { overlaidText, popupRef },
} = this;
if (!popupRef && this.hasPopupData) {
this._createPopup(); this._createPopup();
} }
this.container.classList.add("highlightAnnotation"); this.container.classList.add("highlightAnnotation");
this._editOnDoubleClick(); this._editOnDoubleClick();
if (overlaidText) {
const mark = document.createElement("mark");
mark.classList.add("overlaidText");
mark.textContent = overlaidText;
this.container.append(mark);
}
return this.container; return this.container;
} }
} }
@ -2933,11 +2944,22 @@ class UnderlineAnnotationElement extends AnnotationElement {
} }
render() { render() {
if (!this.data.popupRef && this.hasPopupData) { const {
data: { overlaidText, popupRef },
} = this;
if (!popupRef && this.hasPopupData) {
this._createPopup(); this._createPopup();
} }
this.container.classList.add("underlineAnnotation"); this.container.classList.add("underlineAnnotation");
if (overlaidText) {
const underline = document.createElement("u");
underline.classList.add("overlaidText");
underline.textContent = overlaidText;
this.container.append(underline);
}
return this.container; return this.container;
} }
} }
@ -2952,11 +2974,22 @@ class SquigglyAnnotationElement extends AnnotationElement {
} }
render() { render() {
if (!this.data.popupRef && this.hasPopupData) { const {
data: { overlaidText, popupRef },
} = this;
if (!popupRef && this.hasPopupData) {
this._createPopup(); this._createPopup();
} }
this.container.classList.add("squigglyAnnotation"); this.container.classList.add("squigglyAnnotation");
if (overlaidText) {
const underline = document.createElement("u");
underline.classList.add("overlaidText");
underline.textContent = overlaidText;
this.container.append(underline);
}
return this.container; return this.container;
} }
} }
@ -2971,11 +3004,22 @@ class StrikeOutAnnotationElement extends AnnotationElement {
} }
render() { render() {
if (!this.data.popupRef && this.hasPopupData) { const {
data: { overlaidText, popupRef },
} = this;
if (!popupRef && this.hasPopupData) {
this._createPopup(); this._createPopup();
} }
this.container.classList.add("strikeoutAnnotation"); this.container.classList.add("strikeoutAnnotation");
if (overlaidText) {
const strikeout = document.createElement("s");
strikeout.classList.add("overlaidText");
strikeout.textContent = overlaidText;
this.container.append(strikeout);
}
return this.container; return this.container;
} }
} }

View File

@ -696,4 +696,77 @@ describe("ResetForm action", () => {
}); });
}); });
}); });
describe("Text under some annotations", () => {
describe("bug1885505.pdf", () => {
let pages;
beforeEach(async () => {
pages = await loadAndWait(
"bug1885505.pdf",
":is(" +
[56, 58, 60, 65]
.map(id => `[data-annotation-id='${id}R']`)
.join(", ") +
")"
);
});
afterEach(async () => {
await closePages(pages);
});
it("must check that the text under a highlight annotation exist in the DOM", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
const text = await page.$eval(
"[data-annotation-id='56R'] mark",
el => el.textContent
);
expect(text).withContext(`In ${browserName}`).toEqual("Languages");
})
);
});
it("must check that the text under an underline annotation exist in the DOM", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
const text = await page.$eval(
"[data-annotation-id='58R'] u",
el => el.textContent
);
expect(text).withContext(`In ${browserName}`).toEqual("machine");
})
);
});
it("must check that the text under a squiggly annotation exist in the DOM", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
const text = await page.$eval(
"[data-annotation-id='60R'] u",
el => el.textContent
);
expect(text).withContext(`In ${browserName}`)
.toEqual(`paths through nested loops. We have implemented
a dynamic compiler for JavaScript based on our`);
})
);
});
it("must check that the text under a strikeout annotation exist in the DOM", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
const text = await page.$eval(
"[data-annotation-id='65R'] s",
el => el.textContent
);
expect(text)
.withContext(`In ${browserName}`)
.toEqual("Experimentation,");
})
);
});
});
});
}); });

View File

@ -727,3 +727,4 @@
!bug1963407.pdf !bug1963407.pdf
!issue19517.pdf !issue19517.pdf
!empty#hash.pdf !empty#hash.pdf
!bug1885505.pdf

BIN
test/pdfs/bug1885505.pdf Executable file

Binary file not shown.

View File

@ -5127,4 +5127,47 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
} }
); );
}); });
describe("Annotations", function () {
it("should extract the text under some annotations", async function () {
const loadingTask = getDocument(buildGetDocumentParams("bug1885505.pdf"));
const pdfDoc = await loadingTask.promise;
const page1 = await pdfDoc.getPage(1);
const annots = await page1.getAnnotations();
let annot = annots.find(x => x.id === "56R");
expect(annot.overlaidText).toEqual("Languages");
annot = annots.find(x => x.id === "52R");
expect(annot.overlaidText)
.toEqual(`Dynamic languages such as JavaScript are more difficult to com-
pile than statically typed ones. Since no concrete type information
is available, traditional compilers`);
annot = annots.find(x => x.id === "54R");
expect(annot.overlaidText)
.toEqual(`typed ones. Since no concrete type information
is available, traditional compilers need to emit generic code that can
handle all possible type combinations at runtime. We present an al-
ternative compilation technique for dynamically-`);
annot = annots.find(x => x.id === "58R");
expect(annot.overlaidText).toEqual("machine");
annot = annots.find(x => x.id === "60R");
expect(annot.overlaidText)
.toEqual(`paths through nested loops. We have implemented
a dynamic compiler for JavaScript based on our`);
annot = annots.find(x => x.id === "65R");
expect(annot.overlaidText).toEqual("Experimentation,");
annot = annots.find(x => x.id === "63R");
expect(annot.overlaidText)
.toEqual(`languages such as JavaScript, Python, and Ruby, are pop-
ular since they are expressive, accessible to non-experts, and make
deployment as easy as distributing a source file. They are used for
small scripts as well as for`);
});
});
}); });

View File

@ -126,6 +126,16 @@
display: none; display: none;
} }
} }
/* Element holding the text found under the annotation: it stays in the DOM
   but gets a zero-sized, clipped box, so nothing of it is painted. */
.overlaidText {
position: absolute;
top: 0;
left: 0;
width: 0;
height: 0;
display: inline-block;
overflow: hidden;
}
} }
.textLayer.selecting ~ & section { .textLayer.selecting ~ & section {