Correctly compute the mapping between the text and its normalized form when the text contains a compound word split across two lines

It fixes #19120.

The original text doesn't contain the line break (`\n`) that follows the hyphen, so we must take that into account when computing the positions.
This commit is contained in:
Calixte Denizet 2024-11-28 15:56:00 +01:00
parent 22babd722f
commit aa9503e51f
4 changed files with 27 additions and 5 deletions

View File

@ -682,3 +682,4 @@
!bug1922766.pdf !bug1922766.pdf
!issue18956.pdf !issue18956.pdf
!issue19083.pdf !issue19083.pdf
!issue19120.pdf

BIN
test/pdfs/issue19120.pdf Executable file

Binary file not shown.

View File

@ -1062,15 +1062,16 @@ describe("pdf_find_controller", function () {
await testOnFind({ eventBus }); await testOnFind({ eventBus });
}); });
it("performs a search in a text with compound word on two lines", async function () { it("performs a search in a text with a compound word on two lines", async function () {
const { eventBus, pdfFindController } = const { eventBus, pdfFindController } =
await initPdfFindController("issue18693.pdf"); await initPdfFindController("issue18693.pdf");
const query = "hel-Lo";
await testSearch({ await testSearch({
eventBus, eventBus,
pdfFindController, pdfFindController,
state: { state: {
query: "hel-Lo", query,
}, },
matchesPerPage: [1], matchesPerPage: [1],
selectedMatch: { selectedMatch: {
@ -1078,7 +1079,28 @@ describe("pdf_find_controller", function () {
matchIndex: 0, matchIndex: 0,
}, },
pageMatches: [[6]], pageMatches: [[6]],
pageMatchesLength: [[7]], pageMatchesLength: [[query.length]],
});
});
it("performs a search after a compound word on two lines", async function () {
const { eventBus, pdfFindController } =
await initPdfFindController("issue19120.pdf");
const query = "a";
await testSearch({
eventBus,
pdfFindController,
state: {
query,
},
matchesPerPage: [3],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[0, 4, 15]],
pageMatchesLength: [[query.length, query.length, query.length]],
}); });
}); });

View File

@ -282,8 +282,7 @@ function normalize(text) {
if (p5) { if (p5) {
// Compound word with a line break after the hyphen. // Compound word with a line break after the hyphen.
positions.push([i - shift + 3, 1 + shift]); // Since the \n isn't in the original text, o = 3 and n = 3.
shift += 1;
shiftOrigin += 1; shiftOrigin += 1;
eol += 1; eol += 1;
return p5.replace("\n", ""); return p5.replace("\n", "");