Correctly compute the mapping between text and normalized text when it contains a compound word on two lines
It fixes #19120. The original text doesn't contain the line break (\n), so we must take that into account.
This commit is contained in:
parent
22babd722f
commit
aa9503e51f
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -682,3 +682,4 @@
|
|||||||
!bug1922766.pdf
|
!bug1922766.pdf
|
||||||
!issue18956.pdf
|
!issue18956.pdf
|
||||||
!issue19083.pdf
|
!issue19083.pdf
|
||||||
|
!issue19120.pdf
|
||||||
|
|||||||
BIN
test/pdfs/issue19120.pdf
Executable file
BIN
test/pdfs/issue19120.pdf
Executable file
Binary file not shown.
@ -1062,15 +1062,16 @@ describe("pdf_find_controller", function () {
|
|||||||
await testOnFind({ eventBus });
|
await testOnFind({ eventBus });
|
||||||
});
|
});
|
||||||
|
|
||||||
it("performs a search in a text with compound word on two lines", async function () {
|
it("performs a search in a text with a compound word on two lines", async function () {
|
||||||
const { eventBus, pdfFindController } =
|
const { eventBus, pdfFindController } =
|
||||||
await initPdfFindController("issue18693.pdf");
|
await initPdfFindController("issue18693.pdf");
|
||||||
|
|
||||||
|
const query = "hel-Lo";
|
||||||
await testSearch({
|
await testSearch({
|
||||||
eventBus,
|
eventBus,
|
||||||
pdfFindController,
|
pdfFindController,
|
||||||
state: {
|
state: {
|
||||||
query: "hel-Lo",
|
query,
|
||||||
},
|
},
|
||||||
matchesPerPage: [1],
|
matchesPerPage: [1],
|
||||||
selectedMatch: {
|
selectedMatch: {
|
||||||
@ -1078,7 +1079,28 @@ describe("pdf_find_controller", function () {
|
|||||||
matchIndex: 0,
|
matchIndex: 0,
|
||||||
},
|
},
|
||||||
pageMatches: [[6]],
|
pageMatches: [[6]],
|
||||||
pageMatchesLength: [[7]],
|
pageMatchesLength: [[query.length]],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("performs a search after a compound word on two lines", async function () {
|
||||||
|
const { eventBus, pdfFindController } =
|
||||||
|
await initPdfFindController("issue19120.pdf");
|
||||||
|
|
||||||
|
const query = "a";
|
||||||
|
await testSearch({
|
||||||
|
eventBus,
|
||||||
|
pdfFindController,
|
||||||
|
state: {
|
||||||
|
query,
|
||||||
|
},
|
||||||
|
matchesPerPage: [3],
|
||||||
|
selectedMatch: {
|
||||||
|
pageIndex: 0,
|
||||||
|
matchIndex: 0,
|
||||||
|
},
|
||||||
|
pageMatches: [[0, 4, 15]],
|
||||||
|
pageMatchesLength: [[query.length, query.length, query.length]],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -282,8 +282,7 @@ function normalize(text) {
|
|||||||
|
|
||||||
if (p5) {
|
if (p5) {
|
||||||
// Compound word with a line break after the hyphen.
|
// Compound word with a line break after the hyphen.
|
||||||
positions.push([i - shift + 3, 1 + shift]);
|
// Since the \n isn't in the original text, o = 3 and n = 3.
|
||||||
shift += 1;
|
|
||||||
shiftOrigin += 1;
|
shiftOrigin += 1;
|
||||||
eol += 1;
|
eol += 1;
|
||||||
return p5.replace("\n", "");
|
return p5.replace("\n", "");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user