Merge pull request #20456 from calixteman/issue20225
When searching for a group of punctuation signs, only add extra spaces around the group
This commit is contained in:
commit
d4b6464675
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -762,3 +762,4 @@
|
|||||||
!extract_link.pdf
|
!extract_link.pdf
|
||||||
!two_paragraphs.pdf
|
!two_paragraphs.pdf
|
||||||
!paragraph_and_link.pdf
|
!paragraph_and_link.pdf
|
||||||
|
!issue20225.pdf
|
||||||
|
|||||||
BIN
test/pdfs/issue20225.pdf
Executable file
BIN
test/pdfs/issue20225.pdf
Executable file
Binary file not shown.
@ -634,8 +634,8 @@ describe("pdf_find_controller", function () {
|
|||||||
pageIndex: 0,
|
pageIndex: 0,
|
||||||
matchIndex: 0,
|
matchIndex: 0,
|
||||||
},
|
},
|
||||||
pageMatches: [[1497]],
|
pageMatches: [[1498]],
|
||||||
pageMatchesLength: [[25]],
|
pageMatchesLength: [[24]],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -1138,6 +1138,26 @@ describe("pdf_find_controller", function () {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("performs a search with a group of punctuation signs", async () => {
|
||||||
|
const { eventBus, pdfFindController } =
|
||||||
|
await initPdfFindController("issue20225.pdf");
|
||||||
|
|
||||||
|
await testSearch({
|
||||||
|
eventBus,
|
||||||
|
pdfFindController,
|
||||||
|
state: {
|
||||||
|
query: "....",
|
||||||
|
},
|
||||||
|
matchesPerPage: [1],
|
||||||
|
selectedMatch: {
|
||||||
|
pageIndex: 0,
|
||||||
|
matchIndex: 0,
|
||||||
|
},
|
||||||
|
pageMatches: [[8]],
|
||||||
|
pageMatchesLength: [[4]],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe("custom matcher", () => {
|
describe("custom matcher", () => {
|
||||||
it("calls to the matcher with the right arguments", async () => {
|
it("calls to the matcher with the right arguments", async () => {
|
||||||
const QUERY = "Foo bar";
|
const QUERY = "Foo bar";
|
||||||
|
|||||||
@ -78,7 +78,7 @@ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below.
|
|||||||
|
|
||||||
const DIACRITICS_REG_EXP = /\p{M}+/gu;
|
const DIACRITICS_REG_EXP = /\p{M}+/gu;
|
||||||
const SPECIAL_CHARS_REG_EXP =
|
const SPECIAL_CHARS_REG_EXP =
|
||||||
/([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
|
/([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu;
|
||||||
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
|
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
|
||||||
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
|
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
|
||||||
|
|
||||||
@ -708,6 +708,18 @@ class PDFFindController {
|
|||||||
#convertToRegExpString(query, hasDiacritics) {
|
#convertToRegExpString(query, hasDiacritics) {
|
||||||
const { matchDiacritics } = this.#state;
|
const { matchDiacritics } = this.#state;
|
||||||
let isUnicode = false;
|
let isUnicode = false;
|
||||||
|
const addExtraWhitespaces = (original, fixed) => {
|
||||||
|
if (original === query) {
|
||||||
|
return fixed;
|
||||||
|
}
|
||||||
|
if (query.startsWith(original)) {
|
||||||
|
return `${fixed}[ ]*`;
|
||||||
|
}
|
||||||
|
if (query.endsWith(original)) {
|
||||||
|
return `[ ]*${fixed}`;
|
||||||
|
}
|
||||||
|
return `[ ]*${fixed}[ ]*`;
|
||||||
|
};
|
||||||
query = query.replaceAll(
|
query = query.replaceAll(
|
||||||
SPECIAL_CHARS_REG_EXP,
|
SPECIAL_CHARS_REG_EXP,
|
||||||
(
|
(
|
||||||
@ -723,11 +735,11 @@ class PDFFindController {
|
|||||||
|
|
||||||
if (p1) {
|
if (p1) {
|
||||||
// Escape characters like *+?... to not interfere with regexp syntax.
|
// Escape characters like *+?... to not interfere with regexp syntax.
|
||||||
return `[ ]*\\${p1}[ ]*`;
|
return addExtraWhitespaces(p1, `\\${p1}`);
|
||||||
}
|
}
|
||||||
if (p2) {
|
if (p2) {
|
||||||
// Allow whitespaces around punctuation signs.
|
// Allow whitespaces around group of punctuation signs.
|
||||||
return `[ ]*${p2}[ ]*`;
|
return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&"));
|
||||||
}
|
}
|
||||||
if (p3) {
|
if (p3) {
|
||||||
// Replace spaces by \s+ to be sure to match any spaces.
|
// Replace spaces by \s+ to be sure to match any spaces.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user