Fix search functionality to respect punctuation and spaces exactly
- Remove optional spaces around punctuation marks in the search regex.
- Fixes issue #20225, where searching for '..' would incorrectly match '. ...'.
- Add test cases to prevent regression.
- Ensure exact matching for punctuation sequences and text with dots.

Resolves: https://github.com/mozilla/pdf.js/issues/20225
This commit is contained in:
parent
a6934b478f
commit
7870539303
@ -1213,4 +1213,54 @@ describe("pdf_find_controller", function () {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("should not match punctuation with spaces when searching for exact punctuation", async function () {
  // Regression test for issue #20225: searching for "...." must match only
  // four consecutive dots, not ". ..." or ".. ..".

  // Synthetic page text containing three different dot patterns; only the
  // first one ("....") is an exact match for the query used below.
  const testContent = "This has four dots.... and this has dot space three dots. ... and this has two dots space two dots.. ..";

  /**
   * Matcher stub that runs the real `PDFFindController.prototype.match`
   * against `testContent` instead of the text handed in by the controller.
   *
   * The synthetic text is only reported for the first page, so the
   * per-page expectation below (one match on page 0, none elsewhere) does
   * not depend on how many times the stub is invoked.
   * NOTE(review): assumes the controller passes the page index as the third
   * argument, mirroring the `match(query, content, pageIndex)` call made
   * here -- confirm against `initPdfFindController`. When no index is
   * passed, the stub falls back to always matching against `testContent`.
   *
   * @param {string} query - The search query.
   * @param {string} pageContent - Ignored; replaced by `testContent`.
   * @param {number} [pageIndex] - Index of the page being searched.
   * @returns {Array} The matches produced by the real `match` method.
   */
  const mockMatcher = function (query, pageContent, pageIndex) {
    if (pageIndex > 0) {
      return [];
    }
    return PDFFindController.prototype.match.call(this, query, testContent, 0);
  };

  const { eventBus, pdfFindController } = await initPdfFindController(
    null,
    false,
    mockMatcher
  );

  // Searching for exactly four dots should only match the exact sequence.
  await testSearch({
    eventBus,
    pdfFindController,
    state: { query: "...." },
    selectedMatch: { pageIndex: 0, matchIndex: 0 },
    // Only one exact occurrence, on the first page.
    matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  });
});
|
||||
|
||||
it("should not match text with spaces when searching for text without spaces", async function () {
  // Searching for "hello.world" must not match "hello. world".

  // Synthetic page text containing both the spaced and the exact variant.
  const testContent = "hello. world and hello.world are different";

  /**
   * Matcher stub that runs the real `PDFFindController.prototype.match`
   * against `testContent` instead of the text handed in by the controller.
   *
   * The synthetic text is only reported for the first page, so the
   * per-page expectation below (one match on page 0, none elsewhere) does
   * not depend on how many times the stub is invoked.
   * NOTE(review): assumes the controller passes the page index as the third
   * argument, mirroring the `match(query, content, pageIndex)` call made
   * here -- confirm against `initPdfFindController`. When no index is
   * passed, the stub falls back to always matching against `testContent`.
   *
   * @param {string} query - The search query.
   * @param {string} pageContent - Ignored; replaced by `testContent`.
   * @param {number} [pageIndex] - Index of the page being searched.
   * @returns {Array} The matches produced by the real `match` method.
   */
  const mockMatcher = function (query, pageContent, pageIndex) {
    if (pageIndex > 0) {
      return [];
    }
    return PDFFindController.prototype.match.call(this, query, testContent, 0);
  };

  const { eventBus, pdfFindController } = await initPdfFindController(
    null,
    false,
    mockMatcher
  );

  await testSearch({
    eventBus,
    pdfFindController,
    state: { query: "hello.world" },
    selectedMatch: { pageIndex: 0, matchIndex: 0 },
    // Only one exact occurrence, on the first page.
    matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  });
});
|
||||
});
|
||||
|
||||
@ -723,11 +723,11 @@ class PDFFindController {
|
||||
|
||||
if (p1) {
|
||||
// Escape characters like *+?... to not interfere with regexp syntax.
|
||||
return `[ ]*\\${p1}[ ]*`;
|
||||
return `\\${p1}`;
|
||||
}
|
||||
if (p2) {
|
||||
// Allow whitespaces around punctuation signs.
|
||||
return `[ ]*${p2}[ ]*`;
|
||||
// Match punctuation signs exactly without allowing arbitrary whitespaces.
|
||||
return p2;
|
||||
}
|
||||
if (p3) {
|
||||
// Replace spaces by \s+ to be sure to match any spaces.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user