Don't remove a dash at the end of a line when guessing URLs (bug 1974112)
This commit is contained in:
parent
85b67f19bc
commit
bb6b42177c
@ -195,4 +195,13 @@ describe("autolinker", function () {
|
|||||||
["httptest@email.com", "mailto:httptest@email.com"],
|
["httptest@email.com", "mailto:httptest@email.com"],
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("shouldn't remove the dash when it's at the end of a line (bug 1974112)", function () {
|
||||||
|
testLinks([
|
||||||
|
[
|
||||||
|
"https://github.com/pypi/linehaul-cloud-\nfunction",
|
||||||
|
"https://github.com/pypi/linehaul-cloud-function",
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -138,7 +138,7 @@ class Autolinker {
|
|||||||
this.#regex ??=
|
this.#regex ??=
|
||||||
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
|
/\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
|
||||||
|
|
||||||
const [normalizedText, diffs] = normalize(text);
|
const [normalizedText, diffs] = normalize(text, { ignoreDashEOL: true });
|
||||||
const matches = normalizedText.matchAll(this.#regex);
|
const matches = normalizedText.matchAll(this.#regex);
|
||||||
const links = [];
|
const links = [];
|
||||||
for (const match of matches) {
|
for (const match of matches) {
|
||||||
|
|||||||
@ -97,7 +97,7 @@ const NFKC_CHARS_TO_NORMALIZE = new Map();
|
|||||||
let noSyllablesRegExp = null;
|
let noSyllablesRegExp = null;
|
||||||
let withSyllablesRegExp = null;
|
let withSyllablesRegExp = null;
|
||||||
|
|
||||||
function normalize(text) {
|
function normalize(text, options = {}) {
|
||||||
// The diacritics in the text or in the query can be composed or not.
|
// The diacritics in the text or in the query can be composed or not.
|
||||||
// So we use a decomposed text using NFD (and the same for the query)
|
// So we use a decomposed text using NFD (and the same for the query)
|
||||||
// in order to be sure that diacritics are in the same order.
|
// in order to be sure that diacritics are in the same order.
|
||||||
@ -118,6 +118,7 @@ function normalize(text) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const hasSyllables = syllablePositions.length > 0;
|
const hasSyllables = syllablePositions.length > 0;
|
||||||
|
const ignoreDashEOL = options.ignoreDashEOL ?? false;
|
||||||
|
|
||||||
let normalizationRegex;
|
let normalizationRegex;
|
||||||
if (!hasSyllables && noSyllablesRegExp) {
|
if (!hasSyllables && noSyllablesRegExp) {
|
||||||
@ -294,6 +295,12 @@ function normalize(text) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (p5) {
|
if (p5) {
|
||||||
|
if (ignoreDashEOL) {
|
||||||
|
// Keep the - but remove the EOL.
|
||||||
|
shiftOrigin += 1;
|
||||||
|
eol += 1;
|
||||||
|
return p5.slice(0, -1);
|
||||||
|
}
|
||||||
// In "X-\ny", "-\n" is removed because a hyphen at the end of a line
|
// In "X-\ny", "-\n" is removed because a hyphen at the end of a line
|
||||||
// between two letters is likely here to mark a break in a word.
|
// between two letters is likely here to mark a break in a word.
|
||||||
// If X is encoded with UTF-32 then it can have a length greater than 1.
|
// If X is encoded with UTF-32 then it can have a length greater than 1.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user