Improve performance when reading very large TrueType "cmap" tables (issue 19319)

In the affected font, the total number of mapping entries is `1142348`, and no fewer than `997473` of them are duplicates.
Given that every duplicate is removed with a separate `splice` call, which causes a lot of Array elements to be moved, this becomes extremely inefficient. We can avoid that by keeping track of the seen `charCode`s and directly building the final mappings Array instead.
This commit is contained in:
Jonas Jenwald 2025-01-13 13:03:59 +01:00
parent e1b972aac3
commit 5e569cade5
3 changed files with 21 additions and 5 deletions

View File

@ -1760,17 +1760,22 @@ class Font {
mappings.sort(function (a, b) { mappings.sort(function (a, b) {
return a.charCode - b.charCode; return a.charCode - b.charCode;
}); });
for (let i = 1; i < mappings.length; i++) { const finalMappings = [],
if (mappings[i - 1].charCode === mappings[i].charCode) { seenCharCodes = new Set();
mappings.splice(i, 1); for (const map of mappings) {
i--; const { charCode } = map;
if (seenCharCodes.has(charCode)) {
continue;
} }
seenCharCodes.add(charCode);
finalMappings.push(map);
} }
return { return {
platformId: potentialTable.platformId, platformId: potentialTable.platformId,
encodingId: potentialTable.encodingId, encodingId: potentialTable.encodingId,
mappings, mappings: finalMappings,
hasShortCmap, hasShortCmap,
}; };
} }

View File

@ -0,0 +1 @@
https://github.com/user-attachments/files/18396493/2023-ESG-report-eng.pdf

View File

@ -11254,5 +11254,15 @@
"rounds": 1, "rounds": 1,
"link": true, "link": true,
"type": "other" "type": "other"
},
{
"id": "issue19319",
"file": "pdfs/issue19319.pdf",
"md5": "8612d3f0cf2dd067ea4aec9c8bf98763",
"rounds": 1,
"link": true,
"firstPage": 2,
"lastPage": 2,
"type": "eq"
} }
] ]