Merge pull request #20334 from calixteman/issue20319
Only apply word spacing when there is a 0x20 in the text chunk
This commit is contained in:
commit
d27cd6a4a1
@ -2927,7 +2927,7 @@ class PartialEvaluator {
|
||||
|
||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
const glyph = glyphs[i];
|
||||
const { category } = glyph;
|
||||
const { category, originalCharCode } = glyph;
|
||||
|
||||
if (category.isInvisibleFormatMark) {
|
||||
continue;
|
||||
@ -2941,6 +2941,10 @@ class PartialEvaluator {
|
||||
}
|
||||
let scaledDim = glyphWidth * scale;
|
||||
|
||||
if (originalCharCode === 0x20) {
|
||||
charSpacing += textState.wordSpacing;
|
||||
}
|
||||
|
||||
if (!keepWhiteSpace && category.isWhitespace) {
|
||||
// Don't push a " " in the textContentItem
|
||||
// (except when it's between two non-spaces chars),
|
||||
@ -2948,13 +2952,13 @@ class PartialEvaluator {
|
||||
// compareWithLastPosition.
|
||||
// This way we can merge real spaces and spaces due to cursor moves.
|
||||
if (!font.vertical) {
|
||||
charSpacing += scaledDim + textState.wordSpacing;
|
||||
charSpacing += scaledDim;
|
||||
textState.translateTextMatrix(
|
||||
charSpacing * textState.textHScale,
|
||||
0
|
||||
);
|
||||
} else {
|
||||
charSpacing += -scaledDim + textState.wordSpacing;
|
||||
charSpacing += -scaledDim;
|
||||
textState.translateTextMatrix(0, -charSpacing);
|
||||
}
|
||||
saveLastChar(" ");
|
||||
|
||||
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -746,3 +746,5 @@
|
||||
!issue20232.pdf
|
||||
!bug1989304.pdf
|
||||
!comments.pdf
|
||||
!issue20319_1.pdf
|
||||
!issue20319_2.pdf
|
||||
|
||||
BIN
test/pdfs/issue20319_1.pdf
Normal file
BIN
test/pdfs/issue20319_1.pdf
Normal file
Binary file not shown.
80
test/pdfs/issue20319_2.pdf
Normal file
80
test/pdfs/issue20319_2.pdf
Normal file
@ -0,0 +1,80 @@
|
||||
%PDF-1.7
|
||||
1 0 obj
|
||||
<< /Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<< /Type /Pages
|
||||
/Kids [3 0 R]
|
||||
/Count 1
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<< /Type /Page
|
||||
/Parent 2 0 R
|
||||
/MediaBox [0 0 200 200]
|
||||
/Resources <<
|
||||
/Font << /F1 4 0 R >>
|
||||
>>
|
||||
/Contents 5 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<< /Type /Font
|
||||
/Subtype /Type3
|
||||
/Name /F1
|
||||
/FontBBox [0 0 500 500]
|
||||
/FontMatrix [0.001 0 0 0.001 0 0]
|
||||
/Encoding << /Differences [32 /A 33 /A] >>
|
||||
/CharProcs << /A 6 0 R >>
|
||||
/FirstChar 32
|
||||
/LastChar 33
|
||||
/Widths [500 500]
|
||||
/Resources << >>
|
||||
>>
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
<< /Length 45 >>
|
||||
stream
|
||||
BT
|
||||
/F1 20 Tf
|
||||
50 Tw
|
||||
100 100 Td
|
||||
<212021> Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<< /Length 77 >>
|
||||
stream
|
||||
500 0 d0
|
||||
50 50 50 400 re
|
||||
400 50 50 400 re
|
||||
50 400 400 50 re
|
||||
50 200 400 50 re
|
||||
f
|
||||
endstream
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 7
|
||||
0000000000 65535 f
|
||||
0000000009 00000 n
|
||||
0000000062 00000 n
|
||||
0000000126 00000 n
|
||||
0000000275 00000 n
|
||||
0000000554 00000 n
|
||||
0000000650 00000 n
|
||||
trailer
|
||||
<< /Size 7
|
||||
/Root 1 0 R
|
||||
>>
|
||||
startxref
|
||||
778
|
||||
%%EOF
|
||||
@ -4027,6 +4027,37 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
expect(items[1].fontName).not.toEqual(items[0].fontName);
|
||||
});
|
||||
|
||||
it("gets text content with word spacing (issue 20319)", async function () {
|
||||
const loadingTask = getDocument(
|
||||
buildGetDocumentParams("issue20319_1.pdf")
|
||||
);
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent({
|
||||
disableNormalization: true,
|
||||
});
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(text).toEqual("A A");
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content with word spacing and a fake space (issue 20319)", async function () {
|
||||
const loadingTask = getDocument(
|
||||
buildGetDocumentParams("issue20319_2.pdf")
|
||||
);
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent({
|
||||
disableNormalization: true,
|
||||
});
|
||||
const text = mergeText(items);
|
||||
expect(text).toEqual("AA A");
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets empty structure tree", async function () {
|
||||
const tree = await page.getStructTree();
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user