Only apply word spacing when there is a 0x20 in the text chunk
Fixes #20319.
This commit is contained in:
parent
4e7db4f59c
commit
4d15bfec0d
@ -2927,7 +2927,7 @@ class PartialEvaluator {
|
|||||||
|
|
||||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||||
const glyph = glyphs[i];
|
const glyph = glyphs[i];
|
||||||
const { category } = glyph;
|
const { category, originalCharCode } = glyph;
|
||||||
|
|
||||||
if (category.isInvisibleFormatMark) {
|
if (category.isInvisibleFormatMark) {
|
||||||
continue;
|
continue;
|
||||||
@ -2941,6 +2941,10 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
let scaledDim = glyphWidth * scale;
|
let scaledDim = glyphWidth * scale;
|
||||||
|
|
||||||
|
if (originalCharCode === 0x20) {
|
||||||
|
charSpacing += textState.wordSpacing;
|
||||||
|
}
|
||||||
|
|
||||||
if (!keepWhiteSpace && category.isWhitespace) {
|
if (!keepWhiteSpace && category.isWhitespace) {
|
||||||
// Don't push a " " in the textContentItem
|
// Don't push a " " in the textContentItem
|
||||||
// (except when it's between two non-spaces chars),
|
// (except when it's between two non-spaces chars),
|
||||||
@ -2948,13 +2952,13 @@ class PartialEvaluator {
|
|||||||
// compareWithLastPosition.
|
// compareWithLastPosition.
|
||||||
// This way we can merge real spaces and spaces due to cursor moves.
|
// This way we can merge real spaces and spaces due to cursor moves.
|
||||||
if (!font.vertical) {
|
if (!font.vertical) {
|
||||||
charSpacing += scaledDim + textState.wordSpacing;
|
charSpacing += scaledDim;
|
||||||
textState.translateTextMatrix(
|
textState.translateTextMatrix(
|
||||||
charSpacing * textState.textHScale,
|
charSpacing * textState.textHScale,
|
||||||
0
|
0
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
charSpacing += -scaledDim + textState.wordSpacing;
|
charSpacing += -scaledDim;
|
||||||
textState.translateTextMatrix(0, -charSpacing);
|
textState.translateTextMatrix(0, -charSpacing);
|
||||||
}
|
}
|
||||||
saveLastChar(" ");
|
saveLastChar(" ");
|
||||||
|
|||||||
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -746,3 +746,5 @@
|
|||||||
!issue20232.pdf
|
!issue20232.pdf
|
||||||
!bug1989304.pdf
|
!bug1989304.pdf
|
||||||
!comments.pdf
|
!comments.pdf
|
||||||
|
!issue20319_1.pdf
|
||||||
|
!issue20319_2.pdf
|
||||||
|
|||||||
BIN
test/pdfs/issue20319_1.pdf
Normal file
BIN
test/pdfs/issue20319_1.pdf
Normal file
Binary file not shown.
80
test/pdfs/issue20319_2.pdf
Normal file
80
test/pdfs/issue20319_2.pdf
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
1 0 obj
|
||||||
|
<< /Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages
|
||||||
|
/Kids [3 0 R]
|
||||||
|
/Count 1
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/MediaBox [0 0 200 200]
|
||||||
|
/Resources <<
|
||||||
|
/Font << /F1 4 0 R >>
|
||||||
|
>>
|
||||||
|
/Contents 5 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
4 0 obj
|
||||||
|
<< /Type /Font
|
||||||
|
/Subtype /Type3
|
||||||
|
/Name /F1
|
||||||
|
/FontBBox [0 0 500 500]
|
||||||
|
/FontMatrix [0.001 0 0 0.001 0 0]
|
||||||
|
/Encoding << /Differences [32 /A 33 /A] >>
|
||||||
|
/CharProcs << /A 6 0 R >>
|
||||||
|
/FirstChar 32
|
||||||
|
/LastChar 33
|
||||||
|
/Widths [500 500]
|
||||||
|
/Resources << >>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
5 0 obj
|
||||||
|
<< /Length 45 >>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 20 Tf
|
||||||
|
50 Tw
|
||||||
|
100 100 Td
|
||||||
|
<212021> Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
6 0 obj
|
||||||
|
<< /Length 77 >>
|
||||||
|
stream
|
||||||
|
500 0 d0
|
||||||
|
50 50 50 400 re
|
||||||
|
400 50 50 400 re
|
||||||
|
50 400 400 50 re
|
||||||
|
50 200 400 50 re
|
||||||
|
f
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000062 00000 n
|
||||||
|
0000000126 00000 n
|
||||||
|
0000000275 00000 n
|
||||||
|
0000000554 00000 n
|
||||||
|
0000000650 00000 n
|
||||||
|
trailer
|
||||||
|
<< /Size 7
|
||||||
|
/Root 1 0 R
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
778
|
||||||
|
%%EOF
|
||||||
@ -4027,6 +4027,37 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||||||
expect(items[1].fontName).not.toEqual(items[0].fontName);
|
expect(items[1].fontName).not.toEqual(items[0].fontName);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content with word spacing (issue 20319)", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("issue20319_1.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent({
|
||||||
|
disableNormalization: true,
|
||||||
|
});
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(text).toEqual("A A");
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("gets text content with word spacing and a fake space (issue 20319)", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("issue20319_2.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent({
|
||||||
|
disableNormalization: true,
|
||||||
|
});
|
||||||
|
const text = mergeText(items);
|
||||||
|
expect(text).toEqual("AA A");
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets empty structure tree", async function () {
|
it("gets empty structure tree", async function () {
|
||||||
const tree = await page.getStructTree();
|
const tree = await page.getStructTree();
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user