Merge pull request #19518 from Snuffleupagus/JpegStream-EXIF-replace

Move the EXIF-block replacement into `JpegStream` (PR 19356 follow-up)
This commit is contained in:
calixteman 2025-02-20 23:11:43 +01:00 committed by GitHub
commit 34ef74cf0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 54 additions and 12 deletions

View File

@ -163,10 +163,23 @@ class JpegStream extends DecodeStream {
if (!bytes) { if (!bytes) {
return null; return null;
} }
const data = this.#skipUselessBytes(bytes); let data = this.#skipUselessBytes(bytes);
if (!JpegImage.canUseImageDecoder(data, jpegOptions.colorTransform)) { const useImageDecoder = JpegImage.canUseImageDecoder(
data,
jpegOptions.colorTransform
);
if (!useImageDecoder) {
return null; return null;
} }
if (useImageDecoder.exifStart) {
// Replace the entire EXIF-block with dummy data, to ensure that a
// non-default EXIF orientation won't cause the image to be rotated
// when using `ImageDecoder` (fixes bug1942064.pdf).
//
// Copy the data first, to avoid modifying the original PDF document.
data = data.slice();
data.fill(0x00, useImageDecoder.exifStart, useImageDecoder.exifEnd);
}
decoder = new ImageDecoder({ decoder = new ImageDecoder({
data, data,
type: "image/jpeg", type: "image/jpeg",

View File

@ -782,8 +782,11 @@ function readDataBlock(data, offset) {
} }
const array = data.subarray(offset, endOffset); const array = data.subarray(offset, endOffset);
offset += array.length; return {
return { appData: array, newOffset: offset }; appData: array,
oldOffset: offset,
newOffset: offset + array.length,
};
} }
function skipData(data, offset) { function skipData(data, offset) {
@ -805,6 +808,7 @@ class JpegImage {
} }
static canUseImageDecoder(data, colorTransform = -1) { static canUseImageDecoder(data, colorTransform = -1) {
let exifOffsets = null;
let offset = 0; let offset = 0;
let numComponents = null; let numComponents = null;
let fileMarker = readUint16(data, offset); let fileMarker = readUint16(data, offset);
@ -820,7 +824,7 @@ class JpegImage {
case 0xffe1: // APP1 - Exif case 0xffe1: // APP1 - Exif
// TODO: Remove this once https://github.com/w3c/webcodecs/issues/870 // TODO: Remove this once https://github.com/w3c/webcodecs/issues/870
// is fixed. // is fixed.
const { appData, newOffset } = readDataBlock(data, offset); const { appData, oldOffset, newOffset } = readDataBlock(data, offset);
offset = newOffset; offset = newOffset;
// 'Exif\x00\x00' // 'Exif\x00\x00'
@ -832,10 +836,12 @@ class JpegImage {
appData[4] === 0 && appData[4] === 0 &&
appData[5] === 0 appData[5] === 0
) { ) {
// Replace the entire EXIF-block with dummy data, to ensure that a if (exifOffsets) {
// non-default EXIF orientation won't cause the image to be rotated throw new JpegError("Duplicate EXIF-blocks found.");
// when using `ImageDecoder` (fixes bug1942064.pdf). }
appData.fill(0x00, 6); // Don't do the EXIF-block replacement here, see `JpegStream`,
// since that can modify the original PDF document.
exifOffsets = { exifStart: oldOffset + 6, exifEnd: newOffset };
} }
fileMarker = readUint16(data, offset); fileMarker = readUint16(data, offset);
offset += 2; offset += 2;
@ -861,12 +867,12 @@ class JpegImage {
offset += 2; offset += 2;
} }
if (numComponents === 4) { if (numComponents === 4) {
return false; return null;
} }
if (numComponents === 3 && colorTransform === 0) { if (numComponents === 3 && colorTransform === 0) {
return false; return null;
} }
return true; return exifOffsets || {};
} }
parse(data, { dnlScanLines = null } = {}) { parse(data, { dnlScanLines = null } = {}) {

View File

@ -2235,6 +2235,29 @@ describe("api", function () {
expect(data.length).toEqual(basicApiFileLength); expect(data.length).toEqual(basicApiFileLength);
}); });
// Regression test for bug 1942064: after the JPEG image on page 1 has been
// parsed, the bytes returned by `pdfDoc.getData()` must still equal the bytes
// that were originally fetched from disk.
it("gets data from PDF document with JPEG image containing EXIF-data (bug 1942064)", async function () {
const typedArrayPdf = await DefaultFileReaderFactory.fetch({
path: TEST_PDFS_PATH + "bug1942064.pdf",
});
// Sanity check to make sure that we fetched the entire PDF file.
expect(typedArrayPdf instanceof Uint8Array).toEqual(true);
expect(typedArrayPdf.length).toEqual(10719);
// Hand a copy to `getDocument`, so that `typedArrayPdf` remains available as
// the reference for the final comparison.
// NOTE(review): presumably the passed-in data may be consumed/changed by the
// library, which is why a copy is used -- confirm against `getDocument`.
const loadingTask = getDocument(typedArrayPdf.slice());
const pdfDoc = await loadingTask.promise;
const page = await pdfDoc.getPage(1);
// Trigger parsing of the JPEG image.
await page.getOperatorList();
// Read the document data back only after the operator list was built, i.e.
// after the image parsing above has been triggered.
const data = await pdfDoc.getData();
expect(data instanceof Uint8Array).toEqual(true);
// Ensure that the EXIF-block wasn't modified.
expect(typedArrayPdf).toEqual(data);
// Tear down the loading task created above once all assertions have run.
await loadingTask.destroy();
});
it("gets download info", async function () { it("gets download info", async function () {
const downloadInfo = await pdfDocument.getDownloadInfo(); const downloadInfo = await pdfDocument.getDownloadInfo();
expect(downloadInfo).toEqual({ length: basicApiFileLength }); expect(downloadInfo).toEqual({ length: basicApiFileLength });