In some cases PDF documents can contain JPEG images that the native browser decoder cannot handle, e.g. images with DNL (Define Number of Lines) markers or images where the SOF (Start of Frame) marker contains a wildly incorrect `scanLines` parameter. Currently, for "simple" JPEG images, we're relying on native image decoding to *fail* before falling back to the implementation in `src/core/jpg.js`. In some cases (see e.g. issue 10880), the native image decoder doesn't outright fail and thus some images may not render. In an attempt to improve the current situation, this patch adds additional validation of the JPEG image SOF data to force the use of `src/core/jpg.js` directly in cases where the native JPEG decoder cannot be trusted to do the right thing. The only way to implement this is unfortunately to parse the *beginning* of the JPEG image data, looking for a SOF marker. To limit the impact of this extra parsing, the result is cached on the `JpegStream` instance and this code is only run for images which passed all of the pre-existing "can the JPEG image be natively rendered and/or decoded" checks. --- *Slightly off-topic:* Working on this *really* makes me start questioning if native rendering/decoding of JPEG images is actually a good idea. There are certain kinds of JPEG images that are not supported natively, and all of the validation which is now necessary isn't "free". At this point, in the `NativeImageDecoder`, we're having to check for certain properties in the image dictionary, parse the `ColorSpace`, and finally read the actual image data to find the SOF marker. Furthermore, we cannot just send the image to the main-thread and be done in the "JpegStream" case, but we also need to wait for rendering to complete (or fail) before continuing with other parsing.
In the "JpegDecode" case we're even having to parse part of the image on the main-thread, which seems completely at odds with the principle of doing all heavy parsing in the Worker, and there's also a couple of potentially large (temporary) allocations/copies of TypedArray data involved as well.
115 lines
3.1 KiB
JavaScript
115 lines
3.1 KiB
JavaScript
/* Copyright 2019 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
/* eslint no-var: error */
|
|
|
|
import { ColorSpace } from "./colorspace.js";
|
|
import { JpegStream } from "./jpeg_stream.js";
|
|
import { Stream } from "./stream.js";
|
|
|
|
/**
 * Decides whether a JPEG image may be handed to the native (browser) image
 * decoder and, if so, performs that hand-off through the message handler.
 */
class NativeImageDecoder {
  /**
   * @param {Object} params
   * @param {Object} params.xref - Forwarded to `ColorSpace.parse`.
   * @param {Object} params.resources - Forwarded to `ColorSpace.parse`.
   * @param {Object} params.handler - Object exposing `sendWithPromise`, used
   *   to request the "JpegDecode" operation.
   * @param {boolean} [params.forceDataSchema] - Forwarded to `image.getIR`.
   *   The default value is `false`.
   * @param {Object} params.pdfFunctionFactory - Forwarded to
   *   `ColorSpace.parse`.
   */
  constructor({
    xref,
    resources,
    handler,
    forceDataSchema = false,
    pdfFunctionFactory,
  }) {
    this.xref = xref;
    this.resources = resources;
    this.handler = handler;
    this.forceDataSchema = forceDataSchema;
    this.pdfFunctionFactory = pdfFunctionFactory;
  }

  /**
   * Checks whether `image` qualifies for native decoding.
   *
   * @param {Object} image - The image stream to test.
   * @returns {boolean} Falsy unless `image` is a `JpegStream`, passes
   *   `NativeImageDecoder.isDecodable` and has a truthy
   *   `maybeValidDimensions`.
   */
  canDecode(image) {
    // `maybeValidDimensions` is deliberately evaluated last, so that it is
    // only read for images that already passed the dictionary-based checks.
    return (
      image instanceof JpegStream &&
      NativeImageDecoder.isDecodable(
        image,
        this.xref,
        this.resources,
        this.pdfFunctionFactory
      ) &&
      image.maybeValidDimensions
    );
  }

  /**
   * Requests decoding of `image` via the "JpegDecode" message.
   *
   * @param {Object} image - The image to decode; must expose `dict` and
   *   `getIR`.
   * @returns {Promise<Stream>} Resolves with a `Stream` wrapping the decoded
   *   data and carrying the original image dictionary.
   */
  decode(image) {
    // For natively supported JPEGs send them to the main thread for decoding.
    const dict = image.dict;
    const colorSpace = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      this.xref,
      this.resources,
      this.pdfFunctionFactory
    );

    return this.handler
      .sendWithPromise("JpegDecode", [
        image.getIR(this.forceDataSchema),
        colorSpace.numComps,
      ])
      .then(({ data }) => {
        // Only the decoded bytes are needed here; any other fields in the
        // reply are ignored.
        return new Stream(data, 0, data.length, dict);
      });
  }

  /**
   * Checks if the image can be decoded and displayed by the browser without any
   * further processing such as color space conversions.
   *
   * @param {Object} image - The image whose dictionary is inspected.
   * @param {Object} xref - Forwarded to `ColorSpace.parse`.
   * @param {Object} res - Resources, forwarded to `ColorSpace.parse`.
   * @param {Object} pdfFunctionFactory - Forwarded to `ColorSpace.parse`.
   * @returns {boolean} `false` when the dictionary has decode parameters,
   *   otherwise whether the color space is DeviceGray/DeviceRGB with a
   *   default "Decode" array.
   */
  static isSupported(image, xref, res, pdfFunctionFactory) {
    const dict = image.dict;
    if (dict.has("DecodeParms") || dict.has("DP")) {
      return false;
    }
    const cs = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      xref,
      res,
      pdfFunctionFactory
    );
    // isDefaultDecode() of DeviceGray and DeviceRGB needs no `bpc` argument.
    return (
      (cs.name === "DeviceGray" || cs.name === "DeviceRGB") &&
      cs.isDefaultDecode(dict.getArray("Decode", "D"))
    );
  }

  /**
   * Checks if the image can be decoded by the browser.
   *
   * @param {Object} image - The image whose dictionary is inspected.
   * @param {Object} xref - Forwarded to `ColorSpace.parse`.
   * @param {Object} res - Resources, forwarded to `ColorSpace.parse`.
   * @param {Object} pdfFunctionFactory - Forwarded to `ColorSpace.parse`.
   * @returns {boolean} `false` when the dictionary has decode parameters,
   *   otherwise whether the color space has one or three components and a
   *   default "Decode" array for the image's bits per component.
   */
  static isDecodable(image, xref, res, pdfFunctionFactory) {
    const dict = image.dict;
    if (dict.has("DecodeParms") || dict.has("DP")) {
      return false;
    }
    const cs = ColorSpace.parse(
      dict.get("ColorSpace", "CS"),
      xref,
      res,
      pdfFunctionFactory
    );
    // A missing (or otherwise falsy) BitsPerComponent entry falls back to 1.
    const bpc = dict.get("BitsPerComponent", "BPC") || 1;
    return (
      (cs.numComps === 1 || cs.numComps === 3) &&
      cs.isDefaultDecode(dict.getArray("Decode", "D"), bpc)
    );
  }
}
|
|
|
|
export { NativeImageDecoder };
|