[api-minor] Only support the Fetch API for "remote" PDF documents in Node.js environments

The Fetch API has been supported since Node.js version 18; see https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API#browser_compatibility
This commit is contained in:
Jonas Jenwald 2024-09-04 13:58:54 +02:00
parent c7407230c1
commit cbf0ca71bf
3 changed files with 57 additions and 226 deletions

View File

@ -450,15 +450,20 @@ function getDocument(src = {}) {
PDFJSDev.test("GENERIC") && PDFJSDev.test("GENERIC") &&
isNodeJS isNodeJS
) { ) {
const isFetchSupported = if (isValidFetchUrl(url)) {
typeof fetch !== "undefined" && if (
typeof Response !== "undefined" && typeof fetch === "undefined" ||
"body" in Response.prototype; typeof Response === "undefined" ||
!("body" in Response.prototype)
NetworkStream = ) {
isFetchSupported && isValidFetchUrl(url) throw new Error(
? PDFFetchStream "getDocument - the Fetch API was disabled in Node.js, see `--no-experimental-fetch`."
: PDFNodeStream; );
}
NetworkStream = PDFFetchStream;
} else {
NetworkStream = PDFNodeStream;
}
} else { } else {
NetworkStream = isValidFetchUrl(url) NetworkStream = isValidFetchUrl(url)
? PDFFetchStream ? PDFFetchStream

View File

@ -15,11 +15,6 @@
/* globals process */ /* globals process */
import { AbortException, assert, MissingPDFException } from "../shared/util.js"; import { AbortException, assert, MissingPDFException } from "../shared/util.js";
import {
createHeaders,
extractFilenameFromHeader,
validateRangeRequestCapabilities,
} from "./network_utils.js";
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error( throw new Error(
@ -37,24 +32,14 @@ function parseUrlOrPath(sourceUrl) {
return new URL(url.pathToFileURL(sourceUrl)); return new URL(url.pathToFileURL(sourceUrl));
} }
function createRequest(url, headers, callback) {
if (url.protocol === "http:") {
const http = process.getBuiltinModule("http");
return http.request(url, { headers }, callback);
}
const https = process.getBuiltinModule("https");
return https.request(url, { headers }, callback);
}
class PDFNodeStream { class PDFNodeStream {
constructor(source) { constructor(source) {
this.source = source; this.source = source;
this.url = parseUrlOrPath(source.url); this.url = parseUrlOrPath(source.url);
this.isHttp = assert(
this.url.protocol === "http:" || this.url.protocol === "https:"; this.url.protocol === "file:",
// Check if url refers to filesystem. "PDFNodeStream only supports file:// URLs."
this.isFsUrl = this.url.protocol === "file:"; );
this.headers = createHeaders(this.isHttp, source.httpHeaders);
this._fullRequestReader = null; this._fullRequestReader = null;
this._rangeRequestReaders = []; this._rangeRequestReaders = [];
@ -69,9 +54,7 @@ class PDFNodeStream {
!this._fullRequestReader, !this._fullRequestReader,
"PDFNodeStream.getFullReader can only be called once." "PDFNodeStream.getFullReader can only be called once."
); );
this._fullRequestReader = this.isFsUrl this._fullRequestReader = new PDFNodeStreamFsFullReader(this);
? new PDFNodeStreamFsFullReader(this)
: new PDFNodeStreamFullReader(this);
return this._fullRequestReader; return this._fullRequestReader;
} }
@ -79,9 +62,7 @@ class PDFNodeStream {
if (end <= this._progressiveDataLength) { if (end <= this._progressiveDataLength) {
return null; return null;
} }
const rangeReader = this.isFsUrl const rangeReader = new PDFNodeStreamFsRangeReader(this, start, end);
? new PDFNodeStreamFsRangeReader(this, start, end)
: new PDFNodeStreamRangeReader(this, start, end);
this._rangeRequestReaders.push(rangeReader); this._rangeRequestReaders.push(rangeReader);
return rangeReader; return rangeReader;
} }
@ -288,79 +269,6 @@ class BaseRangeReader {
} }
} }
class PDFNodeStreamFullReader extends BaseFullReader {
constructor(stream) {
super(stream);
// Node.js requires the `headers` to be a regular Object.
const headers = Object.fromEntries(stream.headers);
const handleResponse = response => {
if (response.statusCode === 404) {
const error = new MissingPDFException(`Missing PDF "${this._url}".`);
this._storedError = error;
this._headersCapability.reject(error);
return;
}
this._headersCapability.resolve();
this._setReadableStream(response);
const responseHeaders = new Headers(this._readableStream.headers);
const { allowRangeRequests, suggestedLength } =
validateRangeRequestCapabilities({
responseHeaders,
isHttp: stream.isHttp,
rangeChunkSize: this._rangeChunkSize,
disableRange: this._disableRange,
});
this._isRangeSupported = allowRangeRequests;
// Setting right content length.
this._contentLength = suggestedLength || this._contentLength;
this._filename = extractFilenameFromHeader(responseHeaders);
};
this._request = createRequest(this._url, headers, handleResponse);
this._request.on("error", reason => {
this._storedError = reason;
this._headersCapability.reject(reason);
});
// Note: `request.end(data)` is used to write `data` to request body
// and notify end of request. But one should always call `request.end()`
// even if there is no data to write -- (to notify the end of request).
this._request.end();
}
}
class PDFNodeStreamRangeReader extends BaseRangeReader {
constructor(stream, start, end) {
super(stream);
// Node.js requires the `headers` to be a regular Object.
const headers = Object.fromEntries(stream.headers);
headers.Range = `bytes=${start}-${end - 1}`;
const handleResponse = response => {
if (response.statusCode === 404) {
const error = new MissingPDFException(`Missing PDF "${this._url}".`);
this._storedError = error;
return;
}
this._setReadableStream(response);
};
this._request = createRequest(this._url, headers, handleResponse);
this._request.on("error", reason => {
this._storedError = reason;
});
this._request.end();
}
}
class PDFNodeStreamFsFullReader extends BaseFullReader { class PDFNodeStreamFsFullReader extends BaseFullReader {
constructor(stream) { constructor(stream) {
super(stream); super(stream);

View File

@ -14,7 +14,6 @@
*/ */
import { AbortException, isNodeJS } from "../../src/shared/util.js"; import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js"; import { PDFNodeStream } from "../../src/display/node_stream.js";
// Ensure that these tests only run in Node.js environments. // Ensure that these tests only run in Node.js environments.
@ -25,96 +24,48 @@ if (!isNodeJS) {
} }
describe("node_stream", function () { describe("node_stream", function () {
let tempServer = null;
const url = process.getBuiltinModule("url"); const url = process.getBuiltinModule("url");
const cwdURL = url.pathToFileURL(process.cwd()) + "/"; const cwdURL = url.pathToFileURL(process.cwd()) + "/";
const pdf = new URL("./test/pdfs/tracemonkey.pdf", cwdURL).href; const pdf = new URL("./test/pdfs/tracemonkey.pdf", cwdURL).href;
const pdfLength = 1016315; const pdfLength = 1016315;
beforeAll(function () { it("read filesystem pdf files", async function () {
tempServer = createTemporaryNodeServer(); const stream = new PDFNodeStream({
});
afterAll(function () {
// Close the server from accepting new connections after all test finishes.
const { server } = tempServer;
server.close();
tempServer = null;
});
it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
});
const stream2 = new PDFNodeStream({
url: pdf, url: pdf,
rangeChunkSize: 65536, rangeChunkSize: 65536,
disableStream: true, disableStream: true,
disableRange: true, disableRange: true,
}); });
const fullReader1 = stream1.getFullReader(); const fullReader = stream.getFullReader();
const fullReader2 = stream2.getFullReader();
let isStreamingSupported1, isRangeSupported1; let isStreamingSupported, isRangeSupported;
const promise1 = fullReader1.headersReady.then(() => { const promise = fullReader.headersReady.then(() => {
isStreamingSupported1 = fullReader1.isStreamingSupported; isStreamingSupported = fullReader.isStreamingSupported;
isRangeSupported1 = fullReader1.isRangeSupported; isRangeSupported = fullReader.isRangeSupported;
}); });
let isStreamingSupported2, isRangeSupported2; let len = 0;
const promise2 = fullReader2.headersReady.then(() => { const read = function () {
isStreamingSupported2 = fullReader2.isStreamingSupported; return fullReader.read().then(function (result) {
isRangeSupported2 = fullReader2.isRangeSupported;
});
let len1 = 0,
len2 = 0;
const read1 = function () {
return fullReader1.read().then(function (result) {
if (result.done) { if (result.done) {
return undefined; return undefined;
} }
len1 += result.value.byteLength; len += result.value.byteLength;
return read1(); return read();
});
};
const read2 = function () {
return fullReader2.read().then(function (result) {
if (result.done) {
return undefined;
}
len2 += result.value.byteLength;
return read2();
}); });
}; };
await Promise.all([read1(), read2(), promise1, promise2]); await Promise.all([read(), promise]);
expect(isStreamingSupported1).toEqual(false); expect(isStreamingSupported).toEqual(false);
expect(isRangeSupported1).toEqual(false); expect(isRangeSupported).toEqual(false);
expect(isStreamingSupported2).toEqual(false); expect(len).toEqual(pdfLength);
expect(isRangeSupported2).toEqual(false);
expect(len1).toEqual(pdfLength);
expect(len1).toEqual(len2);
}); });
it("read custom ranges for both http(s) and filesystem urls", async function () { it("read custom ranges for filesystem urls", async function () {
const rangeSize = 32768; const rangeSize = 32768;
const stream1 = new PDFNodeStream({ const stream = new PDFNodeStream({
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength,
rangeChunkSize: rangeSize,
disableStream: true,
disableRange: false,
});
const stream2 = new PDFNodeStream({
url: pdf, url: pdf,
length: pdfLength, length: pdfLength,
rangeChunkSize: rangeSize, rangeChunkSize: rangeSize,
@ -122,53 +73,28 @@ describe("node_stream", function () {
disableRange: false, disableRange: false,
}); });
const fullReader1 = stream1.getFullReader(); const fullReader = stream.getFullReader();
const fullReader2 = stream2.getFullReader();
let isStreamingSupported1, isRangeSupported1, fullReaderCancelled1; let isStreamingSupported, isRangeSupported, fullReaderCancelled;
let isStreamingSupported2, isRangeSupported2, fullReaderCancelled2; const promise = fullReader.headersReady.then(function () {
isStreamingSupported = fullReader.isStreamingSupported;
const promise1 = fullReader1.headersReady.then(function () { isRangeSupported = fullReader.isRangeSupported;
isStreamingSupported1 = fullReader1.isStreamingSupported;
isRangeSupported1 = fullReader1.isRangeSupported;
// we shall be able to close the full reader without issues // we shall be able to close the full reader without issues
fullReader1.cancel(new AbortException("Don't need fullReader1.")); fullReader.cancel(new AbortException("Don't need fullReader."));
fullReaderCancelled1 = true; fullReaderCancelled = true;
});
const promise2 = fullReader2.headersReady.then(function () {
isStreamingSupported2 = fullReader2.isStreamingSupported;
isRangeSupported2 = fullReader2.isRangeSupported;
fullReader2.cancel(new AbortException("Don't need fullReader2."));
fullReaderCancelled2 = true;
}); });
// Skipping fullReader results, requesting something from the PDF end. // Skipping fullReader results, requesting something from the PDF end.
const tailSize = pdfLength % rangeSize || rangeSize; const tailSize = pdfLength % rangeSize || rangeSize;
const range11Reader = stream1.getRangeReader( const range1Reader = stream.getRangeReader(
pdfLength - tailSize - rangeSize, pdfLength - tailSize - rangeSize,
pdfLength - tailSize pdfLength - tailSize
); );
const range12Reader = stream1.getRangeReader( const range2Reader = stream.getRangeReader(pdfLength - tailSize, pdfLength);
pdfLength - tailSize,
pdfLength
);
const range21Reader = stream2.getRangeReader(
pdfLength - tailSize - rangeSize,
pdfLength - tailSize
);
const range22Reader = stream2.getRangeReader(
pdfLength - tailSize,
pdfLength
);
const result11 = { value: 0 },
result12 = { value: 0 };
const result21 = { value: 0 },
result22 = { value: 0 };
const result1 = { value: 0 },
result2 = { value: 0 };
const read = function (reader, lenResult) { const read = function (reader, lenResult) {
return reader.read().then(function (result) { return reader.read().then(function (result) {
if (result.done) { if (result.done) {
@ -180,23 +106,15 @@ describe("node_stream", function () {
}; };
await Promise.all([ await Promise.all([
read(range11Reader, result11), read(range1Reader, result1),
read(range12Reader, result12), read(range2Reader, result2),
read(range21Reader, result21), promise,
read(range22Reader, result22),
promise1,
promise2,
]); ]);
expect(result11.value).toEqual(rangeSize); expect(result1.value).toEqual(rangeSize);
expect(result12.value).toEqual(tailSize); expect(result2.value).toEqual(tailSize);
expect(result21.value).toEqual(rangeSize); expect(isStreamingSupported).toEqual(false);
expect(result22.value).toEqual(tailSize); expect(isRangeSupported).toEqual(true);
expect(isStreamingSupported1).toEqual(false); expect(fullReaderCancelled).toEqual(true);
expect(isRangeSupported1).toEqual(true);
expect(fullReaderCancelled1).toEqual(true);
expect(isStreamingSupported2).toEqual(false);
expect(isRangeSupported2).toEqual(true);
expect(fullReaderCancelled2).toEqual(true);
}); });
}); });