From 69a8336cf16078d31c6ed0f9a36e9d659b0a3ade Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 16 Jan 2018 16:24:36 +0100 Subject: [PATCH] Address the final round of review comments for Content-Disposition filename extraction This patch updates the `IPDFStreamReader` interface and ensures that the interface/implementation of `network.js`, `fetch_stream.js`, `node_stream.js`, and `transport_stream.js` all match properly. The unit-tests are also adjusted, to more closely replicate the actual behaviour of the various actual `IPDFStreamReader` implementations. Finally, this patch adjusts the use of the Content-Disposition filename when setting the title in the viewer, and adds `PDFDocumentProperties` support as well. --- package.json | 2 +- src/core/worker.js | 10 +++ src/display/api.js | 8 +- src/display/fetch_stream.js | 14 ++-- src/display/network.js | 13 ++- src/display/network_utils.js | 28 +++---- src/display/node_stream.js | 26 +++--- src/display/transport_stream.js | 5 ++ test/unit/api_spec.js | 1 + test/unit/network_utils_spec.js | 136 ++++++++++++++++++-------------- web/app.js | 19 +++-- web/pdf_document_properties.js | 10 ++- 12 files changed, 151 insertions(+), 121 deletions(-) diff --git a/package.json b/package.json index 52059740d..1b3f29cc8 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "uglify-es": "^3.1.2", "vinyl": "^2.1.0", "vinyl-fs": "^2.4.4", - "webpack": "^3.10.0", + "webpack": "^3.6.0", "webpack-stream": "^4.0.0", "wintersmith": "^2.4.1", "yargs": "^9.0.1" diff --git a/src/core/worker.js b/src/core/worker.js index 0326bf4f4..12a75df53 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -103,6 +103,16 @@ IPDFStreamReader.prototype = { return null; }, + /** + * Gets the Content-Disposition filename. It is defined after the headersReady + * promise is resolved. + * @returns {string|null} The filename, or `null` if the Content-Disposition + * header is missing/invalid. + */ + get filename() { + return null; + }, + /** * Gets PDF binary data length. It is defined after the headersReady promise * is resolved. diff --git a/src/display/api.js b/src/display/api.js index c776efe65..fb52474b5 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1997,14 +1997,14 @@ var WorkerTransport = (function WorkerTransportClosure() { getMetadata: function WorkerTransport_getMetadata() { return this.messageHandler.sendWithPromise('GetMetadata', null). - then(function transportMetadata(results) { + then((results) => { return { info: results[0], metadata: (results[1] ? new Metadata(results[1]) : null), - contentDispositionFileName: (this._fullReader ? - this._fullReader.fileName : null), + contentDispositionFilename: (this._fullReader ? + this._fullReader.filename : null), }; - }.bind(this)); + }); }, getStats: function WorkerTransport_getStats() { diff --git a/src/display/fetch_stream.js b/src/display/fetch_stream.js index b7e5e70c2..44d726c96 100644 --- a/src/display/fetch_stream.js +++ b/src/display/fetch_stream.js @@ -67,9 +67,9 @@ class PDFFetchStream { class PDFFetchStreamReader { constructor(stream) { this._stream = stream; - this._fileName = null; this._reader = null; this._loaded = 0; + this._filename = null; let source = stream.source; this._withCredentials = source.withCredentials; this._contentLength = source.length; @@ -104,7 +104,6 @@ class PDFFetchStreamReader { const getResponseHeader = (name) => { return response.headers.get(name); }; - let { allowRangeRequests, suggestedLength, } = validateRangeRequestCapabilities({ getResponseHeader, @@ -115,7 +114,8 @@ class PDFFetchStreamReader { this._contentLength = suggestedLength; this._isRangeSupported = allowRangeRequests; - this._fileName = extractFilenameFromHeader(getResponseHeader); + + this._filename = extractFilenameFromHeader(getResponseHeader); // We need to stop reading when range is supported and streaming is // disabled. @@ -131,12 +131,12 @@ class PDFFetchStreamReader { return this._headersCapability.promise; } - get contentLength() { - return this._contentLength; + get filename() { + return this._filename; } - get fileName() { - return this._fileName; + get contentLength() { + return this._contentLength; } get isRangeSupported() { diff --git a/src/display/network.js b/src/display/network.js index f50bf022f..54a93eeed 100644 --- a/src/display/network.js +++ b/src/display/network.js @@ -341,7 +341,7 @@ function PDFNetworkStreamFullRequestReader(manager, source) { this._requests = []; this._done = false; this._storedError = undefined; - this._fileName = null; + this._filename = null; this.onProgress = null; } @@ -371,6 +371,8 @@ PDFNetworkStreamFullRequestReader.prototype = { this._isRangeSupported = true; } + this._filename = extractFilenameFromHeader(getResponseHeader); + var networkManager = this._manager; if (networkManager.isStreamingRequest(fullRequestXhrId)) { // We can continue fetching when progressive loading is enabled, @@ -385,11 +387,6 @@ PDFNetworkStreamFullRequestReader.prototype = { networkManager.abortRequest(fullRequestXhrId); } - // Content-Disposition: attachment; filename=Naïve file.txt - if (networkManager.isPendingRequest(fullRequestXhrId)) { - this._fileName = extractFilenameFromHeader(getResponseHeader); - } - this._headersReceivedCapability.resolve(); }, @@ -438,8 +435,8 @@ PDFNetworkStreamFullRequestReader.prototype = { } }, - get fileName() { - return this._fileName; + get filename() { + return this._filename; }, get isRangeSupported() { diff --git a/src/display/network_utils.js b/src/display/network_utils.js index ad68c2b6e..1b0eb0ee9 100644 --- a/src/display/network_utils.js +++ b/src/display/network_utils.js @@ -53,6 +53,18 @@ function validateRangeRequestCapabilities({ getResponseHeader, isHttp, return returnValues; } +function extractFilenameFromHeader(getResponseHeader) { + const contentDisposition = getResponseHeader('Content-Disposition'); + if (contentDisposition) { + let parts = + /.+;\s*filename=(?:'|")(.+\.pdf)(?:'|")/gi.exec(contentDisposition); + if (parts !== null && parts.length > 1) { + return getFilenameFromUrl(parts[1]); + } + } + return null; +} + function createResponseStatusError(status, url) { if (status === 404 || status === 0 && /^file:/.test(url)) { return new MissingPDFException('Missing PDF "' + url + '".'); @@ -66,23 +78,9 @@ function validateResponseStatus(status) { return status === 200 || status === 206; } -function extractFilenameFromHeader(getResponseHeader) { - const contentDisposition = getResponseHeader('Content-Disposition'); - - if (contentDisposition) { - let parts = - /.+;\s*filename=(?:"|')(.+\.pdf)(?:"|')/gi.exec(contentDisposition); - if (parts !== null && parts.length > 1) { - return getFilenameFromUrl(parts[1]); - } - } - - return null; -} - export { createResponseStatusError, + extractFilenameFromHeader, validateRangeRequestCapabilities, validateResponseStatus, - extractFilenameFromHeader, }; diff --git a/src/display/node_stream.js b/src/display/node_stream.js index 192d87900..a8431cdb7 100644 --- a/src/display/node_stream.js +++ b/src/display/node_stream.js @@ -76,11 +76,11 @@ class BaseFullReader { this._done = false; this._errored = false; this._reason = null; - this._fileName = null; this.onProgress = null; let source = stream.source; this._contentLength = source.length; // optional this._loaded = 0; + this._filename = null; this._disableRange = source.disableRange || false; this._rangeChunkSize = source.rangeChunkSize; @@ -100,6 +100,10 @@ class BaseFullReader { return this._headersCapability.promise; } + get filename() { + return this._filename; + } + get contentLength() { return this._contentLength; } @@ -112,10 +116,6 @@ class BaseFullReader { return this._isStreamingSupported; } - get fileName() { - return this._fileName; - } - read() { return this._readCapability.promise.then(() => { if (this._done) { @@ -296,14 +296,13 @@ class PDFNodeStreamFullReader extends BaseFullReader { // here: https://nodejs.org/api/http.html#http_message_headers. return this._readableStream.headers[name.toLowerCase()]; }; - let { allowRangeRequests, suggestedLength, } = - validateRangeRequestCapabilities({ - getResponseHeader, - isHttp: stream.isHttp, - rangeChunkSize: this._rangeChunkSize, - disableRange: this._disableRange, - }); + validateRangeRequestCapabilities({ + getResponseHeader, + isHttp: stream.isHttp, + rangeChunkSize: this._rangeChunkSize, + disableRange: this._disableRange, + }); if (allowRangeRequests) { this._isRangeSupported = true; @@ -311,8 +310,7 @@ class PDFNodeStreamFullReader extends BaseFullReader { // Setting right content length. this._contentLength = suggestedLength; - // Setting the file name from the response header - this._fileName = extractFilenameFromHeader(getResponseHeader); + this._filename = extractFilenameFromHeader(getResponseHeader); }; this._request = null; diff --git a/src/display/transport_stream.js b/src/display/transport_stream.js index a89920589..21525cfac 100644 --- a/src/display/transport_stream.js +++ b/src/display/transport_stream.js @@ -119,6 +119,7 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() { function PDFDataTransportStreamReader(stream, queuedChunks) { this._stream = stream; this._done = false; + this._filename = null; this._queuedChunks = queuedChunks || []; this._requests = []; this._headersReady = Promise.resolve(); @@ -143,6 +144,10 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() { return this._headersReady; }, + get filename() { + return this._filename; + }, + get isRangeSupported() { return this._stream._isRangeSupported; }, diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index d67596a31..48dd07c91 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -794,6 +794,7 @@ describe('api', function() { expect(metadata.info['Title']).toEqual('Basic API Test'); expect(metadata.info['PDFFormatVersion']).toEqual('1.7'); expect(metadata.metadata.get('dc:title')).toEqual('Basic API Test'); + expect(metadata.contentDispositionFilename).toEqual(null); done(); }).catch(function (reason) { done.fail(reason); diff --git a/test/unit/network_utils_spec.js b/test/unit/network_utils_spec.js index 17a077687..b98ac5d01 100644 --- a/test/unit/network_utils_spec.js +++ b/test/unit/network_utils_spec.js @@ -134,6 +134,84 @@ describe('network_utils', function() { }); }); + describe('extractFilenameFromHeader', function() { + it('returns null when content disposition header is blank', function() { + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return null; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return undefined; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return ''; + } + })).toBeNull(); + }); + + it('gets the filename from the response header', function() { + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'inline'; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'attachment'; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'attachment; filename="filename.pdf"'; + } + })).toEqual('filename.pdf'); + }); + + it('returns null when content disposition is form-data', function() { + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'form-data'; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'form-data; name="filename.pdf"'; + } + })).toBeNull(); + + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'form-data; name="filename.pdf"; filename="file.pdf"'; + } + })).toEqual('file.pdf'); + }); + + it('only extracts filename with pdf extension', function () { + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'attachment; filename="filename.png"'; + } + })).toBeNull(); + }); + + it('extension validation is case insensitive', function () { + expect(extractFilenameFromHeader((headerName) => { + if (headerName === 'Content-Disposition') { + return 'form-data; name="fieldName"; filename="file.PdF"'; + } + })).toEqual('file.PdF'); + }); + }); + describe('createResponseStatusError', function() { it('handles missing PDF file responses', function() { expect(createResponseStatusError(404, 'https://foo.com/bar.pdf')).toEqual( @@ -175,62 +253,4 @@ describe('network_utils', function() { expect(validateResponseStatus(undefined)).toEqual(false); }); }); - - describe('extractFilenameFromHeader', function () { - it('returns null when content disposition header is blank', function() { - expect(extractFilenameFromHeader(function() { - return null; - })).toBeNull(); - - expect(extractFilenameFromHeader(function() { - return undefined; - })).toBeNull(); - - expect(extractFilenameFromHeader(function() { - return ''; - })).toBeNull(); - }); - - it('gets the filename from the response header', function () { - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: inline'; - })).toBeNull(); - - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: attachment'; - })).toBeNull(); - - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: attachment; filename="filename.pdf"'; - })).toBe('filename.pdf'); - }); - - it('returns null when content disposition is form-data', function () { - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: form-data'; - })).toBeNull(); - - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: form-data; name="filename"'; - })).toBeNull(); - - expect(extractFilenameFromHeader(function () { - return 'Content-Disposition: form-data; ' + - 'name="filename"; filename="file.pdf"'; - })).toBe('file.pdf'); - }); - - it('Only extracts file names with pdf extension', function () { - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: attachment; filename="filename.png"'; - })).toBeNull(); - }); - - it('Extension validation is case insensitive', function () { - expect(extractFilenameFromHeader(function() { - return 'Content-Disposition: form-data; ' + - 'name="fieldName"; filename="file.PdF"'; - })).toBe('file.PdF'); - }); - }); }); diff --git a/web/app.js b/web/app.js index ffd84eb38..edeea6ec3 100644 --- a/web/app.js +++ b/web/app.js @@ -154,7 +154,7 @@ let PDFViewerApplication = { baseUrl: '', externalServices: DefaultExternalServices, _boundEvents: {}, - contentDispositionFileName: null, + contentDispositionFilename: null, // Called once when the document is loaded. initialize(appConfig) { @@ -679,7 +679,7 @@ let PDFViewerApplication = { this.downloadComplete = false; this.url = ''; this.baseUrl = ''; - this.contentDispositionFileName = null; + this.contentDispositionFilename = null; this.pdfSidebar.reset(); this.pdfOutlineViewer.reset(); @@ -803,7 +803,7 @@ let PDFViewerApplication = { let url = this.baseUrl; // Use this.url instead of this.baseUrl to perform filename detection based // on the reference fragment as ultimate fallback if needed. - let filename = this.contentDispositionFileName || + let filename = this.contentDispositionFilename || getPDFFileNameFromURL(this.url); let downloadManager = this.downloadManager; downloadManager.onerror = (err) => { @@ -1157,10 +1157,10 @@ let PDFViewerApplication = { }); pdfDocument.getMetadata().then( - ({ info, metadata, contentDispositionFileName, }) => { + ({ info, metadata, contentDispositionFilename, }) => { this.documentInfo = info; this.metadata = metadata; - this.contentDispositionFileName = contentDispositionFileName; + this.contentDispositionFilename = contentDispositionFilename; // Provides some basic debug information console.log('PDF ' + pdfDocument.fingerprint + ' [' + @@ -1183,11 +1183,10 @@ let PDFViewerApplication = { } if (pdfTitle) { - this.setTitle(pdfTitle + ' - ' + document.title); - } - - if (!pdfTitle && contentDispositionFileName) { - this.setTitle(contentDispositionFileName); + this.setTitle( + `${pdfTitle} - ${contentDispositionFilename || document.title}`); + } else if (contentDispositionFilename) { + this.setTitle(contentDispositionFilename); } if (info.IsAcroFormPresent) { diff --git a/web/pdf_document_properties.js b/web/pdf_document_properties.js index 6727627b7..cb118920b 100644 --- a/web/pdf_document_properties.js +++ b/web/pdf_document_properties.js @@ -71,24 +71,26 @@ class PDFDocumentProperties { return; } // Get the document properties. - this.pdfDocument.getMetadata().then(({ info, metadata, }) => { + this.pdfDocument.getMetadata().then( + ({ info, metadata, contentDispositionFilename, }) => { return Promise.all([ info, metadata, + contentDispositionFilename || getPDFFileNameFromURL(this.url), this._parseFileSize(this.maybeFileSize), this._parseDate(info.CreationDate), this._parseDate(info.ModDate) ]); - }).then(([info, metadata, fileSize, creationDate, modificationDate]) => { + }).then(([info, metadata, fileName, fileSize, creationDate, modDate]) => { freezeFieldData({ - 'fileName': getPDFFileNameFromURL(this.url), + 'fileName': fileName, 'fileSize': fileSize, 'title': info.Title, 'author': info.Author, 'subject': info.Subject, 'keywords': info.Keywords, 'creationDate': creationDate, - 'modificationDate': modificationDate, + 'modificationDate': modDate, 'creator': info.Creator, 'producer': info.Producer, 'version': info.PDFFormatVersion,