From e992480baa0b91132431793b4a62dc20ae9c2eb2 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Sun, 3 Jun 2018 15:03:00 +0200 Subject: [PATCH] Fix multibyte decoding in content_disposition.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I made some mistakes when trying to make the content_disposition.js compatible with non-modern browsers (IE/Edge). Notably, text decoding was usually skipped because of the inverted logical check at the top of `textdecode`. I verified that this new version works as expected, as follows: 1. Visit https://github.com/Rob--W/open-in-browser/tree/55c71eb44e0ad71a3bb443457666fd48421612ac/test/ and get test-content-disposition.js also get test-content-disposition.node.js if using Node.js, or get test-content-disposition.html if you use a browser. 2. Modify `test-content-disposition.node.js` (or the HTML file) and change `../extension/content-disposition.js` to `PDFJS-content_disposition.js` 3. Copy the `getFilenameFromContentDispositionHeader` function from `content_disposition.js` (i.e. the file without the trailing exports) and save it as `PDFJS-content_disposition.js`. 4. Run the tests (`node test-content-disposition.node.js` or by opening `test-content-disposition.html` in a browser). 5. Confirm that there are no failures: "Finished all tests (0 failures)" The code has a best-efforts fallback for Microsoft Edge, which lacks the TextDecoder API. The fallback only supports the common UTF-8 encoding. To simulate this in a test, modify `PDFJS-content_disposition.js` and deliberately throw an error before `new TextDecoder`. There will be two failures because we don't want to include too much code to support text decoding for non-UTF-8 encodings in Edge ``` test-content-disposition.js:265 Assertion failed: Input: attachment; filename*=ISO-8859-1''%c3%a4 Expected: "ä" Actual : "ä" test-content-disposition.js:268 Assertion failed: Input: attachment; filename*=ISO-8859-1''%e2%82%ac Expected: "€" Actual : "€" ``` --- src/display/content_disposition.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/display/content_disposition.js b/src/display/content_disposition.js index af5261047..af180181e 100644 --- a/src/display/content_disposition.js +++ b/src/display/content_disposition.js @@ -78,24 +78,27 @@ function getFilenameFromContentDispositionHeader(contentDisposition) { } function textdecode(encoding, value) { if (encoding) { - if (!/^[^\x00-\xFF]+$/.test(value)) { + if (!/^[\x00-\xFF]+$/.test(value)) { return value; } try { let decoder = new TextDecoder(encoding, { fatal: true, }); let bytes = new Array(value.length); for (let i = 0; i < value.length; ++i) { - bytes[i] = value.charCodeAt(0); + bytes[i] = value.charCodeAt(i); } value = decoder.decode(new Uint8Array(bytes)); needsEncodingFixup = false; } catch (e) { // TextDecoder constructor threw - unrecognized encoding. - // Or TextDecoder API is not available. + // Or TextDecoder API is not available (in IE / Edge). if (/^utf-?8$/i.test(encoding)) { // UTF-8 is commonly used, try to support it in another way: - value = decodeURIComponent(escape(value)); - needsEncodingFixup = false; + try { + value = decodeURIComponent(escape(value)); + needsEncodingFixup = false; + } catch (err) { + } } } }