diff --git a/extensions/chromium/.eslintrc b/extensions/chromium/.eslintrc index ba6fdbd4b..dd74b7b7c 100644 --- a/extensions/chromium/.eslintrc +++ b/extensions/chromium/.eslintrc @@ -14,4 +14,23 @@ "rules": { "no-var": "off", }, + + "overrides": [ + { + // Include all files referenced in background.js + "files": [ + "options/migration.js", + "preserve-referer.js", + "pdfHandler.js", + "extension-router.js", + "suppress-update.js", + "telemetry.js" + ], + "env": { + // Background script is a service worker. + "browser": false, + "serviceworker": true + } + } + ] } diff --git a/extensions/chromium/pdfHandler.html b/extensions/chromium/background.js similarity index 62% rename from extensions/chromium/pdfHandler.html rename to extensions/chromium/background.js index 7a5a036e9..bb448be54 100644 --- a/extensions/chromium/pdfHandler.html +++ b/extensions/chromium/background.js @@ -1,6 +1,5 @@ - - - - - - - - +*/ + +"use strict"; + +importScripts( + "options/migration.js", + "preserve-referer.js", + "pdfHandler.js", + "extension-router.js", + "suppress-update.js", + "telemetry.js" +); diff --git a/extensions/chromium/extension-router.js b/extensions/chromium/extension-router.js index 6d64b41ac..5bbb0d1a5 100644 --- a/extensions/chromium/extension-router.js +++ b/extensions/chromium/extension-router.js @@ -55,49 +55,50 @@ limitations under the License. return undefined; } - // TODO(rob): Use declarativeWebRequest once declared URL-encoding is - // supported, see http://crbug.com/273589 - // (or rewrite the query string parser in viewer.js to get it to - // recognize the non-URL-encoded PDF URL.) 
- chrome.webRequest.onBeforeRequest.addListener( - function (details) { + function resolveViewerURL(originalUrl) { + if (originalUrl.startsWith(CRX_BASE_URL)) { // This listener converts chrome-extension://.../http://...pdf to // chrome-extension://.../content/web/viewer.html?file=http%3A%2F%2F...pdf - var url = parseExtensionURL(details.url); + var url = parseExtensionURL(originalUrl); if (url) { url = VIEWER_URL + "?file=" + url; - var i = details.url.indexOf("#"); + var i = originalUrl.indexOf("#"); if (i > 0) { - url += details.url.slice(i); + url += originalUrl.slice(i); } - console.log("Redirecting " + details.url + " to " + url); - return { redirectUrl: url }; - } - return undefined; - }, - { - types: ["main_frame", "sub_frame"], - urls: schemes.map(function (scheme) { - // Format: "chrome-extension://[EXTENSIONID]/*" - return CRX_BASE_URL + scheme + "*"; - }), - }, - ["blocking"] - ); - - // When session restore is used, viewer pages may be loaded before the - // webRequest event listener is attached (= page not found). - // Or the extension could have been crashed (OOM), leaving a sad tab behind. - // Reload these tabs. - chrome.tabs.query( - { - url: CRX_BASE_URL + "*:*", - }, - function (tabsFromLastSession) { - for (const { id } of tabsFromLastSession) { - chrome.tabs.reload(id); + return url; } } + return undefined; + } + + self.addEventListener("fetch", event => { + const req = event.request; + if (req.destination === "document") { + var url = resolveViewerURL(req.url); + if (url) { + console.log("Redirecting " + req.url + " to " + url); + event.respondWith(Response.redirect(url)); + } + } + }); + + // Ctrl + F5 bypasses service worker. The pretty extension URLs will fail to + // resolve in that case. Catch this and redirect to destination. + chrome.webNavigation.onErrorOccurred.addListener( + details => { + if (details.frameId !== 0) { + // Not a top-level frame. Cannot easily navigate a specific child frame. 
+ return; + } + const url = resolveViewerURL(details.url); + if (url) { + console.log(`Redirecting ${details.url} to ${url} (fallback)`); + chrome.tabs.update(details.tabId, { url }); + } + }, + { url: [{ urlPrefix: CRX_BASE_URL }] } ); + console.log("Set up extension URL router."); })(); diff --git a/extensions/chromium/manifest.json b/extensions/chromium/manifest.json index 8fd2649c1..bcba19296 100644 --- a/extensions/chromium/manifest.json +++ b/extensions/chromium/manifest.json @@ -1,6 +1,6 @@ { "minimum_chrome_version": "88", - "manifest_version": 2, + "manifest_version": 3, "name": "PDF Viewer", "version": "PDFJSSCRIPT_VERSION", "description": "Uses HTML5 to display PDF files directly in the browser.", @@ -13,12 +13,11 @@ "alarms", "declarativeNetRequestWithHostAccess", "webRequest", - "webRequestBlocking", - "", "tabs", "webNavigation", "storage" ], + "host_permissions": [""], "content_scripts": [ { "matches": ["http://*/*", "https://*/*", "file://*/*"], @@ -36,18 +35,24 @@ }, "options_page": "options/options.html", "background": { - "page": "pdfHandler.html" + "service_worker": "background.js" }, "incognito": "split", "web_accessible_resources": [ - "content/web/viewer.html", - "http:/*", - "https:/*", - "file:/*", - "chrome-extension:/*", - "blob:*", - "data:*", - "filesystem:/*", - "drive:*" + { + "resources": [ + "content/web/viewer.html", + "http:/*", + "https:/*", + "file:/*", + "chrome-extension:/*", + "blob:*", + "data:*", + "filesystem:/*", + "drive:*" + ], + "matches": [""], + "extension_ids": ["*"] + } ] } diff --git a/extensions/chromium/options/migration.js b/extensions/chromium/options/migration.js index dd8fb6ef7..9b084e45a 100644 --- a/extensions/chromium/options/migration.js +++ b/extensions/chromium/options/migration.js @@ -13,10 +13,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -/* eslint strict: ["error", "function"] */ +"use strict"; -(function () { - "use strict"; +chrome.runtime.onInstalled.addListener(({ reason }) => { + if (reason !== "update") { + // We only need to run migration logic for extension updates, not for new + // installs or browser updates. + return; + } var storageLocal = chrome.storage.local; var storageSync = chrome.storage.sync; @@ -37,16 +41,12 @@ limitations under the License. }); }); - function getStorageNames(callback) { - var x = new XMLHttpRequest(); + async function getStorageNames(callback) { var schema_location = chrome.runtime.getManifest().storage.managed_schema; - x.open("get", chrome.runtime.getURL(schema_location)); - x.onload = function () { - var storageKeys = Object.keys(x.response.properties); - callback(storageKeys); - }; - x.responseType = "json"; - x.send(); + var res = await fetch(chrome.runtime.getURL(schema_location)); + var storageManifest = await res.json(); + var storageKeys = Object.keys(storageManifest.properties); + callback(storageKeys); } // Save |values| to storage.sync and delete the values with that key from @@ -150,4 +150,4 @@ limitations under the License. } ); } -})(); +}); diff --git a/extensions/chromium/pdfHandler.js b/extensions/chromium/pdfHandler.js index 992ec883c..1e1629309 100644 --- a/extensions/chromium/pdfHandler.js +++ b/extensions/chromium/pdfHandler.js @@ -18,6 +18,197 @@ limitations under the License. var VIEWER_URL = chrome.runtime.getURL("content/web/viewer.html"); +// Use in-memory storage to ensure that the DNR rules have been registered at +// least once per session. runtime.onInstalled would have been the most fitting +// event to ensure that, except there are cases where it does not fire when +// needed. E.g. 
in incognito mode: https://issues.chromium.org/issues/41029550 +chrome.storage.session.get({ hasPdfRedirector: false }, async items => { + if (items?.hasPdfRedirector) { + return; + } + const rules = await chrome.declarativeNetRequest.getDynamicRules(); + if (rules.length) { + // Dynamic rules persist across extension updates. We don't expect other + // dynamic rules, so just remove them all. + await chrome.declarativeNetRequest.updateDynamicRules({ + removeRuleIds: rules.map(r => r.id), + }); + } + await registerPdfRedirectRule(); + + // Only set the flag in the end, so that we know for sure that all + // asynchronous initialization logic has run. If not, then we will run the + // logic again at the next background wakeup. + chrome.storage.session.set({ hasPdfRedirector: true }); +}); + +/** + * Registers declarativeNetRequest rules to redirect PDF requests to the viewer. + * The caller should clear any previously existing dynamic DNR rules. + * + * The logic here is the declarative version of the runtime logic in the + * webRequest.onHeadersReceived implementation at + * https://github.com/mozilla/pdf.js/blob/0676ea19cf17023ec8c2d6ad69a859c345c01dc1/extensions/chromium/pdfHandler.js#L34-L152 + */ +async function registerPdfRedirectRule() { + // "allow" means to ignore rules (from this extension) with lower priority. + const ACTION_IGNORE_OTHER_RULES = { type: "allow" }; + + // Redirect to viewer. The rule condition is expected to specify regexFilter + // that matches the full request URL. + const ACTION_REDIRECT_TO_VIEWER = { + type: "redirect", + redirect: { + // DNR does not support transformations such as encodeURIComponent on the + // match, so we just concatenate the URL as is without modifications. 
+ // TODO: use "?file=\\0" when DNR supports transformations as proposed at + // https://github.com/w3c/webextensions/issues/636#issuecomment-2165978322 + regexSubstitution: VIEWER_URL + "?DNR:\\0", + }, + }; + + // Rules in order of priority (highest priority rule first). + // The required "id" fields will be auto-generated later. + const addRules = [ + { + // Do not redirect for URLs containing pdfjs.action=download. + condition: { + urlFilter: "pdfjs.action=download", + resourceTypes: ["main_frame", "sub_frame"], + }, + action: ACTION_IGNORE_OTHER_RULES, + }, + { + // Redirect local PDF files if isAllowedFileSchemeAccess is true. No-op + // otherwise and then handled by webNavigation.onBeforeNavigate below. + condition: { + regexFilter: "^file://.*\\.pdf$", + resourceTypes: ["main_frame", "sub_frame"], + }, + action: ACTION_REDIRECT_TO_VIEWER, + }, + { + // Respect the Content-Disposition:attachment header in sub_frame. But: + // Display the PDF viewer regardless of the Content-Disposition header if + // the file is displayed in the main frame, since most often users want to + // view a PDF, and servers are often misconfigured. + condition: { + urlFilter: "*", + resourceTypes: ["sub_frame"], // Note: no main_frame, handled below. + responseHeaders: [ + { + header: "content-disposition", + values: ["attachment*"], + }, + ], + }, + action: ACTION_IGNORE_OTHER_RULES, + }, + { + // If the query string contains "=download", do not unconditionally force + // viewer to open the PDF, but first check whether the Content-Disposition + // header specifies an attachment. This allows sites like Google Drive to + // operate correctly (#6106). + condition: { + urlFilter: "=download", + resourceTypes: ["main_frame"], // No sub_frame, was handled before. + responseHeaders: [ + { + header: "content-disposition", + values: ["attachment*"], + }, + ], + }, + action: ACTION_IGNORE_OTHER_RULES, + }, + { + // Regular http(s) PDF requests. 
+ condition: { + regexFilter: "^.*$", + // The viewer does not have the original request context and issues a + // GET request. The original response to POST requests is unavailable. + excludedRequestMethods: ["post"], + resourceTypes: ["main_frame", "sub_frame"], + responseHeaders: [ + { + header: "content-type", + values: ["application/pdf", "application/pdf;*"], + }, + ], + }, + action: ACTION_REDIRECT_TO_VIEWER, + }, + { + // Wrong MIME-type, but a PDF file according to the file name in the URL. + condition: { + regexFilter: "^.*\\.pdf\\b.*$", + // The viewer does not have the original request context and issues a + // GET request. The original response to POST requests is unavailable. + excludedRequestMethods: ["post"], + resourceTypes: ["main_frame", "sub_frame"], + responseHeaders: [ + { + header: "content-type", + values: ["application/octet-stream", "application/octet-stream;*"], + }, + ], + }, + action: ACTION_REDIRECT_TO_VIEWER, + }, + { + // Wrong MIME-type, but a PDF file according to Content-Disposition. + condition: { + regexFilter: "^.*$", + // The viewer does not have the original request context and issues a + // GET request. The original response to POST requests is unavailable. + excludedRequestMethods: ["post"], + resourceTypes: ["main_frame", "sub_frame"], + responseHeaders: [ + { + header: "content-disposition", + values: ["*.pdf", '*.pdf"*', "*.pdf'*"], + }, + ], + // We only want to match by content-disposition if Content-Type is set + // to application/octet-stream. The responseHeaders condition is a + // logical OR instead of AND, so to simulate the AND condition we use + // the double negation of excludedResponseHeaders + excludedValues. + // This matches any request whose content-type header is set and not + // "application/octet-stream". It will also match if "content-type" is + // not set, but we are okay with that since the browser would usually + // try to sniff the MIME type in that case. 
+ excludedResponseHeaders: [ + { + header: "content-type", + excludedValues: [ + "application/octet-stream", + "application/octet-stream;*", + ], + }, + ], + }, + action: ACTION_REDIRECT_TO_VIEWER, + }, + ]; + for (const [i, rule] of addRules.entries()) { + // id must be unique and at least 1, but i starts at 0. So add +1. + rule.id = i + 1; + rule.priority = addRules.length - i; + } + try { + await chrome.declarativeNetRequest.updateDynamicRules({ addRules }); + // Note: condition.responseHeaders is only supported in Chrome 128+, but + // does not trigger errors in Chrome 123 - 127 as explained at: + // https://github.com/w3c/webextensions/issues/638#issuecomment-2181124486 + // + // We do not bother with detecting that because we fall back to catching + // PDF documents via the content script. + } catch (e) { + console.error("Failed to register rules to redirect PDF requests."); + console.error(e); + } +} + function getViewerURL(pdf_url) { // |pdf_url| may contain a fragment such as "#page=2". That should be passed // as a fragment to the viewer, not encoded in pdf_url. @@ -30,171 +221,42 @@ function getViewerURL(pdf_url) { return VIEWER_URL + "?file=" + encodeURIComponent(pdf_url) + hash; } -/** - * @param {Object} details First argument of the webRequest.onHeadersReceived - * event. The property "url" is read. - * @returns {boolean} True if the PDF file should be downloaded. - */ -function isPdfDownloadable(details) { - if (details.url.includes("pdfjs.action=download")) { - return true; - } - // Display the PDF viewer regardless of the Content-Disposition header if the - // file is displayed in the main frame, since most often users want to view - // a PDF, and servers are often misconfigured. - // If the query string contains "=download", do not unconditionally force the - // viewer to open the PDF, but first check whether the Content-Disposition - // header specifies an attachment. This allows sites like Google Drive to - // operate correctly (#6106). 
- if (details.type === "main_frame" && !details.url.includes("=download")) { - return false; - } - var cdHeader = - details.responseHeaders && - getHeaderFromHeaders(details.responseHeaders, "content-disposition"); - return cdHeader && /^attachment/i.test(cdHeader.value); -} - -/** - * Get the header from the list of headers for a given name. - * @param {Array} headers responseHeaders of webRequest.onHeadersReceived - * @returns {undefined|{name: string, value: string}} The header, if found. - */ -function getHeaderFromHeaders(headers, headerName) { - for (const header of headers) { - if (header.name.toLowerCase() === headerName) { - return header; - } - } - return undefined; -} - -/** - * Check if the request is a PDF file. - * @param {Object} details First argument of the webRequest.onHeadersReceived - * event. The properties "responseHeaders" and "url" - * are read. - * @returns {boolean} True if the resource is a PDF file. - */ -function isPdfFile(details) { - var header = getHeaderFromHeaders(details.responseHeaders, "content-type"); - if (header) { - var headerValue = header.value.toLowerCase().split(";", 1)[0].trim(); - if (headerValue === "application/pdf") { - return true; - } - if (headerValue === "application/octet-stream") { - if (details.url.toLowerCase().indexOf(".pdf") > 0) { - return true; - } - var cdHeader = getHeaderFromHeaders( - details.responseHeaders, - "content-disposition" - ); - if (cdHeader && /\.pdf(["']|$)/i.test(cdHeader.value)) { - return true; - } - } - } - return false; -} - -/** - * Takes a set of headers, and set "Content-Disposition: attachment". - * @param {Object} details First argument of the webRequest.onHeadersReceived - * event. The property "responseHeaders" is read and - * modified if needed. - * @returns {Object|undefined} The return value for the onHeadersReceived event. - * Object with key "responseHeaders" if the headers - * have been modified, undefined otherwise. 
- */ -function getHeadersWithContentDispositionAttachment(details) { - var headers = details.responseHeaders; - var cdHeader = getHeaderFromHeaders(headers, "content-disposition"); - if (!cdHeader) { - cdHeader = { name: "Content-Disposition" }; - headers.push(cdHeader); - } - if (!/^attachment/i.test(cdHeader.value)) { - cdHeader.value = "attachment" + cdHeader.value.replace(/^[^;]+/i, ""); - return { responseHeaders: headers }; - } - return undefined; -} - -chrome.webRequest.onHeadersReceived.addListener( +// If the user has not granted access to file:-URLs, then declarativeNetRequest +// will not catch the request. It is still visible through the webNavigation +// API though, and we can replace the tab with the viewer. +// The viewer will detect that it has no access to file:-URLs, and prompt the +// user to activate file permissions. +chrome.webNavigation.onBeforeNavigate.addListener( function (details) { - if (details.method !== "GET") { - // Don't intercept POST requests until http://crbug.com/104058 is fixed. - return undefined; - } - if (!isPdfFile(details)) { - return undefined; - } - if (isPdfDownloadable(details)) { - // Force download by ensuring that Content-Disposition: attachment is set - return getHeadersWithContentDispositionAttachment(details); - } + // Note: pdfjs.action=download is not checked here because that code path + // is not reachable for local files through the viewer when we do not have + // file:-access. + if (details.frameId === 0) { + chrome.extension.isAllowedFileSchemeAccess(function (isAllowedAccess) { + if (isAllowedAccess) { + // Expected to be handled by DNR. Don't do anything. 
+ return; + } - var viewerUrl = getViewerURL(details.url); - - return { redirectUrl: viewerUrl }; - }, - { - urls: [""], - types: ["main_frame", "sub_frame"], - }, - ["blocking", "responseHeaders"] -); - -chrome.webRequest.onBeforeRequest.addListener( - function (details) { - if (isPdfDownloadable(details)) { - return undefined; - } - - var viewerUrl = getViewerURL(details.url); - - return { redirectUrl: viewerUrl }; - }, - { - urls: ["file://*/*.pdf", "file://*/*.PDF"], - types: ["main_frame", "sub_frame"], - }, - ["blocking"] -); - -chrome.extension.isAllowedFileSchemeAccess(function (isAllowedAccess) { - if (isAllowedAccess) { - return; - } - // If the user has not granted access to file:-URLs, then the webRequest API - // will not catch the request. It is still visible through the webNavigation - // API though, and we can replace the tab with the viewer. - // The viewer will detect that it has no access to file:-URLs, and prompt the - // user to activate file permissions. - chrome.webNavigation.onBeforeNavigate.addListener( - function (details) { - if (details.frameId === 0 && !isPdfDownloadable(details)) { chrome.tabs.update(details.tabId, { url: getViewerURL(details.url), }); - } - }, - { - url: [ - { - urlPrefix: "file://", - pathSuffix: ".pdf", - }, - { - urlPrefix: "file://", - pathSuffix: ".PDF", - }, - ], + }); } - ); -}); + }, + { + url: [ + { + urlPrefix: "file://", + pathSuffix: ".pdf", + }, + { + urlPrefix: "file://", + pathSuffix: ".PDF", + }, + ], + } +); chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) { if (message && message.action === "getParentOrigin") { diff --git a/extensions/chromium/suppress-update.js b/extensions/chromium/suppress-update.js index 009651d9e..f2dfcc3e0 100644 --- a/extensions/chromium/suppress-update.js +++ b/extensions/chromium/suppress-update.js @@ -20,7 +20,10 @@ limitations under the License. // viewer is not displaying any PDF files. 
Otherwise the tabs would close, which // is quite disruptive (crbug.com/511670). chrome.runtime.onUpdateAvailable.addListener(function () { - if (chrome.extension.getViews({ type: "tab" }).length === 0) { + chrome.tabs.query({ url: chrome.runtime.getURL("*") }, tabs => { + if (tabs?.length) { + return; + } chrome.runtime.reload(); - } + }); }); diff --git a/web/chromecom.js b/web/chromecom.js index 4f72b2327..1ece30479 100644 --- a/web/chromecom.js +++ b/web/chromecom.js @@ -31,7 +31,11 @@ if (typeof PDFJSDev === "undefined" || !PDFJSDev.test("CHROME")) { // is rewritten as soon as possible. const queryString = document.location.search.slice(1); const m = /(^|&)file=([^&]*)/.exec(queryString); - const defaultUrl = m ? decodeURIComponent(m[2]) : ""; + let defaultUrl = m ? decodeURIComponent(m[2]) : ""; + if (!defaultUrl && queryString.startsWith("DNR:")) { + // Redirected via DNR, see registerPdfRedirectRule in pdfHandler.js. + defaultUrl = queryString.slice(4); + } // Example: chrome-extension://.../http://example.com/file.pdf const humanReadableUrl = "/" + defaultUrl + location.hash;