diff --git a/extensions/chromium/contentscript.js b/extensions/chromium/contentscript.js
index 70fa38701..6114c7085 100644
--- a/extensions/chromium/contentscript.js
+++ b/extensions/chromium/contentscript.js
@@ -23,6 +23,9 @@ function getViewerURL(pdf_url) {
 }
 
 document.addEventListener("animationstart", onAnimationStart, true);
+if (document.contentType === "application/pdf") {
+  chrome.runtime.sendMessage({ action: "canRequestBody" }, maybeRenderPdfDoc);
+}
 
 function onAnimationStart(event) {
   if (event.animationName === "pdfjs-detected-object-or-embed") {
@@ -221,3 +224,22 @@ function getEmbeddedViewerURL(path) {
   path = a.href;
   return getViewerURL(path) + fragment;
 }
+
+function maybeRenderPdfDoc(isNotPOST) {
+  if (!isNotPOST) {
+    // The document was loaded through a POST request, but we cannot access the
+    // original response body, nor safely send a new request to fetch the PDF.
+    // Until #4483 is fixed, POST requests should be ignored.
+    return;
+  }
+
+  // Detected PDF that was not redirected by the declarativeNetRequest rules.
+  // Maybe because this was served without Content-Type and sniffed as PDF.
+  // Or because this is Chrome 127-, which does not support responseHeaders
+  // condition in declarativeNetRequest (DNR), and PDF requests are therefore
+  // not redirected via DNR.
+
+  // In any case, load the viewer.
+  console.log(`Detected PDF via document, opening viewer for ${document.URL}`);
+  location.href = getEmbeddedViewerURL(document.URL);
+}
diff --git a/extensions/chromium/pdfHandler.js b/extensions/chromium/pdfHandler.js
index 1e1629309..0fd71e9f1 100644
--- a/extensions/chromium/pdfHandler.js
+++ b/extensions/chromium/pdfHandler.js
@@ -14,6 +14,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/* globals canRequestBody */ // From preserve-referer.js
+
 "use strict";
 
 var VIEWER_URL = chrome.runtime.getURL("content/web/viewer.html");
@@ -202,7 +204,7 @@ async function registerPdfRedirectRule() {
     // https://github.com/w3c/webextensions/issues/638#issuecomment-2181124486
     //
     // We do not bother with detecting that because we fall back to catching
-    // PDF documents via the content script.
+    // PDF documents via maybeRenderPdfDoc in contentscript.js.
   } catch (e) {
     console.error("Failed to register rules to redirect PDF requests.");
     console.error(e);
@@ -303,6 +305,11 @@ chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
         url,
       });
     }
+    return undefined;
+  }
+  if (message && message.action === "canRequestBody") {
+    sendResponse(canRequestBody(sender.tab.id, sender.frameId));
+    return undefined;
   }
   return undefined;
 });
diff --git a/extensions/chromium/preserve-referer.js b/extensions/chromium/preserve-referer.js
index 3bdbff9c6..b352dee36 100644
--- a/extensions/chromium/preserve-referer.js
+++ b/extensions/chromium/preserve-referer.js
@@ -30,6 +30,8 @@ limitations under the License.
  * See setReferer in chromecom.js for more explanation of this logic.
  */
 
+/* exported canRequestBody */ // Used in pdfHandler.js
+
 // g_referrers[tabId][frameId] = referrer of PDF frame.
 var g_referrers = {};
 var g_referrerTimers = {};
@@ -38,14 +40,18 @@ var g_referrerTimers = {};
 // from being kept in memory for too long, cap the data duration to 5 minutes.
 var REFERRER_IN_MEMORY_TIME = 300000;
 
+// g_postRequests[tabId] = Set of frameId that were loaded via POST.
+var g_postRequests = {};
+
 var rIsReferer = /^referer$/i;
 chrome.webRequest.onSendHeaders.addListener(
   function saveReferer(details) {
-    const { tabId, frameId, requestHeaders } = details;
+    const { tabId, frameId, requestHeaders, method } = details;
     g_referrers[tabId] ??= {};
     g_referrers[tabId][frameId] = requestHeaders.find(h =>
       rIsReferer.test(h.name)
     )?.value;
+    setCanRequestBody(tabId, frameId, method !== "GET");
     forgetReferrerEventually(tabId);
   },
   { urls: ["*://*/*"], types: ["main_frame", "sub_frame"] },
@@ -59,9 +65,30 @@ function forgetReferrerEventually(tabId) {
   g_referrerTimers[tabId] = setTimeout(() => {
     delete g_referrers[tabId];
     delete g_referrerTimers[tabId];
+    delete g_postRequests[tabId];
   }, REFERRER_IN_MEMORY_TIME);
 }
 
+// Keeps track of whether a document in tabId + frameId is loaded through a
+// POST form submission. Although this logic has nothing to do with referrer
+// tracking, it is still here to enable re-use of the webRequest listener above.
+function setCanRequestBody(tabId, frameId, isPOST) {
+  if (isPOST) {
+    g_postRequests[tabId] ??= new Set();
+    g_postRequests[tabId].add(frameId);
+  } else {
+    g_postRequests[tabId]?.delete(frameId);
+  }
+}
+
+function canRequestBody(tabId, frameId) {
+  // Returns true unless the frame is known to be loaded through a POST request.
+  // If the background suspends, the information may be lost. This is acceptable
+  // because the information is only potentially needed shortly after document
+  // load, by contentscript.js.
+  return !g_postRequests[tabId]?.has(frameId);
+}
+
 // This method binds a webRequest event handler which adds the Referer header
 // to matching PDF resource requests (only if the Referer is non-empty). The
 // handler is removed as soon as the PDF viewer frame is unloaded.