mirror of
https://github.com/mozilla/pdf.js.git
synced 2025-04-19 14:48:08 +02:00
Transform Web Archive URLs to avoid downloading an HTML page instead of the PDF file
Moreover, adjust one linked test case that did not conform to the standard Web Archive URL format, and adjust another linked test case whose link was dead.
This commit is contained in:
parent
3717757b39
commit
f73c9b75d9
3 changed files with 17 additions and 2 deletions
|
@ -22,7 +22,22 @@ var crypto = require('crypto');
|
|||
var http = require('http');
|
||||
var https = require('https');
|
||||
|
||||
/**
 * Rewrite a Web Archive (web.archive.org) snapshot URL so that the raw
 * archived file is requested.
 *
 * Web Archive URLs need to be transformed to add `if_` after the ID.
 * Without this, an HTML page containing an iframe with the PDF file
 * will be served instead (issue 8920).
 *
 * @param {string} url - The URL that is about to be downloaded.
 * @returns {string} The URL with `if_` appended to the snapshot ID when it
 *   has the form `http(s)://web.archive.org/web/<digits>/http(s)://...`;
 *   otherwise `url` is returned unchanged.
 */
function rewriteWebArchiveUrl(url) {
  // No `g` flag: only a single, anchored match is needed, and a global
  // regex would carry `lastIndex` state between `exec` calls.
  var webArchiveRegex =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/;
  var urlParts = webArchiveRegex.exec(url);
  if (!urlParts) {
    // Not a plain snapshot URL (or already carries a modifier such as
    // `if_` after the ID, which breaks the `\d+/` sequence): leave as-is.
    return url;
  }
  var prefix = urlParts[1];
  var snapshotId = urlParts[2];
  var archivedUrl = urlParts[3];
  return prefix + snapshotId + 'if_' + archivedUrl;
}
|
||||
|
||||
function downloadFile(file, url, callback, redirects) {
|
||||
url = rewriteWebArchiveUrl(url);
|
||||
|
||||
var completed = false;
|
||||
var protocol = /^https:\/\//.test(url) ? https : http;
|
||||
protocol.get(url, function (response) {
|
||||
|
|
|
@ -1 +1 @@
|
|||
http://web.archive.org/web/20150212141833/http://geothermal.inel.gov/publications/future_of_geothermal_energy.pdf
|
||||
https://web.archive.org/web/20170930174755/https://www.pdf-archive.com/2017/09/30/future-of-geothermal-energy/future-of-geothermal-energy.pdf
|
||||
|
|
|
@ -1 +1 @@
|
|||
http://web.archive.org/save/_embed/http://210.243.166.143/prob1.pdf
|
||||
https://web.archive.org/web/20170930161657/http://210.243.166.143/prob1.pdf
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue