geturl: if the HEAD request is denied with HTTP 405 (Method Not Allowed), retry with a GET request that fetches the body (needed for hosts such as arxiv.org that reject HEAD)

This commit is contained in:
Andrew Dolgov 2014-08-20 12:01:41 +04:00
parent aa9f7d4447
commit fafac207c5
2 changed files with 10 additions and 3 deletions

View File

@ -360,7 +360,7 @@
$fetch_curl_used = true;
if (ini_get("safe_mode") || ini_get("open_basedir")) {
if (ini_get("safe_mode") || ini_get("open_basedir") || defined("FORCE_GETURL")) {
$new_url = geturl($url);
if (!$new_url) {
// geturl has already populated $fetch_last_error

View File

@ -2209,7 +2209,7 @@
return in_array($interface, class_implements($class));
}
function geturl($url, $depth = 0){
function geturl($url, $depth = 0, $nobody = true){
if ($depth == 20) return $url;
@ -2230,7 +2230,7 @@
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0');
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_HEADER, true);
curl_setopt($curl, CURLOPT_NOBODY, true);
curl_setopt($curl, CURLOPT_NOBODY, $nobody);
curl_setopt($curl, CURLOPT_REFERER, $url);
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
@ -2252,6 +2252,13 @@
$status = curl_getinfo($curl);
if($status['http_code']!=200){
// site rejects HTTP HEAD requests (405 Method Not Allowed): retry with the body requested (GET)
if($status['http_code'] == 405) {
curl_close($curl);
return geturl($url, $depth +1, false);
}
if($status['http_code'] == 301 || $status['http_code'] == 302) {
curl_close($curl);
list($header) = explode("\r\n\r\n", $html, 2);