From fafac207c5a7bb610c4a2ce473cc677eeca92e4e Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 20 Aug 2014 12:01:41 +0400 Subject: [PATCH] geturl: if head request is denied because host is still living in 20th century, try requesting body (thanks to incompetent admins of arxiv.org) --- include/functions.php | 2 +- include/functions2.php | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/functions.php b/include/functions.php index c595944e1..ac8fb6302 100644 --- a/include/functions.php +++ b/include/functions.php @@ -360,7 +360,7 @@ $fetch_curl_used = true; - if (ini_get("safe_mode") || ini_get("open_basedir")) { + if (ini_get("safe_mode") || ini_get("open_basedir") || defined("FORCE_GETURL")) { $new_url = geturl($url); if (!$new_url) { // geturl has already populated $fetch_last_error diff --git a/include/functions2.php b/include/functions2.php index 9d8bc76aa..555aa04e9 100644 --- a/include/functions2.php +++ b/include/functions2.php @@ -2209,7 +2209,7 @@ return in_array($interface, class_implements($class)); } - function geturl($url, $depth = 0){ + function geturl($url, $depth = 0, $nobody = true){ if ($depth == 20) return $url; @@ -2230,7 +2230,7 @@ curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0'); curl_setopt($curl, CURLOPT_HTTPHEADER, $header); curl_setopt($curl, CURLOPT_HEADER, true); - curl_setopt($curl, CURLOPT_NOBODY, true); + curl_setopt($curl, CURLOPT_NOBODY, $nobody); curl_setopt($curl, CURLOPT_REFERER, $url); curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate'); curl_setopt($curl, CURLOPT_AUTOREFERER, true); @@ -2252,6 +2252,13 @@ $status = curl_getinfo($curl); if($status['http_code']!=200){ + + // server rejected the HEAD request (405 Method Not Allowed); retry with a GET that fetches the body + if($status['http_code'] == 405) { + curl_close($curl); + return geturl($url, $depth +1, false); + } + if($status['http_code'] == 301 || $status['http_code'] == 302) { curl_close($curl); list($header) = 
explode("\r\n\r\n", $html, 2);