geturl: if a HEAD request is denied because the host is still living in the 20th century, retry while requesting the body (thanks to incompetent admins of arxiv.org)
This commit is contained in:
parent
aa9f7d4447
commit
fafac207c5
|
@ -360,7 +360,7 @@
|
||||||
|
|
||||||
$fetch_curl_used = true;
|
$fetch_curl_used = true;
|
||||||
|
|
||||||
if (ini_get("safe_mode") || ini_get("open_basedir")) {
|
if (ini_get("safe_mode") || ini_get("open_basedir") || defined("FORCE_GETURL")) {
|
||||||
$new_url = geturl($url);
|
$new_url = geturl($url);
|
||||||
if (!$new_url) {
|
if (!$new_url) {
|
||||||
// geturl has already populated $fetch_last_error
|
// geturl has already populated $fetch_last_error
|
||||||
|
|
|
@ -2209,7 +2209,7 @@
|
||||||
return in_array($interface, class_implements($class));
|
return in_array($interface, class_implements($class));
|
||||||
}
|
}
|
||||||
|
|
||||||
function geturl($url, $depth = 0){
|
function geturl($url, $depth = 0, $nobody = true){
|
||||||
|
|
||||||
if ($depth == 20) return $url;
|
if ($depth == 20) return $url;
|
||||||
|
|
||||||
|
@ -2230,7 +2230,7 @@
|
||||||
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0');
|
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0');
|
||||||
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
|
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
|
||||||
curl_setopt($curl, CURLOPT_HEADER, true);
|
curl_setopt($curl, CURLOPT_HEADER, true);
|
||||||
curl_setopt($curl, CURLOPT_NOBODY, true);
|
curl_setopt($curl, CURLOPT_NOBODY, $nobody);
|
||||||
curl_setopt($curl, CURLOPT_REFERER, $url);
|
curl_setopt($curl, CURLOPT_REFERER, $url);
|
||||||
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
|
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
|
||||||
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
|
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
|
||||||
|
@ -2252,6 +2252,13 @@
|
||||||
$status = curl_getinfo($curl);
|
$status = curl_getinfo($curl);
|
||||||
|
|
||||||
if($status['http_code']!=200){
|
if($status['http_code']!=200){
|
||||||
|
|
||||||
|
// server rejected the HEAD request (405 Method Not Allowed): retry with CURLOPT_NOBODY off so the body is requested
|
||||||
|
if($status['http_code'] == 405) {
|
||||||
|
curl_close($curl);
|
||||||
|
return geturl($url, $depth +1, false);
|
||||||
|
}
|
||||||
|
|
||||||
if($status['http_code'] == 301 || $status['http_code'] == 302) {
|
if($status['http_code'] == 301 || $status['http_code'] == 302) {
|
||||||
curl_close($curl);
|
curl_close($curl);
|
||||||
list($header) = explode("\r\n\r\n", $html, 2);
|
list($header) = explode("\r\n\r\n", $html, 2);
|
||||||
|
|
Loading…
Reference in New Issue