get_favicon_url: rewrite using DOMDocument

This commit is contained in:
Andrew Dolgov 2010-11-11 16:10:39 +03:00
parent fa40373395
commit ed2142982b
1 changed file with 18 additions and 24 deletions

View File

@@ -397,37 +397,31 @@
*/ */
function get_favicon_url($url) { function get_favicon_url($url) {
$favicon_url = false;
if ($html = @fetch_file_contents($url)) { if ($html = @fetch_file_contents($url)) {
if ( preg_match('/<link[^>]+rel="(?:shortcut )?icon"[^>]+?href="([^"]+?)"/si', $html, $matches)) { libxml_use_internal_errors(true);
// Attempt to grab a favicon link from their webpage url
$linkUrl = html_entity_decode($matches[1]);
if (substr($linkUrl, 0, 1) == '/') { $doc = new DOMDocument();
$urlParts = parse_url($url); $doc->loadHTML($html);
$faviconURL = $urlParts['scheme'].'://'.$urlParts['host'].$linkUrl; $xpath = new DOMXPath($doc);
} else if (substr($linkUrl, 0, 7) == 'http://') { $entries = $xpath->query('/html/head/link[@rel="shortcut icon"]');
$faviconURL = $linkUrl;
} else { if (count($entries) > 0) {
$pos = strrpos($url, "/"); foreach ($entries as $entry) {
// no "/" in url or "/" is part of "://" $favicon_url = rewrite_relative_url($url, $entry->getAttribute("href"));
if ($pos === false || $pos == (strpos($url, "://")+2)) { break;
$faviconURL = $url.'/'.$linkUrl;
} else {
$faviconURL = substr($url, 0, $pos+1).$linkUrl;
}
} }
}
} else {
// If unsuccessful, attempt to "guess" the favicon location
$urlParts = parse_url($url);
$faviconURL = $urlParts['scheme'].'://'.$urlParts['host'].'/favicon.ico';
}
} }
if (!$favicon_url)
$favicon_url = rewrite_relative_url($url, "/favicon.ico");
// Run a test to see if what we have attempted to get actually exists. // Run a test to see if what we have attempted to get actually exists.
if(USE_CURL_FOR_ICONS || url_validate($faviconURL)) { if(USE_CURL_FOR_ICONS || url_validate($favicon_url)) {
return $faviconURL; return $favicon_url;
} else { } else {
return false; return false;
} }