From 882311d9ad0092127ccaecd700cb659cebb89aa8 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 15 May 2012 12:06:52 +0400 Subject: [PATCH] get favicon from icon atom feeds & search for icons not only in html head (closes #457) --- include/functions.php | 118 +++++++++++++++++++++++------------------- include/rssfuncs.php | 9 +++- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/include/functions.php b/include/functions.php index 27c24f8ed..8f31241b8 100644 --- a/include/functions.php +++ b/include/functions.php @@ -332,9 +332,9 @@ * @access public * @return mixed The favicon URL, or false if none was found. */ - function get_favicon_url($url) { + function get_favicon_urls($url) { - $favicon_url = false; + $favicons = array(); if ($html = @fetch_file_contents($url)) { @@ -350,68 +350,78 @@ break; } - $entries = $xpath->query('/html/head/link[@rel="shortcut icon" or @rel="icon"]'); + # Limiting the search to head will not find when in body + $entries = $xpath->query('//link[@rel="shortcut icon" or @rel="icon"]'); if (count($entries) > 0) { foreach ($entries as $entry) { - $favicon_url = rewrite_relative_url($url, $entry->getAttribute("href")); - break; + array_push($favicons, rewrite_relative_url($url, $entry->getAttribute("href"))); } } } - if (!$favicon_url) - $favicon_url = rewrite_relative_url($url, "/favicon.ico"); + array_push($favicons, rewrite_relative_url($url, "/favicon.ico")); - return $favicon_url; - } // function get_favicon_url + return $favicons; + } // function get_favicon_urls - function check_feed_favicon($site_url, $feed, $link) { + function validate_favicon($url, $feed) { + // Limiting to "image" type misses those served with text/plain + $contents = fetch_file_contents($url); // , "image"); + + if (!$contents) + return false; + + // Crude image type matching. + // Patterns gleaned from the file(1) source code. + if (preg_match('/^\x00\x00\x01\x00/', $contents)) { + // 0 string \000\000\001\000 MS Windows icon resource + //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource"); + } + elseif (preg_match('/^GIF8/', $contents)) { + // 0 string GIF8 GIF image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image"); + } + elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) { + // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image"); + } + elseif (preg_match('/^\xff\xd8/', $contents)) { + // 0 beshort 0xffd8 JPEG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image"); + } + else { + //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type"); + return false; + } + + $icon_file = ICONS_DIR . "/$feed.ico"; + $fp = @fopen($icon_file, "w"); + if ($fp) { + fwrite($fp, $contents); + fclose($fp); + chmod($icon_file, 0644); + } + return true; + } + + function check_feed_favicon($site_url, $feed, $link, $atom_icon) { # print "FAVICON [$site_url]: $favicon_url\n"; - $icon_file = ICONS_DIR . "/$feed.ico"; - - if (!file_exists($icon_file)) { - $favicon_url = get_favicon_url($site_url); - - if ($favicon_url) { - // Limiting to "image" type misses those served with text/plain - $contents = fetch_file_contents($favicon_url); // , "image"); - - if ($contents) { - // Crude image type matching. - // Patterns gleaned from the file(1) source code. - if (preg_match('/^\x00\x00\x01\x00/', $contents)) { - // 0 string \000\000\001\000 MS Windows icon resource - //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource"); - } - elseif (preg_match('/^GIF8/', $contents)) { - // 0 string GIF8 GIF image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image"); - } - elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) { - // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image"); - } - elseif (preg_match('/^\xff\xd8/', $contents)) { - // 0 beshort 0xffd8 JPEG image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image"); - } - else { - //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type"); - $contents = ""; - } - } - - if ($contents) { - $fp = @fopen($icon_file, "w"); - - if ($fp) { - fwrite($fp, $contents); - fclose($fp); - chmod($icon_file, 0644); - } - } - } + if(!empty($atom_icon) && validate_favicon($atom_icon, $feed)) + return; + + $favicon_urls = array($atom_icon); + + $favicon_urls = array_unique(array_merge($favicon_urls, get_favicon_urls($site_url))); + for ($i = 1; $i < count($favicon_urls); $i++) { + if (validate_favicon($favicon_urls[$i], $feed)) + return; + } + + $favicon_urls = array_unique(array_merge($favicon_urls, get_favicon_urls(rewrite_relative_url($link, "/")))); + for (; $i < count($favicon_urls); $i++) { + if (validate_favicon($favicon_urls[$i], $feed)) + return; } } diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 48a246d88..cb985197c 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -441,7 +441,14 @@ _debug("update_rss_feed: checking favicon..."); } - check_feed_favicon($site_url, $feed, $link); + if (!file_exists(ICONS_DIR . "/$feed.ico")) { + if ($use_simplepie) { + $atom_icon = $rss->get_favicon(); + } else { + $atom_icon = $rss->channel["icon"]; + } + check_feed_favicon($site_url, $feed, $link, $atom_icon); + } if (!$registered_title || $registered_title == "[Unknown]") {