When determining a feed-specific favicon, go through the entire list of determined favicon URLs until one looks valid, instead of using only the first match or the generic fallback

This commit is contained in:
Andrew Dolgov 2022-05-28 22:27:23 +03:00
parent 09fb2273e6
commit b0059d3f88
1 changed files with 100 additions and 63 deletions

View File

@ -1673,65 +1673,79 @@ class RSSUtils {
static function update_favicon(string $site_url, int $feed) { static function update_favicon(string $site_url, int $feed) {
$icon_file = Config::get(Config::ICONS_DIR) . "/$feed.ico"; $icon_file = Config::get(Config::ICONS_DIR) . "/$feed.ico";
$favicon_url = self::get_favicon_url($site_url); $favicon_urls = self::get_favicon_urls($site_url);
if (!$favicon_url) {
Debug::log("favicon: couldn't find favicon URL in $site_url", Debug::LOG_VERBOSE); if (count($favicon_urls) == 0) {
Debug::log("favicon: couldn't find any favicon URLs for $site_url", Debug::LOG_VERBOSE);
return false; return false;
} }
// Limiting to "image" type misses those served with text/plain // i guess we'll have to go through all of them until something looks valid...
$contents = UrlHelper::fetch([ foreach ($favicon_urls as $favicon_url) {
'url' => $favicon_url,
'max_size' => Config::get(Config::MAX_FAVICON_FILE_SIZE), // Limiting to "image" type misses those served with text/plain
//'type' => 'image', $contents = UrlHelper::fetch([
]); 'url' => $favicon_url,
if (!$contents) { 'max_size' => Config::get(Config::MAX_FAVICON_FILE_SIZE),
Debug::log("favicon: fetching $favicon_url failed", Debug::LOG_VERBOSE); //'type' => 'image',
return false; ]);
if (!$contents) {
Debug::log("favicon: fetching $favicon_url failed, skipping...", Debug::LOG_VERBOSE);
break;
}
// TODO: we could use mime_content_type() here instead of below hacks but we'll need to
// save every favicon to disk and go from there.
// also, if SVG is allowed in the future, we'll need to specifically forbid 'image/svg+xml'.
// Crude image type matching.
// Patterns gleaned from the file(1) source code.
if (preg_match('/^\x00\x00\x01\x00/', $contents)) {
// 0 string \000\000\001\000 MS Windows icon resource
//error_log("update_favicon: favicon_url=$favicon_url isa MS Windows icon resource");
}
elseif (preg_match('/^GIF8/', $contents)) {
// 0 string GIF8 GIF image data
//error_log("update_favicon: favicon_url=$favicon_url isa GIF image");
}
elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) {
// 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
//error_log("update_favicon: favicon_url=$favicon_url isa PNG image");
}
elseif (preg_match('/^\xff\xd8/', $contents)) {
// 0 beshort 0xffd8 JPEG image data
//error_log("update_favicon: favicon_url=$favicon_url isa JPG image");
}
elseif (preg_match('/^BM/', $contents)) {
// 0 string BM PC bitmap (OS2, Windows BMP files)
//error_log("update_favicon, favicon_url=$favicon_url isa BMP image");
}
else {
//error_log("update_favicon: favicon_url=$favicon_url isa UNKNOWN type");
Debug::log("favicon $favicon_url type is unknown, skipping...", Debug::LOG_VERBOSE);
break;
}
Debug::log("favicon: $favicon_url looks valid, saving to $icon_file", Debug::LOG_VERBOSE);
$fp = @fopen($icon_file, "w");
if ($fp) {
fwrite($fp, $contents);
fclose($fp);
chmod($icon_file, 0644);
clearstatcache();
return $icon_file;
} else {
Debug::log("favicon: failed to open $icon_file for writing", Debug::LOG_VERBOSE);
}
} }
// Crude image type matching. return false;
// Patterns gleaned from the file(1) source code.
if (preg_match('/^\x00\x00\x01\x00/', $contents)) {
// 0 string \000\000\001\000 MS Windows icon resource
//error_log("update_favicon: favicon_url=$favicon_url isa MS Windows icon resource");
}
elseif (preg_match('/^GIF8/', $contents)) {
// 0 string GIF8 GIF image data
//error_log("update_favicon: favicon_url=$favicon_url isa GIF image");
}
elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) {
// 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
//error_log("update_favicon: favicon_url=$favicon_url isa PNG image");
}
elseif (preg_match('/^\xff\xd8/', $contents)) {
// 0 beshort 0xffd8 JPEG image data
//error_log("update_favicon: favicon_url=$favicon_url isa JPG image");
}
elseif (preg_match('/^BM/', $contents)) {
// 0 string BM PC bitmap (OS2, Windows BMP files)
//error_log("update_favicon, favicon_url=$favicon_url isa BMP image");
}
else {
//error_log("update_favicon: favicon_url=$favicon_url isa UNKNOWN type");
Debug::log("favicon $favicon_url type is unknown (not updating)", Debug::LOG_VERBOSE);
return false;
}
Debug::log("favicon: saving to $icon_file", Debug::LOG_VERBOSE);
$fp = @fopen($icon_file, "w");
if (!$fp) {
Debug::log("favicon: failed to open $icon_file for writing", Debug::LOG_VERBOSE);
return false;
}
fwrite($fp, $contents);
fclose($fp);
chmod($icon_file, 0644);
clearstatcache();
return $icon_file;
} }
static function is_gzipped(string $feed_data): bool { static function is_gzipped(string $feed_data): bool {
@ -1853,17 +1867,33 @@ class RSSUtils {
} }
/** /**
* Try to determine the favicon URL for a feed. * Returns first determined favicon URL for a feed.
* adapted from wordpress favicon plugin by Jeff Minard (http://thecodepro.com/)
* http://dev.wp-plugins.org/file/favatars/trunk/favatars.php
*
* @param string $url A feed or page URL * @param string $url A feed or page URL
* @access public * @access public
* @return false|string The favicon URL string, or false if none was found. * @return false|string The favicon URL string, or false if none was found.
*/ */
static function get_favicon_url(string $url) { static function get_favicon_url(string $url) {
$favicon_url = false; $favicon_urls = self::get_favicon_urls($url);
if (count($favicon_urls) > 0)
return $favicon_urls[0];
else
return false;
}
/**
* Try to determine all favicon URLs for a feed.
* adapted from wordpress favicon plugin by Jeff Minard (http://thecodepro.com/)
* http://dev.wp-plugins.org/file/favatars/trunk/favatars.php
*
* @param string $url A feed or page URL
* @access public
* @return array<string> List of all determined favicon URLs or an empty array
*/
static function get_favicon_urls(string $url) : array {
$favicon_urls = [];
if ($html = @UrlHelper::fetch($url)) { if ($html = @UrlHelper::fetch($url)) {
@ -1877,20 +1907,27 @@ class RSSUtils {
break; break;
} }
$entries = $xpath->query('/html/head/link[@rel="shortcut icon" or @rel="icon"]'); $entries = $xpath->query('/html/head/link[@rel="shortcut icon" or @rel="icon" or @rel="alternate icon"]');
if (count($entries) > 0) { if (count($entries) > 0) {
foreach ($entries as $entry) { foreach ($entries as $entry) {
$favicon_url = UrlHelper::rewrite_relative($url, $entry->getAttribute("href")); $favicon_url = UrlHelper::rewrite_relative($url, $entry->getAttribute("href"));
break;
if ($favicon_url)
array_push($favicon_urls, $favicon_url);
} }
} }
} }
} }
if (!$favicon_url) if (count($favicon_urls) == 0) {
$favicon_url = UrlHelper::rewrite_relative($url, "/favicon.ico"); $favicon_url = UrlHelper::rewrite_relative($url, "/favicon.ico");
return $favicon_url; if ($favicon_url)
array_push($favicon_urls, $favicon_url);
}
return $favicon_urls;
} }
/** /**