From cb401af6f6f278b94b664569796c49f045e3ffde Mon Sep 17 00:00:00 2001 From: wn_ Date: Sun, 21 Feb 2021 18:40:43 +0000 Subject: [PATCH 1/3] Let 'RSSUtils::check_feed_favicon' update existing favicons. --- classes/rssutils.php | 114 ++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/classes/rssutils.php b/classes/rssutils.php index 5fb2e7712..13f63bc7a 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -594,7 +594,7 @@ class RSSUtils { $favicon_file = ICONS_DIR . "/$feed.ico"; $favicon_modified = file_exists($favicon_file) ? filemtime($favicon_file) : -1; - Debug::log("checking favicon...", Debug::$LOG_VERBOSE); + Debug::log("checking favicon for feed $feed...", Debug::$LOG_VERBOSE); self::check_feed_favicon($site_url, $feed); $favicon_modified_new = file_exists($favicon_file) ? filemtime($favicon_file) : -1; @@ -1643,58 +1643,72 @@ class RSSUtils { } static function check_feed_favicon($site_url, $feed) { - # print "FAVICON [$site_url]: $favicon_url\n"; - $icon_file = ICONS_DIR . "/$feed.ico"; - if (!file_exists($icon_file)) { - $favicon_url = self::get_favicon_url($site_url); - - if ($favicon_url) { - // Limiting to "image" type misses those served with text/plain - $contents = UrlHelper::fetch($favicon_url); // , "image"); - - if ($contents) { - // Crude image type matching. - // Patterns gleaned from the file(1) source code. - if (preg_match('/^\x00\x00\x01\x00/', $contents)) { - // 0 string \000\000\001\000 MS Windows icon resource - //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource"); - } - elseif (preg_match('/^GIF8/', $contents)) { - // 0 string GIF8 GIF image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image"); - } - elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) { - // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image"); - } - elseif (preg_match('/^\xff\xd8/', $contents)) { - // 0 beshort 0xffd8 JPEG image data - //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image"); - } - elseif (preg_match('/^BM/', $contents)) { - // 0 string BM PC bitmap (OS2, Windows BMP files) - //error_log("check_feed_favicon, favicon_url=$favicon_url isa BMP image"); - } - else { - //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type"); - $contents = ""; - } - } - - if ($contents) { - $fp = @fopen($icon_file, "w"); - - if ($fp) { - fwrite($fp, $contents); - fclose($fp); - chmod($icon_file, 0644); - } - } - } - return $icon_file; + $favicon_url = self::get_favicon_url($site_url); + if (!$favicon_url) { + Debug::log("couldn't find favicon URL in $site_url", Debug::$LOG_VERBOSE); + return false; } + + // Limiting to "image" type misses those served with text/plain + $contents = UrlHelper::fetch(['url' => $favicon_url]); // , "image"); + if (!$contents) { + Debug::log("fetching favicon $favicon_url failed", Debug::$LOG_VERBOSE); + return false; + } + + $original_contents = file_exists($icon_file) ? file_get_contents($icon_file) : null; + if ($original_contents) { + if (strcmp($contents, $original_contents) === 0) { + Debug::log("favicon content has not changed", Debug::$LOG_VERBOSE); + return $icon_file; + } + Debug::log("favicon content has changed", Debug::$LOG_VERBOSE); + } + + // Crude image type matching. + // Patterns gleaned from the file(1) source code. + if (preg_match('/^\x00\x00\x01\x00/', $contents)) { + // 0 string \000\000\001\000 MS Windows icon resource + //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource"); + } + elseif (preg_match('/^GIF8/', $contents)) { + // 0 string GIF8 GIF image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image"); + } + elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) { + // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image"); + } + elseif (preg_match('/^\xff\xd8/', $contents)) { + // 0 beshort 0xffd8 JPEG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image"); + } + elseif (preg_match('/^BM/', $contents)) { + // 0 string BM PC bitmap (OS2, Windows BMP files) + //error_log("check_feed_favicon, favicon_url=$favicon_url isa BMP image"); + } + else { + //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type"); + Debug::log("favicon $favicon_url type is unknown (not updating)", Debug::$LOG_VERBOSE); + return false; + } + + Debug::log("setting contents of $icon_file", Debug::$LOG_VERBOSE); + + $fp = @fopen($icon_file, "w"); + if (!$fp) { + Debug::log("failed to open $icon_file for writing", Debug::$LOG_VERBOSE); + return false; + } + + fwrite($fp, $contents); + fclose($fp); + chmod($icon_file, 0644); + clearstatcache(); + + return $icon_file; } static function is_gzipped($feed_data) { From 02a9485966dbbac1ed52ecbfb29fcc15125cba43 Mon Sep 17 00:00:00 2001 From: wn_ Date: Sun, 21 Feb 2021 23:30:31 +0000 Subject: [PATCH 2/3] Try to limit max favicon size, don't store current/old in a var. --- classes/rssutils.php | 12 ++++++++---- include/functions.php | 2 ++ utils/phpstan_tunables.php | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/classes/rssutils.php b/classes/rssutils.php index 13f63bc7a..6785ab3f5 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -1652,15 +1652,19 @@ class RSSUtils { } // Limiting to "image" type misses those served with text/plain - $contents = UrlHelper::fetch(['url' => $favicon_url]); // , "image"); + $contents = UrlHelper::fetch([ + 'url' => $favicon_url, + 'max_size' => MAX_FAVICON_FILE_SIZE, + //'type' => 'image', + ]); if (!$contents) { Debug::log("fetching favicon $favicon_url failed", Debug::$LOG_VERBOSE); return false; } - $original_contents = file_exists($icon_file) ? file_get_contents($icon_file) : null; - if ($original_contents) { - if (strcmp($contents, $original_contents) === 0) { + $original_contents_md5 = file_exists($icon_file) ? md5_file($icon_file) : null; + if ($original_contents_md5) { + if (md5($contents) == $original_contents_md5) { Debug::log("favicon content has not changed", Debug::$LOG_VERBOSE); return $icon_file; } diff --git a/include/functions.php b/include/functions.php index 174ef39f0..df8730aca 100644 --- a/include/functions.php +++ b/include/functions.php @@ -68,6 +68,8 @@ // do not cache files larger than that (bytes) define_default('MAX_DOWNLOAD_FILE_SIZE', 16*1024*1024); // do not download general files larger than that (bytes) + define_default('MAX_FAVICON_FILE_SIZE', 1*1024*1024); + // do not download favicon files larger than that (bytes) define_default('CACHE_MAX_DAYS', 7); // max age in days for various automatically cached (temporary) files define_default('MAX_CONDITIONAL_INTERVAL', 3600*12); diff --git a/utils/phpstan_tunables.php b/utils/phpstan_tunables.php index 7d5d8f03a..e192bcdba 100644 --- a/utils/phpstan_tunables.php +++ b/utils/phpstan_tunables.php @@ -27,6 +27,8 @@ // do not cache files larger than that (bytes) define('MAX_DOWNLOAD_FILE_SIZE', 16*1024*1024); // do not download general files larger than that (bytes) + define('MAX_FAVICON_FILE_SIZE', 1*1024*1024); + // do not download favicon files larger than that (bytes) define('CACHE_MAX_DAYS', 7); // max age in days for various automatically cached (temporary) files define('MAX_CONDITIONAL_INTERVAL', 3600*12); From 6fbf7ef368520f8f4d2c07c153d1429b2f4ff5e3 Mon Sep 17 00:00:00 2001 From: wn_ Date: Mon, 22 Feb 2021 12:06:27 +0000 Subject: [PATCH 3/3] Remove check against the old file in 'RSSUtils::check_feed_favicon'. --- classes/rssutils.php | 9 --------- 1 file changed, 9 deletions(-) diff --git a/classes/rssutils.php b/classes/rssutils.php index 6785ab3f5..30d08328f 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -1662,15 +1662,6 @@ class RSSUtils { return false; } - $original_contents_md5 = file_exists($icon_file) ? md5_file($icon_file) : null; - if ($original_contents_md5) { - if (md5($contents) == $original_contents_md5) { - Debug::log("favicon content has not changed", Debug::$LOG_VERBOSE); - return $icon_file; - } - Debug::log("favicon content has changed", Debug::$LOG_VERBOSE); - } - // Crude image type matching. // Patterns gleaned from the file(1) source code. if (preg_match('/^\x00\x00\x01\x00/', $contents)) {