DiskCache: properly deal with srcset attributes

This commit is contained in:
Andrew Dolgov 2020-04-29 19:29:36 +03:00
parent e934e9f05e
commit 3a4b9249a9
2 changed files with 61 additions and 29 deletions

View File

@ -79,6 +79,7 @@ class DiskCache {
// check for locally cached (media) URLs and rewrite to local versions
// this is called separately after sanitize() and plugin render article hooks to allow
// plugins to work on original source URLs used before caching
// NOTE: URLs should be already absolutized because this is called after sanitize()
static public function rewriteUrls($str)
{
$res = trim($str);
@ -89,29 +90,45 @@ class DiskCache {
$xpath = new DOMXPath($doc);
$cache = new DiskCache("images");
$entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video[@src]|//video/source[@src]|//audio/source[@src])');
$entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
$need_saving = false;
foreach ($entries as $entry) {
foreach (array('src', 'poster') as $attr) {
if ($entry->hasAttribute($attr)) {
// should be already absolutized because this is called after sanitize()
$src = $entry->getAttribute($attr);
$cached_filename = sha1($src);
$url = $entry->getAttribute($attr);
$cached_filename = sha1($url);
if ($cache->exists($cached_filename)) {
$url = $cache->getUrl($cached_filename);
$src = $cache->getUrl(sha1($src));
$entry->setAttribute($attr, $src);
$entry->setAttribute($attr, $url);
$entry->removeAttribute("srcset");
$need_saving = true;
}
}
}
if ($entry->hasAttribute("srcset")) {
$tokens = explode(",", $entry->getAttribute('srcset'));
for ($i = 0; $i < count($tokens); $i++) {
$token = trim($tokens[$i]);
list ($url, $width) = explode(" ", $token, 2);
$cached_filename = sha1($url);
if ($cache->exists($cached_filename)) {
$tokens[$i] = $cache->getUrl($cached_filename) . " " . $width;
$need_saving = true;
}
}
$entry->setAttribute("srcset", implode(", ", $tokens));
}
}
if ($need_saving) {

View File

@ -1226,6 +1226,32 @@ class RSSUtils {
}
}
/**
 * Makes sure a single media URL is present in the given cache.
 *
 * The URL is passed through rewrite_relative_url() together with $site_url, and the
 * sha1() of the result is used as the cache file name. When no such entry exists the
 * file is downloaded with fetch_file_contents() (capped at MAX_CACHE_FILE_SIZE) and
 * stored; when it already exists and is writable, the entry is touched instead.
 *
 * Empty URLs and data: URIs are ignored. Callers splitting a srcset attribute may hand
 * over empty tokens (e.g. after a trailing comma) or inline data: candidates, neither
 * of which is something that can or should be downloaded.
 *
 * @param object $cache    cache object providing exists(), put(), isWritable() and touch()
 * @param string $url      media URL as found in the document (may be relative)
 * @param string $site_url base URL handed to rewrite_relative_url()
 * @return void
 */
static function cache_media_url($cache, $url, $site_url) {
	// Inspect a trimmed copy only; the value passed on below is left untouched so the
	// resulting cache file name stays the same as before for every URL that is accepted.
	$candidate = trim((string) $url);

	// NOTE(review): an empty URL would presumably be resolved to $site_url itself and
	// cached as if it were media - confirm against rewrite_relative_url().
	if ($candidate === "" || stripos($candidate, "data:") === 0) {
		return;
	}

	$url = rewrite_relative_url($site_url, $url);
	$local_filename = sha1($url);

	Debug::log("cache_media: checking $url", Debug::$LOG_VERBOSE);

	if (!$cache->exists($local_filename)) {
		Debug::log("cache_media: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);

		// presumably populated by fetch_file_contents() on failure; only read here for logging
		global $fetch_last_error_code;
		global $fetch_last_error;

		// the media URL itself is sent as the referrer
		$file_content = fetch_file_contents(array("url" => $url,
			"http_referrer" => $url,
			"max_size" => MAX_CACHE_FILE_SIZE));

		if ($file_content) {
			$cache->put($local_filename, $file_content);
		} else {
			Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
		}
	} else if ($cache->isWritable($local_filename)) {
		// already cached: touch the entry (presumably to keep it from expiring - verify in DiskCache)
		$cache->touch($local_filename);
	}
}
static function cache_media($html, $site_url) {
$cache = new DiskCache("images");
@ -1234,35 +1260,24 @@ class RSSUtils {
if ($doc->loadHTML($html)) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])|(//video[@poster])|(//video[@src])');
$entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
foreach ($entries as $entry) {
foreach (array('src', 'poster') as $attr) {
if ($entry->hasAttribute($attr) && strpos($entry->getAttribute($attr), "data:") !== 0) {
$src = rewrite_relative_url($site_url, $entry->getAttribute($attr));
RSSUtils::cache_media_url($cache, $entry->getAttribute($attr), $site_url);
}
}
$local_filename = sha1($src);
if ($entry->hasAttribute("srcset")) {
$tokens = explode(",", $entry->getAttribute('srcset'));
Debug::log("cache_media: checking $src", Debug::$LOG_VERBOSE);
for ($i = 0; $i < count($tokens); $i++) {
$token = trim($tokens[$i]);
if (!$cache->exists($local_filename)) {
Debug::log("cache_media: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);
list ($url, $width) = explode(" ", $token, 2);
global $fetch_last_error_code;
global $fetch_last_error;
$file_content = fetch_file_contents(array("url" => $src,
"http_referrer" => $src,
"max_size" => MAX_CACHE_FILE_SIZE));
if ($file_content) {
$cache->put($local_filename, $file_content);
} else {
Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
}
} else if ($cache->isWritable($local_filename)) {
$cache->touch($local_filename);
}
RSSUtils::cache_media_url($cache, $url, $site_url);
}
}
}