DiskCache: properly deal with srcset attributes

This commit is contained in:
Andrew Dolgov 2020-04-29 19:29:36 +03:00
parent e934e9f05e
commit 3a4b9249a9
2 changed files with 61 additions and 29 deletions

View File

@ -79,6 +79,7 @@ class DiskCache {
// check for locally cached (media) URLs and rewrite to local versions // check for locally cached (media) URLs and rewrite to local versions
// this is called separately after sanitize() and plugin render article hooks to allow // this is called separately after sanitize() and plugin render article hooks to allow
// plugins work on original source URLs used before caching // plugins work on original source URLs used before caching
// NOTE: URLs should be already absolutized because this is called after sanitize()
static public function rewriteUrls($str) static public function rewriteUrls($str)
{ {
$res = trim($str); $res = trim($str);
@ -89,29 +90,45 @@ class DiskCache {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$cache = new DiskCache("images"); $cache = new DiskCache("images");
$entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video[@src]|//video/source[@src]|//audio/source[@src])'); $entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
$need_saving = false; $need_saving = false;
foreach ($entries as $entry) { foreach ($entries as $entry) {
foreach (array('src', 'poster') as $attr) { foreach (array('src', 'poster') as $attr) {
if ($entry->hasAttribute($attr)) { if ($entry->hasAttribute($attr)) {
// should be already absolutized because this is called after sanitize() $url = $entry->getAttribute($attr);
$src = $entry->getAttribute($attr); $cached_filename = sha1($url);
$cached_filename = sha1($src);
if ($cache->exists($cached_filename)) { if ($cache->exists($cached_filename)) {
$url = $cache->getUrl($cached_filename);
$src = $cache->getUrl(sha1($src)); $entry->setAttribute($attr, $url);
$entry->setAttribute($attr, $src);
$entry->removeAttribute("srcset"); $entry->removeAttribute("srcset");
$need_saving = true; $need_saving = true;
} }
} }
} }
if ($entry->hasAttribute("srcset")) {
$tokens = explode(",", $entry->getAttribute('srcset'));
for ($i = 0; $i < count($tokens); $i++) {
$token = trim($tokens[$i]);
list ($url, $width) = explode(" ", $token, 2);
$cached_filename = sha1($url);
if ($cache->exists($cached_filename)) {
$tokens[$i] = $cache->getUrl($cached_filename) . " " . $width;
$need_saving = true;
}
}
$entry->setAttribute("srcset", implode(", ", $tokens));
}
} }
if ($need_saving) { if ($need_saving) {

View File

@ -1226,6 +1226,32 @@ class RSSUtils {
} }
} }
/**
 * Ensures a single media URL is present in the given disk cache.
 *
 * The URL is resolved against $site_url with rewrite_relative_url() and the
 * cache entry is keyed by the SHA-1 of the resolved URL. If no entry exists,
 * the file is fetched (using the URL itself as the referrer, limited to
 * MAX_CACHE_FILE_SIZE) and stored; a failed fetch is only logged. If the entry
 * already exists and is writable, it is touch()ed instead (presumably to
 * refresh its timestamp so it is not expired -- confirm against DiskCache).
 *
 * Empty URLs and inline "data:" URIs are ignored: srcset parsing in the caller
 * can produce empty tokens (e.g. a trailing comma), which would otherwise
 * resolve to $site_url itself and cause the site page to be downloaded.
 *
 * @param DiskCache $cache    cache the file is stored in
 * @param string    $url      media URL, absolute or relative to $site_url
 * @param string    $site_url base URL used to resolve relative URLs
 * @return void
 */
static function cache_media_url($cache, $url, $site_url) {
	// Only used for the guard below; the URL that gets hashed is left untouched
	// so cache keys stay identical to the ones computed elsewhere.
	$probe = trim((string) $url);

	// Nothing to download for empty tokens or inline data: URIs.
	if ($probe === "" || stripos($probe, "data:") === 0) {
		return;
	}

	$url = rewrite_relative_url($site_url, $url);
	$local_filename = sha1($url);

	Debug::log("cache_media: checking $url", Debug::$LOG_VERBOSE);

	if ($cache->exists($local_filename)) {
		// Already cached: just mark the entry as recently used when possible.
		if ($cache->isWritable($local_filename)) {
			$cache->touch($local_filename);
		}
		return;
	}

	Debug::log("cache_media: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);

	// Set by fetch_file_contents() on failure; read only for the log message below.
	global $fetch_last_error_code;
	global $fetch_last_error;

	$file_content = fetch_file_contents(array("url" => $url,
		"http_referrer" => $url,
		"max_size" => MAX_CACHE_FILE_SIZE));

	if ($file_content) {
		$cache->put($local_filename, $file_content);
	} else {
		Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
	}
}
static function cache_media($html, $site_url) { static function cache_media($html, $site_url) {
$cache = new DiskCache("images"); $cache = new DiskCache("images");
@ -1234,35 +1260,24 @@ class RSSUtils {
if ($doc->loadHTML($html)) { if ($doc->loadHTML($html)) {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])|(//video[@poster])|(//video[@src])'); $entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
foreach ($entries as $entry) { foreach ($entries as $entry) {
foreach (array('src', 'poster') as $attr) { foreach (array('src', 'poster') as $attr) {
if ($entry->hasAttribute($attr) && strpos($entry->getAttribute($attr), "data:") !== 0) { if ($entry->hasAttribute($attr) && strpos($entry->getAttribute($attr), "data:") !== 0) {
$src = rewrite_relative_url($site_url, $entry->getAttribute($attr)); RSSUtils::cache_media_url($cache, $entry->getAttribute($attr), $site_url);
}
}
$local_filename = sha1($src); if ($entry->hasAttribute("srcset")) {
$tokens = explode(",", $entry->getAttribute('srcset'));
Debug::log("cache_media: checking $src", Debug::$LOG_VERBOSE); for ($i = 0; $i < count($tokens); $i++) {
$token = trim($tokens[$i]);
if (!$cache->exists($local_filename)) { list ($url, $width) = explode(" ", $token, 2);
Debug::log("cache_media: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);
global $fetch_last_error_code; RSSUtils::cache_media_url($cache, $url, $site_url);
global $fetch_last_error;
$file_content = fetch_file_contents(array("url" => $src,
"http_referrer" => $src,
"max_size" => MAX_CACHE_FILE_SIZE));
if ($file_content) {
$cache->put($local_filename, $file_content);
} else {
Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
}
} else if ($cache->isWritable($local_filename)) {
$cache->touch($local_filename);
}
} }
} }
} }