Merge branch 'master' of git.tt-rss.org:fox/tt-rss

This commit is contained in:
Andrew Dolgov 2019-08-14 08:25:09 +03:00
commit 65450f8a2b
10 changed files with 305 additions and 241 deletions

View File

@ -379,7 +379,7 @@ class API extends Handler {
$article = $p->hook_render_article_api(array("article" => $article));
}
$article['content'] = rewrite_cached_urls($article['content']);
$article['content'] = DiskCache::rewriteUrls($article['content']);
array_push($articles, $article);
@ -801,7 +801,7 @@ class API extends Handler {
$headline_row = $p->hook_render_article_api(array("headline" => $headline_row));
}
$headline_row['content'] = rewrite_cached_urls($headline_row['content']);
$headline_row['content'] = DiskCache::rewriteUrls($headline_row['content']);
array_push($headlines, $headline_row);
}

View File

@ -446,7 +446,7 @@ class Article extends Handler_Protected {
foreach ($result as $line) {
foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_ENCLOSURE_ENTRY) as $plugin) {
$line = $plugin->hook_enclosure_entry($line);
$line = $plugin->hook_enclosure_entry($line, $id);
}
$url = $line["content_url"];
@ -676,7 +676,7 @@ class Article extends Handler_Protected {
while ($line = $sth->fetch()) {
if (file_exists(CACHE_DIR . '/images/' . sha1($line["content_url"]))) {
$line["content_url"] = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($line["content_url"]);
$line["content_url"] = DiskCache::getUrl(sha1($line["content_url"]));
}
array_push($rv, $line);

149
classes/diskcache.php Normal file
View File

@ -0,0 +1,149 @@
<?php
/**
 * Flat file cache stored in a single named subdirectory of CACHE_DIR.
 *
 * Every filename handed to an instance method is passed through basename(),
 * so callers cannot address anything outside the cache directory.
 */
class DiskCache {
    /* @var string path of this cache's directory (CACHE_DIR . "/" . name) */
    private $dir;

    /**
     * @param string $dir cache name; only its basename is used, so the
     *                    cache is always a direct child of CACHE_DIR
     */
    public function __construct($dir) {
        $this->dir = CACHE_DIR . "/" . basename($dir);
    }

    /**
     * @return string path of the cache directory
     */
    public function getDir() {
        return $this->dir;
    }

    /**
     * Creates the cache directory if it is missing.
     *
     * @return bool true only when the directory was created by this call;
     *              false when it already existed or mkdir() failed
     */
    public function makeDir() {
        if (!is_dir($this->dir)) {
            return mkdir($this->dir);
        }

        return false;
    }

    /**
     * Checks whether a cache entry (or the cache directory) can be written.
     *
     * @param string $filename entry to check; when empty or not yet present
     *                         the directory itself is checked instead
     * @return bool
     */
    public function isWritable($filename = "") {
        // compare against "" explicitly so an entry literally named "0" is not skipped
        if ((string)$filename !== "") {
            $path = $this->getFullPath($filename);

            if (file_exists($path))
                return is_writable($path);
        }

        return is_writable($this->dir);
    }

    /**
     * @param string $filename
     * @return bool true when the entry exists in this cache
     */
    public function exists($filename) {
        return file_exists($this->getFullPath($filename));
    }

    /**
     * @param string $filename
     * @return int|false size in bytes as reported by filesize(), or -1 when
     *                   the entry does not exist
     */
    public function getSize($filename) {
        if ($this->exists($filename))
            return filesize($this->getFullPath($filename));
        else
            return -1;
    }

    /**
     * Maps an entry name to its path inside the cache directory.
     *
     * @param string $filename entry name; any directory components are dropped
     * @return string
     */
    public function getFullPath($filename) {
        return $this->dir . "/" . basename($filename);
    }

    /**
     * Writes (or overwrites) an entry.
     *
     * @param string $filename
     * @param mixed $data anything accepted by file_put_contents()
     * @return int|false number of bytes written, false on failure
     */
    public function put($filename, $data) {
        return file_put_contents($this->getFullPath($filename), $data);
    }

    /**
     * Updates the modification time of an entry, creating it if missing.
     *
     * @param string $filename
     * @return bool
     */
    public function touch($filename) {
        return touch($this->getFullPath($filename));
    }

    /**
     * @param string $filename
     * @return string|false|null entry contents, null when the entry does not exist
     */
    public function get($filename) {
        if ($this->exists($filename))
            return file_get_contents($this->getFullPath($filename));
        else
            return null;
    }

    /**
     * @param string $filename
     * @return string|false|null result of mime_content_type(), null when the
     *                           entry does not exist
     */
    public function getMimeType($filename) {
        if ($this->exists($filename))
            return mime_content_type($this->getFullPath($filename));
        else
            return null;
    }

    /**
     * Emits an inline Content-Disposition header and hands the entry over
     * to send_local_file().
     *
     * @param string $filename
     * @return mixed whatever send_local_file() returns
     */
    public function send($filename) {
        // advertise the same name getFullPath() resolves to, and strip characters
        // (quotes, backslashes, control bytes) that would break the quoted header value
        $display_name = preg_replace('/[\x00-\x1f"\\\\]/', '', basename($filename));

        header("Content-Disposition: inline; filename=\"$display_name\"");

        return send_local_file($this->getFullPath($filename));
    }

    /**
     * Builds the public.php?op=cached_url URL for an entry.
     *
     * @param string $filename entry name, optionally prefixed with "cache-name/"
     * @return string
     */
    static public function getUrl($filename) {
        return get_self_url_prefix() . "/public.php?op=cached_url&file=" . $filename;
    }

    /**
     * Checks for locally cached (media) URLs and rewrites them to local versions.
     *
     * This is called separately after sanitize() and plugin render article hooks
     * to allow plugins to work on the original source URLs used before caching.
     *
     * @param string $str HTML fragment
     * @return string trimmed input when nothing was rewritten (or parsing
     *                failed), otherwise the re-serialized document
     */
    static public function rewriteUrls($str)
    {
        $res = trim($str);

        // strict comparison: content consisting of "0" is not empty
        if ($res === '') return '';

        $doc = new DOMDocument();

        if ($doc->loadHTML('<?xml encoding="UTF-8">' . $res)) {
            $xpath = new DOMXPath($doc);
            $cache = new DiskCache("images");

            $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])');

            $need_saving = false;

            foreach ($entries as $entry) {
                if ($entry->hasAttribute('src') || $entry->hasAttribute('poster')) {
                    // poster takes precedence over src, both for reading and rewriting
                    $attr = $entry->hasAttribute('poster') ? 'poster' : 'src';

                    // should be already absolutized because this is called after sanitize()
                    $cached_filename = sha1($entry->getAttribute($attr));

                    // only non-empty cached files are substituted
                    if ($cache->getSize($cached_filename) > 0) {
                        $entry->setAttribute($attr, DiskCache::getUrl($cached_filename));
                        $need_saving = true;
                    }
                }
            }

            if ($need_saving) {
                $doc->removeChild($doc->firstChild); //remove doctype
                $res = $doc->saveHTML();
            }
        }

        return $res;
    }

    /**
     * Removes files older than CACHE_MAX_DAYS from every writable directory
     * directly under CACHE_DIR, skipping directories that contain a
     * ".no-auto-expiry" marker file. Subdirectories inside a cache are left alone.
     */
    static public function expire() {
        // glob() may return false on error; treat that as "nothing to do"
        $dirs = array_filter(glob(CACHE_DIR . "/*") ?: array(), "is_dir");

        $now = time();
        $max_age = 86400 * CACHE_MAX_DAYS;

        foreach ($dirs as $cache_dir) {
            $num_deleted = 0;

            if (is_writable($cache_dir) && !file_exists("$cache_dir/.no-auto-expiry")) {
                $files = glob("$cache_dir/*");

                if ($files) {
                    foreach ($files as $file) {
                        // unlink() cannot remove directories, do not try
                        if (!is_file($file)) continue;

                        $mtime = filemtime($file);

                        // count only files that were actually removed
                        if ($mtime !== false && $now - $mtime > $max_age && unlink($file)) {
                            ++$num_deleted;
                        }
                    }
                }

                Debug::log("Expired $cache_dir: removed $num_deleted files.");
            }
        }
    }
}

View File

@ -307,7 +307,7 @@ class Feeds extends Handler_Protected {
$line = $p->hook_render_article_cdm($line);
}
$line['content'] = rewrite_cached_urls($line['content']);
$line['content'] = DiskCache::rewriteUrls($line['content']);
if ($line['note'])
$line['note'] = Article::format_article_note($id, $line['note']);

View File

@ -382,7 +382,7 @@ class Handler_Public extends Handler {
$line = $p->hook_render_article($line);
}
$line['content'] = rewrite_cached_urls($line['content']);
$line['content'] = DiskCache::rewriteUrls($line['content']);
$enclosures = Article::get_article_enclosures($line["id"]);
@ -1202,26 +1202,23 @@ class Handler_Public extends Handler {
}
function cached_url() {
@$req_filename = basename($_GET['hash']);
$filename = $_GET['file'];
// we don't need an extension to find the file, hash is a complete URL
$hash = preg_replace("/\.[^\.]*$/", "", $req_filename);
if (strpos($filename, "/") !== FALSE) {
list ($cache_dir, $filename) = explode("/", $filename, 2);
} else {
$cache_dir = "images";
}
if ($hash) {
$filename = CACHE_DIR . '/images/' . $hash;
if (file_exists($filename)) {
header("Content-Disposition: inline; filename=\"$req_filename\"");
send_local_file($filename);
$cache = new DiskCache($cache_dir);
if ($cache->exists($filename)) {
$cache->send($filename);
} else {
header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found");
echo "File not found.";
}
}
}
private function make_article_tag_uri($id, $timestamp) {

View File

@ -470,4 +470,8 @@ class PluginHost {
function get_filter_actions() {
return $this->plugin_actions;
}
function get_owner_uid() {
return $this->owner_uid;
}
}

View File

@ -871,7 +871,7 @@ class RSSUtils {
$entry_ref_id = $ref_id;
if (RSSUtils::find_article_filter($article_filters, "filter")) {
Debug::log("article is filtered out, nothing to do.");
Debug::log("article is filtered out, nothing to do.", Debug::$LOG_VERBOSE);
$pdo->commit();
continue;
}
@ -1284,32 +1284,6 @@ class RSSUtils {
Debug::log("Removed $num_deleted old lock files.");
}
static function expire_cached_files() {
foreach (array("feeds", "images", "export", "upload") as $dir) {
$cache_dir = CACHE_DIR . "/$dir";
Debug::log("Expiring $cache_dir", Debug::$LOG_VERBOSE);
$num_deleted = 0;
if (is_writable($cache_dir)) {
$files = glob("$cache_dir/*");
if ($files) {
foreach ($files as $file) {
if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) {
unlink($file);
++$num_deleted;
}
}
}
}
Debug::log("$cache_dir: removed $num_deleted files.");
}
}
/**
* Source: http://www.php.net/manual/en/function.parse-url.php#104527
* Returns the url query as associative array
@ -1498,7 +1472,8 @@ class RSSUtils {
}
static function housekeeping_common() {
RSSUtils::expire_cached_files();
DiskCache::expire();
RSSUtils::expire_lock_files();
RSSUtils::expire_error_log();
RSSUtils::expire_feed_archive();

View File

@ -1233,64 +1233,6 @@
return false;
}
// check for locally cached (media) URLs and rewrite to local versions
// this is called separately after sanitize() and plugin render article hooks to allow
// plugins work on original source URLs used before caching
function rewrite_cached_urls($str) {
$res = trim($str); if (!$res) return '';
$doc = new DOMDocument();
$doc->loadHTML('<?xml encoding="UTF-8">' . $res);
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])');
$need_saving = false;
foreach ($entries as $entry) {
if ($entry->hasAttribute('src') || $entry->hasAttribute('poster')) {
// should be already absolutized because this is called after sanitize()
$src = $entry->hasAttribute('poster') ? $entry->getAttribute('poster') : $entry->getAttribute('src');
$cached_filename = CACHE_DIR . '/images/' . sha1($src);
if (file_exists($cached_filename)) {
// this is strictly cosmetic
if ($entry->tagName == 'img') {
$suffix = ".png";
} else if ($entry->parentNode && $entry->parentNode->tagName == "picture") {
$suffix = ".png";
} else if ($entry->parentNode && $entry->parentNode->tagName == "video") {
$suffix = ".mp4";
} else if ($entry->parentNode && $entry->parentNode->tagName == "audio") {
$suffix = ".ogg";
} else {
$suffix = "";
}
$src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix;
if ($entry->hasAttribute('poster'))
$entry->setAttribute('poster', $src);
else
$entry->setAttribute('src', $src);
$need_saving = true;
}
}
}
if ($need_saving) {
$doc->removeChild($doc->firstChild); //remove doctype
$res = $doc->saveHTML();
}
return $res;
}
function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
if (!$owner) $owner = $_SESSION["uid"];
@ -1315,9 +1257,6 @@
if ($entry->hasAttribute('src')) {
$src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
// cache stuff has gone to rewrite_cached_urls()
$entry->setAttribute('src', $src);
}

View File

@ -4,6 +4,9 @@ class Af_Zz_ImgProxy extends Plugin {
/* @var PluginHost $host */
private $host;
/* @var DiskCache $cache */
private $cache;
function about() {
return array(1.0,
"Load insecure images via built-in proxy",
@ -18,6 +21,7 @@ class Af_Zz_ImgProxy extends Plugin {
function init($host) {
$this->host = $host;
$this->cache = new DiskCache("images");
$host->add_hook($host::HOOK_RENDER_ARTICLE, $this);
$host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this);
@ -50,16 +54,10 @@ class Af_Zz_ImgProxy extends Plugin {
return;
}
$local_filename = CACHE_DIR . "/images/" . sha1($url);
if ($_REQUEST["debug"] == "1") { print $url . "\n" . $local_filename; die; }
header("Content-Disposition: inline; filename=\"".basename($local_filename)."\"");
if (file_exists($local_filename)) {
send_local_file($local_filename);
$local_filename = sha1($url);
if ($this->cache->exists($local_filename)) {
$this->cache->send($local_filename);
} else {
$data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]);
@ -68,8 +66,8 @@ class Af_Zz_ImgProxy extends Plugin {
$disable_cache = $this->host->get($this, "disable_cache");
if (!$disable_cache && strlen($data) > MIN_CACHE_FILE_SIZE) {
if (file_put_contents($local_filename, $data)) {
$mimetype = mime_content_type($local_filename);
if ($this->cache->put($local_filename, $data)) {
$mimetype = $this->cache->getMimeType($local_filename);
header("Content-type: $mimetype");
}
}
@ -110,7 +108,7 @@ class Af_Zz_ImgProxy extends Plugin {
}
}
function rewrite_url_if_needed($url, $all_remote = false) {
private function rewrite_url_if_needed($url, $all_remote = false) {
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($all_remote) {

View File

@ -1,82 +1,34 @@
<?php
class Cache_Starred_Images extends Plugin implements IHandler {
class Cache_Starred_Images extends Plugin {
/* @var PluginHost $host */
private $host;
private $cache_dir;
/* @var DiskCache $cache */
private $cache;
private $max_cache_attempts = 5; // per-article
function about() {
return array(1.0,
"Automatically cache Starred articles' images and HTML5 video files",
"fox",
true);
}
/**
* @SuppressWarnings(PHPMD.UnusedFormalParameter)
*/
function csrf_ignore($method) {
return false;
}
/**
* @SuppressWarnings(PHPMD.UnusedFormalParameter)
*/
function before($method) {
return true;
}
function after() {
return true;
"Automatically cache media files in Starred articles",
"fox");
}
function init($host) {
$this->host = $host;
$this->cache = new DiskCache("starred-images");
$this->cache_dir = CACHE_DIR . "/starred-images/";
if ($this->cache->makeDir())
chmod($this->cache->getDir(), 0777);
if (!is_dir($this->cache_dir)) {
mkdir($this->cache_dir);
}
if (!$this->cache->exists(".no-auto-expiry"))
$this->cache->touch(".no-auto-expiry");
if (is_dir($this->cache_dir)) {
if (!is_writable($this->cache_dir))
chmod($this->cache_dir, 0777);
if (is_writable($this->cache_dir)) {
$host->add_hook($host::HOOK_UPDATE_TASK, $this);
if ($this->cache->isWritable()) {
$host->add_hook($host::HOOK_HOUSE_KEEPING, $this);
$host->add_hook($host::HOOK_ENCLOSURE_ENTRY, $this);
$host->add_hook($host::HOOK_SANITIZE, $this);
$host->add_handler("public", "cache_starred_images_getimage", $this);
} else {
user_error("Starred cache directory is not writable.", E_USER_WARNING);
}
} else {
user_error("Unable to create starred cache directory.", E_USER_WARNING);
}
}
function cache_starred_images_getimage() {
ob_end_clean();
$hash = basename($_REQUEST["hash"]);
if ($hash) {
$filename = $this->cache_dir . "/" . basename($hash);
if (file_exists($filename)) {
header("Content-Disposition: attachment; filename=\"$hash\"");
send_local_file($filename);
} else {
header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found");
echo "File not found.";
}
user_error("Starred cache directory ".$this->cache->getDir()." is not writable.", E_USER_WARNING);
}
}
@ -84,7 +36,45 @@ class Cache_Starred_Images extends Plugin implements IHandler {
* @SuppressWarnings(PHPMD.UnusedLocalVariable)
*/
function hook_house_keeping() {
$files = glob($this->cache_dir . "/*.{png,mp4,status}", GLOB_BRACE);
/* since HOOK_UPDATE_TASK is not available to user plugins, this hook is a next best thing */
Debug::log("caching media of starred articles for user " . $this->host->get_owner_uid() . "...");
$sth = $this->pdo->prepare("SELECT content, ttrss_entries.title,
ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data
FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON
(ttrss_user_entries.feed_id = ttrss_feeds.id)
WHERE ref_id = ttrss_entries.id AND
marked = true AND
site_url != '' AND
ttrss_user_entries.owner_uid = ? AND
plugin_data NOT LIKE '%starred_cache_images%'
ORDER BY ".sql_random_function()." LIMIT 100");
if ($sth->execute([$this->host->get_owner_uid()])) {
$usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?");
while ($line = $sth->fetch()) {
Debug::log("processing article " . $line["title"], Debug::$LOG_VERBOSE);
if ($line["site_url"]) {
$success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]);
if ($success) {
$plugin_data = "starred_cache_images,${line['owner_uid']}:" . $line["plugin_data"];
$usth->execute([$plugin_data, $line['id']]);
}
}
}
}
/* actual housekeeping */
Debug::log("expiring " . $this->cache->getDir() . "...");
$files = glob($this->cache->getDir() . "/*.{png,mp4,status}", GLOB_BRACE);
$last_article_id = 0;
$article_exists = 1;
@ -107,6 +97,16 @@ class Cache_Starred_Images extends Plugin implements IHandler {
}
}
function hook_enclosure_entry($enc, $article_id) {
$local_filename = $article_id . "-" . sha1($enc["content_url"]);
if ($this->cache->exists($local_filename)) {
$enc["content_url"] = DiskCache::getUrl("starred-images/" . $local_filename);
}
return $enc;
}
/**
* @SuppressWarnings(PHPMD.UnusedFormalParameter)
*/
@ -120,15 +120,12 @@ class Cache_Starred_Images extends Plugin implements IHandler {
if ($entry->hasAttribute('src')) {
$src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
$extension = $entry->tagName == 'source' ? '.mp4' : '.png';
$local_filename = $this->cache_dir . $article_id . "-" . sha1($src) . $extension;
$local_filename = $article_id . "-" . sha1($src);
if (file_exists($local_filename)) {
$entry->setAttribute("src", get_self_url_prefix() .
"/public.php?op=cache_starred_images_getimage&method=image&hash=" .
$article_id . "-" . sha1($src) . $extension);
if ($this->cache->exists($local_filename)) {
$entry->setAttribute("src", DiskCache::getUrl("starred-images/" . $local_filename));
$entry->removeAttribute("srcset");
}
}
}
}
@ -136,42 +133,46 @@ class Cache_Starred_Images extends Plugin implements IHandler {
return $doc;
}
function hook_update_task() {
$res = $this->pdo->query("SELECT content, ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data
FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON
(ttrss_user_entries.feed_id = ttrss_feeds.id)
WHERE ref_id = ttrss_entries.id AND
marked = true AND
(UPPER(content) LIKE '%<IMG%' OR UPPER(content) LIKE '%<VIDEO%') AND
site_url != '' AND
plugin_data NOT LIKE '%starred_cache_images%'
ORDER BY ".sql_random_function()." LIMIT 100");
private function cache_url($article_id, $url) {
$local_filename = $article_id . "-" . sha1($url);
$usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?");
if (!$this->cache->getSize($local_filename) >= 0) {
Debug::log("cache_images: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);
while ($line = $res->fetch()) {
if ($line["site_url"]) {
$success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]);
$data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]);
if ($success) {
$plugin_data = "starred_cache_images,${line['owner_uid']}:" . $line["plugin_data"];
$usth->execute([$plugin_data, $line['id']]);
if ($data) {
if (strlen($data) > MIN_CACHE_FILE_SIZE) {
$this->cache->put($local_filename, $data);
}
return true;
}
} else {
//Debug::log("cache_images: local file exists for $url", Debug::$LOG_VERBOSE);
return true;
}
return false;
}
/**
* @SuppressWarnings(PHPMD.UnusedFormalParameter)
*/
function cache_article_images($content, $site_url, $owner_uid, $article_id) {
$status_filename = $this->cache_dir . $article_id . "-" . sha1($site_url) . ".status";
private function cache_article_images($content, $site_url, $owner_uid, $article_id) {
$status_filename = $article_id . "-" . sha1($site_url) . ".status";
Debug::log("status: $status_filename", Debug::$LOG_EXTENDED);
/* housekeeping might run as a separate user, in this case status/media might not be writable */
if (!$this->cache->isWritable($status_filename)) {
Debug::log("status not writable: $status_filename", Debug::$LOG_VERBOSE);
return false;
}
if (file_exists($status_filename))
$status = json_decode(file_get_contents($status_filename), true);
Debug::log("status: $status_filename", Debug::$LOG_VERBOSE);
if ($this->cache->exists($status_filename))
$status = json_decode($this->cache->get($status_filename), true);
else
$status = [];
@ -180,47 +181,48 @@ class Cache_Starred_Images extends Plugin implements IHandler {
// only allow several download attempts for article
if ($status["attempt"] > $this->max_cache_attempts) {
Debug::log("too many attempts for $site_url", Debug::$LOG_VERBOSE);
return;
return false;
}
if (!file_put_contents($status_filename, json_encode($status))) {
if (!$this->cache->put($status_filename, json_encode($status))) {
user_error("unable to write status file: $status_filename", E_USER_WARNING);
return;
return false;
}
$doc = new DOMDocument();
$doc->loadHTML('<?xml encoding="UTF-8">' . $content);
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])|(//video/source[@src])');
$success = false;
$has_images = false;
$success = false;
if ($doc->loadHTML('<?xml encoding="UTF-8">' . $content)) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])|(//video/source[@src])');
foreach ($entries as $entry) {
if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) {
$has_images = true;
$src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
$extension = $entry->tagName == 'source' ? '.mp4' : '.png';
$local_filename = $this->cache_dir . $article_id . "-" . sha1($src) . $extension;
Debug::log("cache_images: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);
if (!file_exists($local_filename)) {
$file_content = fetch_file_contents(["url" => $src, "max_size" => MAX_CACHE_FILE_SIZE]);
if ($file_content) {
if (strlen($file_content) > MIN_CACHE_FILE_SIZE) {
file_put_contents($local_filename, $file_content);
}
if ($this->cache_url($article_id, $src)) {
$success = true;
}
} else {
}
}
}
$esth = $this->pdo->prepare("SELECT content_url FROM ttrss_enclosures WHERE post_id = ? AND
(content_type LIKE '%image%' OR content_type LIKE '%video%')");
if ($esth->execute([$article_id])) {
while ($enc = $esth->fetch()) {
$has_images = true;
$url = rewrite_relative_url($site_url, $enc["content_url"]);
if ($this->cache_url($article_id, $url)) {
$success = true;
}
}