ttrss/plugins/cache_starred_images/init.php

243 lines
6.8 KiB
PHP
Raw Permalink Normal View History

<?php
class Cache_Starred_Images extends Plugin {
/** @var PluginHost $host */
private $host;
/** @var DiskCache $cache */
private $cache;
/** @var DiskCache $cache_status */
private $cache_status;
/** @var int $max_cache_attempts (per article) */
private $max_cache_attempts = 5;
function about() {
return array(null,
"Automatically cache media files in Starred articles",
"fox");
}
function init($host) {
$this->host = $host;
$this->cache = DiskCache::instance("starred-images");
$this->cache_status = DiskCache::instance("starred-images.status-files");
2021-02-15 13:11:30 +00:00
if ($this->cache->make_dir())
chmod($this->cache->get_dir(), 0777);
if ($this->cache_status->make_dir())
chmod($this->cache_status->get_dir(), 0777);
if (!$this->cache->exists(".no-auto-expiry"))
2022-11-24 05:16:56 +00:00
$this->cache->put(".no-auto-expiry", "");
if (!$this->cache_status->exists(".no-auto-expiry"))
2022-11-24 05:16:56 +00:00
$this->cache_status->put(".no-auto-expiry", "");
if ($this->cache->is_writable() && $this->cache_status->is_writable()) {
$host->add_hook($host::HOOK_HOUSE_KEEPING, $this);
$host->add_hook($host::HOOK_ENCLOSURE_ENTRY, $this);
$host->add_hook($host::HOOK_SANITIZE, $this);
} else {
user_error("Starred cache directory ".$this->cache->get_dir()." (or status cache subdir in status-files/) is not writable.", E_USER_WARNING);
}
}
/** since HOOK_UPDATE_TASK is not available to user plugins, this hook is a next best thing */
function hook_house_keeping() {
Debug::log("caching media of starred articles for user " . $this->host->get_owner_uid() . "...");
$sth = $this->pdo->prepare("SELECT content, ttrss_entries.title,
2021-02-17 13:17:05 +00:00
ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data
FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON
(ttrss_user_entries.feed_id = ttrss_feeds.id)
WHERE ref_id = ttrss_entries.id AND
marked = true AND
site_url != '' AND
2021-02-17 13:17:05 +00:00
ttrss_user_entries.owner_uid = ? AND
plugin_data NOT LIKE '%starred_cache_images%'
ORDER BY ".Db::sql_random_function()." LIMIT 100");
if ($sth->execute([$this->host->get_owner_uid()])) {
$usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?");
while ($line = $sth->fetch()) {
Debug::log("processing article " . $line["title"], Debug::LOG_VERBOSE);
if ($line["site_url"]) {
$success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]);
if ($success) {
2021-02-17 13:17:05 +00:00
$plugin_data = "starred_cache_images," . $line["owner_uid"] . ":" . $line["plugin_data"];
$usth->execute([$plugin_data, $line['id']]);
}
}
}
}
/* actual housekeeping */
Debug::log("expiring {$this->cache->get_dir()} and {$this->cache_status->get_dir()}...");
$files = [
...(glob($this->cache->get_dir() . "/*-*") ?: []),
...(glob($this->cache_status->get_dir() . "/*.status") ?: []),
];
asort($files);
$last_article_id = 0;
$article_exists = 1;
foreach ($files as $file) {
list ($article_id, $hash) = explode("-", basename($file));
if ($article_id != $last_article_id) {
$last_article_id = $article_id;
2017-12-03 08:03:09 +00:00
$sth = $this->pdo->prepare("SELECT id FROM ttrss_entries WHERE id = ?");
$sth->execute([$article_id]);
2017-12-03 08:03:09 +00:00
$article_exists = $sth->fetch();
}
if (!$article_exists) {
unlink($file);
}
}
}
function hook_enclosure_entry($enc, $article_id, $rv) {
$local_filename = $article_id . "-" . sha1($enc["content_url"]);
if ($this->cache->exists($local_filename)) {
2021-02-15 13:11:30 +00:00
$enc["content_url"] = $this->cache->get_url($local_filename);
}
return $enc;
}
function hook_sanitize($doc, $site_url, $allowed_elements, $disallowed_attributes, $article_id) {
2021-02-17 13:17:05 +00:00
$xpath = new DOMXPath($doc);
if ($article_id) {
$entries = $xpath->query('(//img[@src])|(//video/source[@src])');
foreach ($entries as $entry) {
if ($entry->hasAttribute('src')) {
$src = UrlHelper::rewrite_relative($site_url, $entry->getAttribute('src'));
$local_filename = $article_id . "-" . sha1($src);
if ($this->cache->exists($local_filename)) {
2021-02-15 13:11:30 +00:00
$entry->setAttribute("src", $this->cache->get_url($local_filename));
$entry->removeAttribute("srcset");
}
}
}
}
return $doc;
}
private function cache_url(int $article_id, string $url) : bool {
$local_filename = $article_id . "-" . sha1($url);
if (!$this->cache->exists($local_filename)) {
Debug::log("cache_images: downloading: $url to $local_filename", Debug::LOG_VERBOSE);
2017-12-03 08:03:09 +00:00
2021-02-22 19:35:27 +00:00
$data = UrlHelper::fetch(["url" => $url, "max_size" => Config::get(Config::MAX_CACHE_FILE_SIZE)]);
2019-08-14 09:44:50 +00:00
if ($data)
return $this->cache->put($local_filename, $data);;
} else {
//Debug::log("cache_images: local file exists for $url", Debug::$LOG_VERBOSE);
return true;
}
return false;
}
private function cache_article_images(string $content, string $site_url, int $owner_uid, int $article_id) : bool {
$status_filename = $article_id . "-" . sha1($site_url) . ".status";
/* housekeeping might run as a separate user, in this case status/media might not be writable */
if (!$this->cache_status->is_writable($status_filename)) {
Debug::log("status not writable: $status_filename", Debug::LOG_VERBOSE);
return false;
}
Debug::log("status: $status_filename", Debug::LOG_VERBOSE);
if ($this->cache_status->exists($status_filename))
$status = json_decode($this->cache_status->get($status_filename), true);
2021-02-17 13:17:05 +00:00
else
$status = ["attempt" => 0];
2021-02-17 13:17:05 +00:00
$status["attempt"] += 1;
2021-02-17 13:17:05 +00:00
// only allow several download attempts for article
if ($status["attempt"] > $this->max_cache_attempts) {
Debug::log("too many attempts for $site_url", Debug::LOG_VERBOSE);
2021-02-17 13:17:05 +00:00
return false;
}
if (!$this->cache_status->put($status_filename, json_encode($status))) {
2021-02-17 13:17:05 +00:00
user_error("unable to write status file: $status_filename", E_USER_WARNING);
return false;
}
$doc = new DOMDocument();
$has_images = false;
$success = false;
2021-02-17 13:17:05 +00:00
if (@$doc->loadHTML('<?xml encoding="UTF-8">' . $content)) {
$xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])|(//video/source[@src])');
foreach ($entries as $entry) {
if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) {
$has_images = true;
$src = UrlHelper::rewrite_relative($site_url, $entry->getAttribute('src'));
if ($this->cache_url($article_id, $src)) {
$success = true;
}
}
}
}
$esth = $this->pdo->prepare("SELECT content_url FROM ttrss_enclosures WHERE post_id = ? AND
(content_type LIKE '%image%' OR content_type LIKE '%video%')");
2021-02-17 13:17:05 +00:00
if ($esth->execute([$article_id])) {
while ($enc = $esth->fetch()) {
2021-02-17 13:17:05 +00:00
$has_images = true;
$url = UrlHelper::rewrite_relative($site_url, $enc["content_url"]);
if ($this->cache_url($article_id, $url)) {
$success = true;
}
}
}
return $success || !$has_images;
}
function api_version() {
return 2;
}
}