af_comics: split contents of subscribe/basic_info/fetch hooks into appropriate per-comic filters
This commit is contained in:
parent
ba7f7e72db
commit
96fa6e3002
|
@ -2,4 +2,16 @@
|
||||||
/**
 * Base class for af_comics filters.
 *
 * Subclasses must provide supported() and process(). The three on_*()
 * callbacks are optional: the defaults below all return false, and the
 * Af_Comics hook methods only act on a callback result that is truthy.
 */
abstract class Af_ComicFilter {

    /**
     * @return array entries for the "currently supported" list; Af_Comics
     *               iterates this return value with foreach
     */
    abstract public function supported();

    /**
     * @param array $article article passed by reference
     *                       (NOTE(review): exact shape and the meaning of the
     *                       return value are not visible here; confirm against the caller)
     */
    abstract public function process(&$article);

    /**
     * Called with the URL being subscribed to.
     *
     * @param string $url
     * @return mixed false here; overrides return replacement feed contents
     */
    public function on_subscribe($url) {
        $handled = false;

        return $handled;
    }

    /**
     * Called with the feed URL when basic feed info is requested.
     *
     * @param string $url
     * @return mixed false here; overrides return an array with 'title' and 'site_url'
     */
    public function on_basic_info($url) {
        $handled = false;

        return $handled;
    }

    /**
     * Called with the feed URL when the feed is fetched.
     *
     * @param string $url
     * @return mixed false here; overrides return generated feed data
     */
    public function on_fetch($url) {
        $handled = false;

        return $handled;
    }
}
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
<?php
|
||||||
|
/**
 * Filter for GoComics: synthesises a one-entry feed from the comic's dated
 * web page instead of rewriting articles of an existing feed.
 */
class Af_Comics_Gocomics extends Af_ComicFilter {

    /**
     * @return array label shown in the list of supported comics
     */
    public function supported() {
        return ["GoComics (see note below)"];
    }

    /**
     * Articles are never rewritten by this filter.
     */
    public function process(&$article) {
        return false;
    }

    /**
     * @param string $url URL being subscribed to
     * @return string|false a bare XML prolog for GoComics comic pages, false otherwise
     */
    public function on_subscribe($url) {
        $is_comic_page = preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url);

        if (!$is_comic_page) {
            return false;
        }

        // Get is_html() to return false.
        return '<?xml version="1.0" encoding="utf-8"?>';
    }

    /**
     * @param string $url feed URL
     * @return array|false title (capitalised comic slug) and site_url (the matched URL), or false
     */
    public function on_basic_info($url) {
        $parts = [];

        if (!preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url, $parts)) {
            return false;
        }

        return ['title' => ucfirst($parts[1]), 'site_url' => $parts[0]];
    }

    /**
     * Builds feed data for today's strip of the comic named in $url.
     *
     * Accepts both gocomics.com comic pages and the feedburner "uclick" URLs.
     * When today's page cannot be fetched or no strip image is found, the
     * generated feed simply has no entry.
     *
     * @param string $url feed URL
     * @return string|false generated feed data, or false when the URL is not
     *                      handled or the template produced no output
     */
    public function on_fetch($url) {
        $found = [];

        if (!preg_match('#^https?://(?:feeds\.feedburner\.com/uclick|www\.gocomics\.com)/([-a-z0-9]+)$#i', $url, $found)) {
            return false;
        }

        $slug = $found[1];
        $comic_home = 'https://www.gocomics.com/' . $slug;
        $todays_page = $comic_home . date('/Y/m/d');

        $html = fetch_file_contents(array('url' => $todays_page, 'type' => 'text/html', 'followlocation' => false));

        require_once 'lib/MiniTemplator.class.php';

        // Escaped once here; the same escaped values are reused for every template slot.
        $escaped_title = htmlspecialchars($slug);
        $escaped_home = htmlspecialchars($comic_home);
        $escaped_page = htmlspecialchars($todays_page);

        $tpl = new MiniTemplator();
        $tpl->readTemplateFromFile('templates/generated_feed.txt');

        $tpl->setVariable('FEED_TITLE', $escaped_title, true);
        $tpl->setVariable('VERSION', get_version(), true);
        $tpl->setVariable('FEED_URL', htmlspecialchars($url), true);
        $tpl->setVariable('SELF_URL', $escaped_home, true);

        $strip = $this->extract_strip($html);

        if ($strip) {
            $tpl->setVariable('ARTICLE_ID', $escaped_page, true);
            $tpl->setVariable('ARTICLE_LINK', $escaped_page, true);
            // Timestamp is pinned to 11:00:00 of the current day.
            $tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
            $tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($strip['title']), true);
            $tpl->setVariable('ARTICLE_EXCERPT', '', true);
            $tpl->setVariable('ARTICLE_CONTENT', $strip['content'], true);

            $tpl->setVariable('ARTICLE_AUTHOR', '', true);
            $tpl->setVariable('ARTICLE_SOURCE_LINK', $escaped_home, true);
            $tpl->setVariable('ARTICLE_SOURCE_TITLE', $escaped_title, true);

            $tpl->addBlock('entry');
        }

        $tpl->addBlock('feed');

        $rendered = null;

        return $tpl->generateOutputToString($rendered) ? $rendered : false;
    }

    /**
     * Pulls the strip image and headline out of a fetched GoComics page.
     *
     * @param mixed $html page body as returned by fetch_file_contents()
     * @return array|null ['title' => ..., 'content' => ...], or null when the body
     *                    is empty, cannot be parsed, or has no strip image
     */
    private function extract_strip($html) {
        if (!$html) {
            return null;
        }

        $doc = new DOMDocument();

        // @ silences any warnings loadHTML() raises while parsing.
        if (!@$doc->loadHTML($html)) {
            return null;
        }

        $xpath = new DOMXPath($doc);
        $img = $xpath->query('//picture[contains(@class, "item-comic-image")]/img')->item(0);

        if (!$img) {
            return null;
        }

        // First <h1> of the page is the entry title; fall back to today's date.
        $heading = $xpath->query('//h1')->item(0);
        $title = $heading ? clean(trim($heading->nodeValue)) : date('l, F d, Y');

        foreach (['srcset', 'sizes', 'data-srcset', 'width'] as $attr_name) {
            $img->removeAttribute($attr_name);
        }

        return ['title' => $title, 'content' => $doc->saveHTML($img)];
    }
}
|
|
@ -0,0 +1,95 @@
|
||||||
|
<?php
|
||||||
|
/**
 * Filter for The Far Side: synthesises a one-entry feed from the site's
 * dated "daily dose" page.
 */
class Af_Comics_Gocomics_FarSide extends Af_ComicFilter {

    /**
     * @return array label shown in the list of supported comics
     */
    public function supported() {
        return ["The Far Side (needs cache media)"];
    }

    /**
     * Articles are never rewritten by this filter.
     */
    public function process(&$article) {
        return false;
    }

    /**
     * @param string $url URL being subscribed to
     * @return string|false a bare XML prolog for thefarside.com URLs, false otherwise
     */
    public function on_subscribe($url) {
        if ($this->is_farside_url($url))
            return '<?xml version="1.0" encoding="utf-8"?>'; // Get is_html() to return false.

        return false;
    }

    /**
     * @param string $url feed URL
     * @return array|false fixed title and site_url for thefarside.com URLs, false otherwise
     */
    public function on_basic_info($url) {
        // Uses the same URL test as on_subscribe()/on_fetch(), so a URL accepted
        // for subscribing (e.g. without a trailing slash) also gets its basic info.
        if ($this->is_farside_url($url))
            return ['title' => "The Far Side", 'site_url' => 'https://www.thefarside.com'];

        return false;
    }

    /**
     * Builds feed data for today's Far Side page.
     *
     * When the page cannot be fetched or parsed, or it has no "js-daily-dose"
     * element, the generated feed has no entry.
     *
     * @param string $url feed URL
     * @return string|false generated feed data, or false when the URL is not
     *                      handled or the template produced no output
     */
    public function on_fetch($url) {
        if (!$this->is_farside_url($url))
            return false;

        require_once 'lib/MiniTemplator.class.php';

        // Keep the raw link for the HTTP request; escape exactly once for the template.
        $article_link = "https://www.thefarside.com" . date('/Y/m/d');
        $article_link_escaped = htmlspecialchars($article_link);

        $tpl = new MiniTemplator();

        $tpl->readTemplateFromFile('templates/generated_feed.txt');

        $tpl->setVariable('FEED_TITLE', "The Far Side", true);
        $tpl->setVariable('VERSION', get_version(), true);
        $tpl->setVariable('FEED_URL', htmlspecialchars($url), true);
        $tpl->setVariable('SELF_URL', htmlspecialchars($url), true);

        $body = fetch_file_contents(['url' => $article_link, 'type' => 'text/html', 'followlocation' => false]);

        if ($body) {
            $doc = new DOMDocument();

            // @ silences any warnings loadHTML() raises while parsing.
            if (@$doc->loadHTML($body)) {
                $xpath = new DOMXPath($doc);

                $content_node = $xpath->query('//*[contains(@class,"js-daily-dose")]')->item(0);

                if ($content_node) {
                    // Relative path (leading dot) so the query is actually scoped to
                    // $content_node; an absolute '//img' ignores the context node.
                    $imgs = $xpath->query('.//img[@data-src]', $content_node);

                    // Copy data-src into src on each image that carries a data-src attribute.
                    foreach ($imgs as $img) {
                        $img->setAttribute('src', $img->getAttribute('data-src'));
                    }

                    $junk_elems = $xpath->query("//*[@data-shareable-popover]");

                    foreach ($junk_elems as $junk)
                        $junk->parentNode->removeChild($junk);

                    // First <h3> of the page is the entry title; fall back to today's date.
                    $title = $xpath->query('//h3')->item(0);

                    if ($title) {
                        $title = clean(trim($title->nodeValue));
                    } else {
                        $title = date('l, F d, Y');
                    }

                    $tpl->setVariable('ARTICLE_ID', $article_link_escaped, true);
                    $tpl->setVariable('ARTICLE_LINK', $article_link_escaped, true);
                    // Timestamp is pinned to 11:00:00 of the current day.
                    $tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
                    $tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true);
                    $tpl->setVariable('ARTICLE_EXCERPT', '', true);
                    $tpl->setVariable('ARTICLE_CONTENT', "<p> " . $doc->saveHTML($content_node) . "</p>", true);

                    $tpl->setVariable('ARTICLE_AUTHOR', '', true);
                    $tpl->setVariable('ARTICLE_SOURCE_LINK', $article_link_escaped, true);
                    $tpl->setVariable('ARTICLE_SOURCE_TITLE', "The Far Side", true);

                    $tpl->addBlock('entry');
                }
            }
        }

        $tpl->addBlock('feed');

        if ($tpl->generateOutputToString($tmp_data))
            return $tmp_data;

        return false;
    }

    /**
     * Single URL test shared by all three callbacks.
     *
     * @param string $url
     * @return bool true when $url starts with http(s)://www.thefarside.com
     */
    private function is_farside_url($url) {
        return preg_match('#^https?://www\.thefarside\.com#', $url) === 1;
    }
}
|
|
@ -52,7 +52,7 @@ class Af_Comics extends Plugin {
|
||||||
|
|
||||||
print "<p>" . __("The following comics are currently supported:") . "</p>";
|
print "<p>" . __("The following comics are currently supported:") . "</p>";
|
||||||
|
|
||||||
$comics = ["GoComics", "The Far Side (needs cache media)"];
|
$comics = [];
|
||||||
|
|
||||||
foreach ($this->filters as $f) {
|
foreach ($this->filters as $f) {
|
||||||
foreach ($f->supported() as $comic) {
|
foreach ($f->supported() as $comic) {
|
||||||
|
@ -68,9 +68,9 @@ class Af_Comics extends Plugin {
|
||||||
}
|
}
|
||||||
print "</ul>";
|
print "</ul>";
|
||||||
|
|
||||||
print "<p>".__("To subscribe to GoComics use the comic's regular web page as the feed URL (e.g. for the <em>Garfield</em> comic use <code>http://www.gocomics.com/garfield</code>).")."</p>";
|
print_notice("To subscribe to GoComics use the comic's regular web page as the feed URL (e.g. for the <em>Garfield</em> comic use <code>http://www.gocomics.com/garfield</code>).");
|
||||||
|
|
||||||
print "<p>".__('Drop any updated filters into <code>filters.local</code> in plugin directory.')."</p>";
|
print_notice('Drop any updated filters into <code>filters.local</code> in plugin directory.');
|
||||||
|
|
||||||
print "</div>";
|
print "</div>";
|
||||||
}
|
}
|
||||||
|
@ -84,166 +84,35 @@ class Af_Comics extends Plugin {
|
||||||
return $article;
|
return $article;
|
||||||
}
|
}
|
||||||
|
|
||||||
// GoComics dropped feed support so it needs to be handled when fetching the feed.
|
|
||||||
// TODO: this should be split into individual methods provided by filters
|
|
||||||
function hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, $last_article_timestamp, $auth_login, $auth_pass) {
|
function hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, $last_article_timestamp, $auth_login, $auth_pass) {
|
||||||
if ($auth_login || $auth_pass)
|
foreach ($this->filters as $f) {
|
||||||
return $feed_data;
|
$res = $f->on_fetch($fetch_url);
|
||||||
|
|
||||||
if (preg_match('#^https?://(?:feeds\.feedburner\.com/uclick|www\.gocomics\.com)/([-a-z0-9]+)$#i', $fetch_url, $comic)) {
|
|
||||||
$site_url = 'https://www.gocomics.com/' . $comic[1];
|
|
||||||
|
|
||||||
$article_link = $site_url . date('/Y/m/d');
|
|
||||||
|
|
||||||
$body = fetch_file_contents(array('url' => $article_link, 'type' => 'text/html', 'followlocation' => false));
|
|
||||||
|
|
||||||
require_once 'lib/MiniTemplator.class.php';
|
|
||||||
|
|
||||||
$feed_title = htmlspecialchars($comic[1]);
|
|
||||||
$site_url = htmlspecialchars($site_url);
|
|
||||||
$article_link = htmlspecialchars($article_link);
|
|
||||||
|
|
||||||
$tpl = new MiniTemplator();
|
|
||||||
|
|
||||||
$tpl->readTemplateFromFile('templates/generated_feed.txt');
|
|
||||||
|
|
||||||
$tpl->setVariable('FEED_TITLE', $feed_title, true);
|
|
||||||
$tpl->setVariable('VERSION', get_version(), true);
|
|
||||||
$tpl->setVariable('FEED_URL', htmlspecialchars($fetch_url), true);
|
|
||||||
$tpl->setVariable('SELF_URL', $site_url, true);
|
|
||||||
|
|
||||||
if ($body) {
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
|
|
||||||
if (@$doc->loadHTML($body)) {
|
|
||||||
$xpath = new DOMXPath($doc);
|
|
||||||
|
|
||||||
$node = $xpath->query('//picture[contains(@class, "item-comic-image")]/img')->item(0);
|
|
||||||
|
|
||||||
if ($node) {
|
|
||||||
$title = $xpath->query('//h1')->item(0);
|
|
||||||
|
|
||||||
if ($title) {
|
|
||||||
$title = clean(trim($title->nodeValue));
|
|
||||||
} else {
|
|
||||||
$title = date('l, F d, Y');
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (['srcset', 'sizes', 'data-srcset', 'width'] as $attr ) {
|
|
||||||
$node->removeAttribute($attr);
|
|
||||||
}
|
|
||||||
|
|
||||||
$tpl->setVariable('ARTICLE_ID', $article_link, true);
|
|
||||||
$tpl->setVariable('ARTICLE_LINK', $article_link, true);
|
|
||||||
$tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
|
|
||||||
$tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true);
|
|
||||||
$tpl->setVariable('ARTICLE_EXCERPT', '', true);
|
|
||||||
$tpl->setVariable('ARTICLE_CONTENT', $doc->saveHTML($node), true);
|
|
||||||
|
|
||||||
$tpl->setVariable('ARTICLE_AUTHOR', '', true);
|
|
||||||
$tpl->setVariable('ARTICLE_SOURCE_LINK', $site_url, true);
|
|
||||||
$tpl->setVariable('ARTICLE_SOURCE_TITLE', $feed_title, true);
|
|
||||||
|
|
||||||
$tpl->addBlock('entry');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$tpl->addBlock('feed');
|
|
||||||
|
|
||||||
if ($tpl->generateOutputToString($tmp_data))
|
|
||||||
$feed_data = $tmp_data;
|
|
||||||
|
|
||||||
} else if (preg_match("#^https?://www\.thefarside\.com#", $fetch_url)) {
|
|
||||||
require_once 'lib/MiniTemplator.class.php';
|
|
||||||
|
|
||||||
$article_link = htmlspecialchars("https://www.thefarside.com" . date('/Y/m/d'));
|
|
||||||
|
|
||||||
$tpl = new MiniTemplator();
|
|
||||||
|
|
||||||
$tpl->readTemplateFromFile('templates/generated_feed.txt');
|
|
||||||
|
|
||||||
$tpl->setVariable('FEED_TITLE', "The Far Side", true);
|
|
||||||
$tpl->setVariable('VERSION', get_version(), true);
|
|
||||||
$tpl->setVariable('FEED_URL', htmlspecialchars($fetch_url), true);
|
|
||||||
$tpl->setVariable('SELF_URL', htmlspecialchars($fetch_url), true);
|
|
||||||
|
|
||||||
$body = fetch_file_contents(['url' => $article_link, 'type' => 'text/html', 'followlocation' => false]);
|
|
||||||
|
|
||||||
if ($body) {
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
|
|
||||||
if (@$doc->loadHTML($body)) {
|
|
||||||
$xpath = new DOMXPath($doc);
|
|
||||||
|
|
||||||
$content_node = $xpath->query('//*[contains(@class,"js-daily-dose")]')->item(0);
|
|
||||||
|
|
||||||
if ($content_node) {
|
|
||||||
$imgs = $xpath->query('//img[@data-src]', $content_node);
|
|
||||||
|
|
||||||
foreach ($imgs as $img) {
|
|
||||||
$img->setAttribute('src', $img->getAttribute('data-src'));
|
|
||||||
}
|
|
||||||
|
|
||||||
$junk_elems = $xpath->query("//*[@data-shareable-popover]");
|
|
||||||
|
|
||||||
foreach ($junk_elems as $junk)
|
|
||||||
$junk->parentNode->removeChild($junk);
|
|
||||||
|
|
||||||
$title = $xpath->query('//h3')->item(0);
|
|
||||||
|
|
||||||
if ($title) {
|
|
||||||
$title = clean(trim($title->nodeValue));
|
|
||||||
} else {
|
|
||||||
$title = date('l, F d, Y');
|
|
||||||
}
|
|
||||||
|
|
||||||
$tpl->setVariable('ARTICLE_ID', htmlspecialchars($article_link), true);
|
|
||||||
$tpl->setVariable('ARTICLE_LINK', htmlspecialchars($article_link), true);
|
|
||||||
$tpl->setVariable('ARTICLE_UPDATED_ATOM', date('c', mktime(11, 0, 0)), true);
|
|
||||||
$tpl->setVariable('ARTICLE_TITLE', htmlspecialchars($title), true);
|
|
||||||
$tpl->setVariable('ARTICLE_EXCERPT', '', true);
|
|
||||||
$tpl->setVariable('ARTICLE_CONTENT', "<p> " . $doc->saveHTML($content_node) . "</p>", true);
|
|
||||||
|
|
||||||
$tpl->setVariable('ARTICLE_AUTHOR', '', true);
|
|
||||||
$tpl->setVariable('ARTICLE_SOURCE_LINK', htmlspecialchars($article_link), true);
|
|
||||||
$tpl->setVariable('ARTICLE_SOURCE_TITLE', "The Far Side", true);
|
|
||||||
|
|
||||||
$tpl->addBlock('entry');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$tpl->addBlock('feed');
|
|
||||||
|
|
||||||
if ($tpl->generateOutputToString($tmp_data))
|
|
||||||
$feed_data = $tmp_data;
|
|
||||||
|
|
||||||
|
if ($res)
|
||||||
|
return $res;
|
||||||
}
|
}
|
||||||
|
|
||||||
return $feed_data;
|
return $feed_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hook_subscribe_feed($contents, $url, $auth_login, $auth_pass) {
|
function hook_subscribe_feed($contents, $url, $auth_login, $auth_pass) {
|
||||||
if ($auth_login || $auth_pass)
|
foreach ($this->filters as $f) {
|
||||||
return $contents;
|
$res = $f->on_subscribe($url);
|
||||||
|
|
||||||
if (preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $url) ||
|
if ($res)
|
||||||
preg_match("#^https?://www\.thefarside\.com#", $url))
|
return $res;
|
||||||
return '<?xml version="1.0" encoding="utf-8"?>'; // Get is_html() to return false.
|
}
|
||||||
|
|
||||||
return $contents;
|
return $contents;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass) {
|
function hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass) {
|
||||||
if ($auth_login || $auth_pass)
|
foreach ($this->filters as $f) {
|
||||||
return $basic_info;
|
$res = $f->on_basic_info($fetch_url);
|
||||||
|
|
||||||
if (preg_match('#^https?://www\.gocomics\.com/([-a-z0-9]+)$#i', $fetch_url, $matches))
|
if ($res)
|
||||||
$basic_info = ['title' => ucfirst($matches[1]), 'site_url' => $matches[0]];
|
return $res;
|
||||||
|
}
|
||||||
if (preg_match("#^https?://www.thefarside.com/#", $fetch_url))
|
|
||||||
$basic_info = ['title' => "The Far Side", 'site_url' => 'https://www.thefarside.com'];
|
|
||||||
|
|
||||||
return $basic_info;
|
return $basic_info;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue