From cc85704f3cc798e9df7813ab9e2de955d7152ada Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 26 Nov 2012 14:33:18 +0400 Subject: [PATCH] implement experimental article on-import data filters --- classes/filter.php | 14 ++++++++++ classes/filter/redditimgur.php | 47 ++++++++++++++++++++++++++++++++++ include/functions.php | 8 ++++++ include/rssfuncs.php | 44 +++++++++++++++++++++++++++++-- 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 classes/filter.php create mode 100644 classes/filter/redditimgur.php diff --git a/classes/filter.php b/classes/filter.php new file mode 100644 index 000000000..8d6bf6f26 --- /dev/null +++ b/classes/filter.php @@ -0,0 +1,14 @@ +link = $link; + } + + function filter_article($article) { + return $article; + } + +} +?> diff --git a/classes/filter/redditimgur.php b/classes/filter/redditimgur.php new file mode 100644 index 000000000..6b41015e7 --- /dev/null +++ b/classes/filter/redditimgur.php @@ -0,0 +1,47 @@ +loadHTML($article["content"]); + + if ($doc) { + $xpath = new DOMXPath($doc); + $entries = $xpath->query('(//a[@href]|//img[@src])'); + + foreach ($entries as $entry) { + if ($entry->hasAttribute("href")) { + if (preg_match("/i.imgur.com\/.*?.jpg/", $entry->getAttribute("href"))) { + + $img = $doc->createElement('img'); + $img->setAttribute("src", $entry->getAttribute("href")); + + $entry->parentNode->replaceChild($img, $entry); + } + } + + // remove tiny thumbnails + if ($entry->hasAttribute("src")) { + if ($entry->parentNode && $entry->parentNode->parentNode) { + $entry->parentNode->parentNode->removeChild($entry->parentNode); + } + } + } + + $node = $doc->getElementsByTagName('body')->item(0); + + if ($node) { + $article["content"] = $doc->saveXML($node, LIBXML_NOEMPTYTAG); + } + } + } + } + + return $article; + } +} +?> diff --git a/include/functions.php b/include/functions.php index 55333ccd6..1c3f4cbb9 100644 --- a/include/functions.php +++ b/include/functions.php @@ -7,11 +7,19 @@ function __autoload($class) { $class_file = str_replace("_", "/", strtolower(basename($class))); + $file = dirname(__FILE__)."/../plugins/$class_file.php"; + + if (file_exists($file)) { + require $file; + return; + } + $file = dirname(__FILE__)."/../classes/$class_file.php"; if (file_exists($file)) { require $file; } + } mb_internal_encoding("UTF-8"); diff --git a/include/rssfuncs.php b/include/rssfuncs.php index e12044ea5..be7e19edc 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -398,6 +398,23 @@ _debug("update_rss_feed: " . count($filters) . " filters loaded."); } + $filter_plugins = array(); + + if (defined('_ARTICLE_FILTER_PLUGINS')) { + foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) { + $pclass = "filter_" . trim($p); + + if (class_exists($pclass)) { + $plugin = new $pclass($link); + array_push($filter_plugins, $plugin); + } + } + } + + if ($debug_enabled) { + _debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded."); + } + if ($use_simplepie) { $iterator = $rss->get_items(); } else { @@ -651,8 +668,6 @@ $entry_content = db_escape_string($entry_content, false); - $content_hash = "SHA1:" . sha1(strip_tags($entry_content)); - $entry_title = db_escape_string($entry_title); $entry_link = db_escape_string($entry_link); $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250); @@ -757,6 +772,31 @@ _debug("update_rss_feed: done collecting data [TITLE:$entry_title]"); } + // TODO: less memory-hungry implementation + if (count($filter_plugins) > 0) { + if ($debug_enabled) { + _debug("update_rss_feed: applying plugin filters..."); + } + + $article = array("owner_uid" => $owner_uid, + "title" => $entry_title, + "content" => $entry_content, + "link" => $entry_link, + "tags" => $entry_tags, + "author" => $entry_author); + + foreach ($filter_plugins as $plugin) { + $article = $plugin->filter_article($article); + } + + $entry_title = $article["title"]; + $entry_content = $article["content"]; + $entry_tags = $article["tags"]; + $entry_author = $article["author"]; + } + + $content_hash = "SHA1:" . sha1(strip_tags($entry_content)); + db_query($link, "BEGIN"); if (db_num_rows($result) == 0) {