add af_* plugin workaround for already processed article

This commit is contained in:
Andrew Dolgov 2013-02-23 12:07:46 +04:00
parent 389859b5d4
commit e02555c1b8
6 changed files with 128 additions and 108 deletions

View File

@ -516,14 +516,20 @@
_debug("update_rss_feed: applying plugin filters.."); _debug("update_rss_feed: applying plugin filters..");
} }
// Todo unify with id checking below // FIXME not sure if owner_uid is a good idea here, we may have a base entry without user entry (?)
$result = db_query($link, "SELECT plugin_data FROM ttrss_entries $result = db_query($link, "SELECT plugin_data,title,content,link,tag_cache,author FROM ttrss_entries, ttrss_user_entries
WHERE guid = '".db_escape_string($entry_guid)."'"); WHERE ref_id = id AND guid = '".db_escape_string($entry_guid)."' AND owner_uid = $owner_uid");
if (db_num_rows($result) != 0) { if (db_num_rows($result) != 0) {
$entry_plugin_data = db_fetch_result($result, 0, "plugin_data"); $entry_plugin_data = db_fetch_result($result, 0, "plugin_data");
$stored_article = array("title" => db_fetch_result($result, 0, "title"),
"content" => db_fetch_result($result, 0, "content"),
"link" => db_fetch_result($result, 0, "link"),
"tags" => explode(",", db_fetch_result($result, 0, "tag_cache")),
"author" => db_fetch_result($result, 0, "author"));
} else { } else {
$entry_plugin_data = ""; $entry_plugin_data = "";
$stored_article = array();
} }
$article = array("owner_uid" => $owner_uid, // read only $article = array("owner_uid" => $owner_uid, // read only
@ -533,7 +539,8 @@
"link" => $entry_link, "link" => $entry_link,
"tags" => $entry_tags, "tags" => $entry_tags,
"plugin_data" => $entry_plugin_data, "plugin_data" => $entry_plugin_data,
"author" => $entry_author); "author" => $entry_author,
"stored" => $stored_article);
foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $plugin) { foreach ($pluginhost->get_hooks($pluginhost::HOOK_ARTICLE_FILTER) as $plugin) {
$article = $plugin->hook_article_filter($article); $article = $plugin->hook_article_filter($article);

View File

@ -20,35 +20,37 @@ class Af_Buttersafe extends Plugin {
function hook_article_filter($article) { function hook_article_filter($article) {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
if (strpos($article["guid"], "buttersafe.com") !== FALSE && if (strpos($article["guid"], "buttersafe.com") !== FALSE) {
strpos($article["plugin_data"], "buttersafe,$owner_uid:") === FALSE) { if (strpos($article["plugin_data"], "buttersafe,$owner_uid:") === FALSE) {
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"])); @$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false; $basenode = false;
if ($doc) { if ($doc) {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); $entries = $xpath->query('(//img[@src])');
$matches = array(); $matches = array();
foreach ($entries as $entry) { foreach ($entries as $entry) {
if (preg_match("/(http:\/\/buttersafe.com\/comics\/\d{4}.*)/i", $entry->getAttribute("src"), $matches)) { if (preg_match("/(http:\/\/buttersafe.com\/comics\/\d{4}.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry; $basenode = $entry;
break; break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "buttersafe,$owner_uid:" . $article["plugin_data"];
} }
} }
} else if (isset($article["stored"]["content"])) {
if ($basenode) { $article["content"] = $article["stored"]["content"];
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} }
$article["plugin_data"] = "buttersafe,$owner_uid:" . $article["plugin_data"];
} }
return $article; return $article;

View File

@ -20,35 +20,37 @@ class Af_Explosm extends Plugin {
function hook_article_filter($article) { function hook_article_filter($article) {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
if (strpos($article["link"], "explosm.net/comics") !== FALSE && if (strpos($article["link"], "explosm.net/comics") !== FALSE) {
strpos($article["plugin_data"], "explosm,$owner_uid:") === FALSE) { if (strpos($article["plugin_data"], "explosm,$owner_uid:") === FALSE) {
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"])); @$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false; $basenode = false;
if ($doc) { if ($doc) {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess... $entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array(); $matches = array();
foreach ($entries as $entry) { foreach ($entries as $entry) {
if (preg_match("/(http:\/\/.*\/db\/files\/Comics\/.*)/i", $entry->getAttribute("src"), $matches)) { if (preg_match("/(http:\/\/.*\/db\/files\/Comics\/.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry; $basenode = $entry;
break; break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "explosm,$owner_uid:" . $article["plugin_data"];
} }
} }
} else if (isset($article["stored"]["content"])) {
if ($basenode) { $article["content"] = $article["stored"]["content"];
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} }
$article["plugin_data"] = "explosm,$owner_uid:" . $article["plugin_data"];
} }
return $article; return $article;

View File

@ -20,34 +20,37 @@ class Af_GoComics extends Plugin {
function hook_article_filter($article) { function hook_article_filter($article) {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
if (strpos($article["guid"], "gocomics.com") !== FALSE && strpos($article["plugin_data"], "gocomics,$owner_uid:") === FALSE) { if (strpos($article["guid"], "gocomics.com") !== FALSE) {
$doc = new DOMDocument(); if (strpos($article["plugin_data"], "gocomics,$owner_uid:") === FALSE) {
@$doc->loadHTML(fetch_file_contents($article["link"])); $doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false; $basenode = false;
if ($doc) { if ($doc) {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess... $entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array(); $matches = array();
foreach ($entries as $entry) { foreach ($entries as $entry) {
if (preg_match("/(http:\/\/assets.amuniversal.com\/.*)/i", $entry->getAttribute("src"), $matches)) { if (preg_match("/(http:\/\/assets.amuniversal.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
$entry->setAttribute("src", $matches[0]); $entry->setAttribute("src", $matches[0]);
$basenode = $entry; $basenode = $entry;
break; break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "gocomics,$owner_uid:" . $article["plugin_data"];
} }
} }
} else if (isset($article["stored"]["content"])) {
if ($basenode) { $article["content"] = $article["stored"]["content"];
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} }
$article["plugin_data"] = "gocomics,$owner_uid:" . $article["plugin_data"];
} }
return $article; return $article;

View File

@ -20,35 +20,37 @@ class Af_PennyArcade extends Plugin {
function hook_article_filter($article) { function hook_article_filter($article) {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
if (strpos($article["link"], "penny-arcade.com") !== FALSE && strpos($article["title"], "Comic:") !== FALSE && if (strpos($article["link"], "penny-arcade.com") !== FALSE && strpos($article["title"], "Comic:") !== FALSE) {
strpos($article["plugin_data"], "pennyarcade,$owner_uid:") === FALSE) { if (strpos($article["plugin_data"], "pennyarcade,$owner_uid:") === FALSE) {
$doc = new DOMDocument(); $doc = new DOMDocument();
@$doc->loadHTML(fetch_file_contents($article["link"])); @$doc->loadHTML(fetch_file_contents($article["link"]));
$basenode = false; $basenode = false;
if ($doc) { if ($doc) {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess... $entries = $xpath->query('(//img[@src])'); // we might also check for img[@class='strip'] I guess...
$matches = array(); $matches = array();
foreach ($entries as $entry) { foreach ($entries as $entry) {
if (preg_match("/(http:\/\/art.penny-arcade.com\/.*)/i", $entry->getAttribute("src"), $matches)) { if (preg_match("/(http:\/\/art.penny-arcade.com\/.*)/i", $entry->getAttribute("src"), $matches)) {
$basenode = $entry; $basenode = $entry;
break; break;
}
}
if ($basenode) {
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
$article["plugin_data"] = "pennyarcade,$owner_uid:" . $article["plugin_data"];
} }
} }
} else if (isset($article["stored"]["content"])) {
if ($basenode) { $article["content"] = $article["stored"]["content"];
$article["content"] = $doc->saveXML($basenode, LIBXML_NOEMPTYTAG);
}
} }
$article["plugin_data"] = "pennyarcade,$owner_uid:" . $article["plugin_data"];
} }
return $article; return $article;

View File

@ -25,46 +25,50 @@ class Af_Unburn extends Plugin {
if ((strpos($article["link"], "feedproxy.google.com") !== FALSE || if ((strpos($article["link"], "feedproxy.google.com") !== FALSE ||
strpos($article["link"], "/~r/") !== FALSE || strpos($article["link"], "/~r/") !== FALSE ||
strpos($article["link"], "feedsportal.com") !== FALSE) && strpos($article["link"], "feedsportal.com") !== FALSE)) {
strpos($article["plugin_data"], "unburn,$owner_uid:") === FALSE) {
$ch = curl_init($article["link"]); if (strpos($article["plugin_data"], "unburn,$owner_uid:") === FALSE) {
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
$contents = @curl_exec($ch); $ch = curl_init($article["link"]);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
$real_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); $contents = @curl_exec($ch);
curl_close($ch); $real_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
if ($real_url) { curl_close($ch);
/* remove the rest of it */
$query = parse_url($real_url, PHP_URL_QUERY); if ($real_url) {
/* remove the rest of it */
if ($query && strpos($query, "utm_source") !== FALSE) { $query = parse_url($real_url, PHP_URL_QUERY);
$args = array();
parse_str($query, $args);
foreach (array("utm_source", "utm_medium", "utm_campaign") as $param) { if ($query && strpos($query, "utm_source") !== FALSE) {
if (isset($args[$param])) unset($args[$param]); $args = array();
parse_str($query, $args);
foreach (array("utm_source", "utm_medium", "utm_campaign") as $param) {
if (isset($args[$param])) unset($args[$param]);
}
$new_query = http_build_query($args);
if ($new_query != $query) {
$real_url = str_replace("?$query", "?$new_query", $real_url);
}
} }
$new_query = http_build_query($args); $real_url = preg_replace("/\?$/", "", $real_url);
if ($new_query != $query) { $article["plugin_data"] = "unburn,$owner_uid:" . $article["plugin_data"];
$real_url = str_replace("?$query", "?$new_query", $real_url); $article["link"] = $real_url;
}
} }
} else if (isset($article["stored"]["link"])) {
$real_url = preg_replace("/\?$/", "", $real_url); $article["link"] = $article["stored"]["link"];
$article["plugin_data"] = "unburn,$owner_uid:" . $article["plugin_data"];
$article["link"] = $real_url;
} }
} }