af_redditimgur: add crude dupe checking based on content links
This commit is contained in:
parent
2caeb0f875
commit
4f5204dd45
|
@ -23,6 +23,9 @@ class Af_RedditImgur extends Plugin {
|
|||
$enable_readability = $this->host->get($this, "enable_readability");
|
||||
$enable_readability_checked = $enable_readability ? "checked" : "";
|
||||
|
||||
$enable_dupecheck = $this->host->get($this, "enable_dupecheck");
|
||||
$enable_dupecheck_checked = $enable_dupecheck ? "checked" : "";
|
||||
|
||||
print "<form dojoType=\"dijit.form.Form\">";
|
||||
|
||||
print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
|
||||
|
@ -53,6 +56,13 @@ class Af_RedditImgur extends Plugin {
|
|||
|
||||
print "<label for=\"enable_readability\">" . __("Extract missing content using Readability") . "</label>";
|
||||
|
||||
print "<br/>";
|
||||
|
||||
print "<input dojoType=\"dijit.form.CheckBox\" id=\"enable_dupecheck\"
|
||||
$enable_dupecheck_checked name=\"enable_dupecheck\"> ";
|
||||
|
||||
print "<label for=\"enable_dupecheck\">" . __("Mark duplicates as read using content links") . "</label>";
|
||||
|
||||
print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".
|
||||
__("Save")."</button>";
|
||||
|
||||
|
@ -63,8 +73,10 @@ class Af_RedditImgur extends Plugin {
|
|||
|
||||
/**
 * Persist the plugin preferences submitted from the settings form.
 *
 * Reads the "enable_readability" and "enable_dupecheck" checkbox fields
 * from $_POST, converts each one to a boolean, stores both values through
 * the plugin host and prints a confirmation message.
 */
function save() {
	// An unchecked checkbox is normally left out of the submitted form, so
	// either key may be absent. Guard the reads to avoid an undefined-index
	// notice; a missing field is forwarded as null, which is the value the
	// helper received from the unguarded read as well.
	$readability_raw = isset($_POST["enable_readability"]) ? $_POST["enable_readability"] : null;
	$dupecheck_raw = isset($_POST["enable_dupecheck"]) ? $_POST["enable_dupecheck"] : null;

	// The helper's result is compared with the string "true" to obtain a
	// PHP boolean for storage.
	$enable_readability = checkbox_to_sql_bool($readability_raw) == "true";
	$enable_dupecheck = checkbox_to_sql_bool($dupecheck_raw) == "true";

	$this->host->set($this, "enable_readability", $enable_readability);
	$this->host->set($this, "enable_dupecheck", $enable_dupecheck);

	echo __("Configuration saved");
}
|
||||
|
@ -228,15 +240,42 @@ class Af_RedditImgur extends Plugin {
|
|||
@$doc->loadHTML($article["content"]);
|
||||
$xpath = new DOMXPath($doc);
|
||||
|
||||
$found = $this->inline_stuff($article, $doc, $xpath);
|
||||
$content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0);
|
||||
|
||||
if ($content_link && $this->host->get($this, "enable_dupecheck")) {
|
||||
|
||||
if (DB_TYPE == "pgsql") {
|
||||
$date_qpart = "date_entered < NOW() - INTERVAL '1 day' ";
|
||||
} else {
|
||||
$date_qpart = "date_entered < DATE_SUB(NOW(), INTERVAL 1 DAY) ";
|
||||
}
|
||||
|
||||
$content_href = db_escape_string($content_link->getAttribute("href"));
|
||||
$owner_uid = $article["owner_uid"];
|
||||
$guid = db_escape_string($article["guid_hashed"]);
|
||||
|
||||
$result = db_query("SELECT id FROM ttrss_entries, ttrss_user_entries
|
||||
WHERE
|
||||
content LIKE '%$content_href%' AND
|
||||
$date_qpart AND
|
||||
ref_id = id AND
|
||||
owner_uid = $owner_uid
|
||||
AND guid != '$guid'
|
||||
LIMIT 1");
|
||||
|
||||
if (db_num_rows($result) != 0) {
|
||||
$found = true;
|
||||
$article["force_catchup"] = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$found) $found = $this->inline_stuff($article, $doc, $xpath);
|
||||
|
||||
if (function_exists("curl_init") && !$found && $this->host->get($this, "enable_readability") &&
|
||||
mb_strlen(strip_tags($article["content"])) <= 150) {
|
||||
|
||||
if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php");
|
||||
|
||||
$content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0);
|
||||
|
||||
if ($content_link &&
|
||||
strpos($content_link->getAttribute("href"), "twitter.com") === FALSE &&
|
||||
strpos($content_link->getAttribute("href"), "youtube.com") === FALSE &&
|
||||
|
|
Loading…
Reference in New Issue