af_psql_trgm:

1. better debugging output
2. fix incorrect default values being used sometimes
3. remove special workaround for equal titles because trgm extension
seems to be working properly for those now (tested on postgres 11)
4. code cleanup
This commit is contained in:
Andrew Dolgov 2021-01-11 12:23:46 +03:00
parent 219b493550
commit 6d4005f984
1 changed files with 46 additions and 32 deletions

View File

@ -3,6 +3,8 @@ class Af_Psql_Trgm extends Plugin {
/* @var PluginHost $host */ /* @var PluginHost $host */
private $host; private $host;
private $default_similarity = 0.75;
private $default_min_length = 32;
function about() { function about() {
return array(1.0, return array(1.0,
@ -37,7 +39,6 @@ class Af_Psql_Trgm extends Plugin {
$host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this); $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
$host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this); $host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this);
$host->add_hook($host::HOOK_ARTICLE_BUTTON, $this); $host->add_hook($host::HOOK_ARTICLE_BUTTON, $this);
} }
function get_js() { function get_js() {
@ -124,7 +125,7 @@ class Af_Psql_Trgm extends Plugin {
if ($args != "prefFeeds") return; if ($args != "prefFeeds") return;
print "<div dojoType=\"dijit.layout.AccordionPane\" print "<div dojoType=\"dijit.layout.AccordionPane\"
title=\"<i class='material-icons'>extension</i> ".__('Mark similar articles as read')."\">"; title=\"<i class='material-icons'>extension</i> ".__('Mark similar articles as read (af_psql_trgm)')."\">";
if (DB_TYPE != "pgsql") { if (DB_TYPE != "pgsql") {
print_error("Database type not supported."); print_error("Database type not supported.");
@ -132,17 +133,14 @@ class Af_Psql_Trgm extends Plugin {
$res = $this->pdo->query("select 'similarity'::regproc"); $res = $this->pdo->query("select 'similarity'::regproc");
if (!$res->fetch()) { if (!$res || !$res->fetch()) {
print_error("pg_trgm extension not found."); print_error("pg_trgm extension not found.");
} }
$similarity = $this->host->get($this, "similarity"); $similarity = $this->host->get($this, "similarity", $this->default_similarity);
$min_title_length = $this->host->get($this, "min_title_length"); $min_title_length = $this->host->get($this, "min_title_length", $this->default_min_length);
$enable_globally = $this->host->get($this, "enable_globally"); $enable_globally = $this->host->get($this, "enable_globally");
if (!$similarity) $similarity = '0.75';
if (!$min_title_length) $min_title_length = '32';
print "<form dojoType=\"dijit.form.Form\">"; print "<form dojoType=\"dijit.form.Form\">";
print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\"> print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
@ -197,10 +195,10 @@ class Af_Psql_Trgm extends Plugin {
print_button("submit", __("Save"), "class='alt-primary'"); print_button("submit", __("Save"), "class='alt-primary'");
print "</form>"; print "</form>";
$enabled_feeds = $this->host->get($this, "enabled_feeds"); /* cleanup */
if (!array($enabled_feeds)) $enabled_feeds = array(); $enabled_feeds = $this->filter_unknown_feeds(
$this->get_stored_array("enabled_feeds"));
$enabled_feeds = $this->filter_unknown_feeds($enabled_feeds);
$this->host->set($this, "enabled_feeds", $enabled_feeds); $this->host->set($this, "enabled_feeds", $enabled_feeds);
if (count($enabled_feeds) > 0) { if (count($enabled_feeds) > 0) {
@ -221,14 +219,11 @@ class Af_Psql_Trgm extends Plugin {
} }
function hook_prefs_edit_feed($feed_id) { function hook_prefs_edit_feed($feed_id) {
print "<header>".__("Similarity (pg_trgm)")."</header>"; print "<header>".__("Similarity (af_psql_trgm)")."</header>";
print "<section>"; print "<section>";
$enabled_feeds = $this->host->get($this, "enabled_feeds"); $enabled_feeds = $this->get_stored_array("enabled_feeds");
if (!array($enabled_feeds)) $enabled_feeds = array(); $checked = in_array($feed_id, $enabled_feeds) ? "checked" : "";
$key = array_search($feed_id, $enabled_feeds);
$checked = $key !== false ? "checked" : "";
print "<fieldset>"; print "<fieldset>";
@ -241,8 +236,7 @@ class Af_Psql_Trgm extends Plugin {
} }
function hook_prefs_save_feed($feed_id) { function hook_prefs_save_feed($feed_id) {
$enabled_feeds = $this->host->get($this, "enabled_feeds"); $enabled_feeds = $this->get_stored_array("enabled_feeds");
if (!is_array($enabled_feeds)) $enabled_feeds = array();
$enable = checkbox_to_sql_bool($_POST["trgm_similarity_enabled"]); $enable = checkbox_to_sql_bool($_POST["trgm_similarity_enabled"]);
$key = array_search($feed_id, $enabled_feeds); $key = array_search($feed_id, $enabled_feeds);
@ -265,29 +259,39 @@ class Af_Psql_Trgm extends Plugin {
if (DB_TYPE != "pgsql") return $article; if (DB_TYPE != "pgsql") return $article;
$res = $this->pdo->query("select 'similarity'::regproc"); $res = $this->pdo->query("select 'similarity'::regproc");
if (!$res->fetch()) return $article; if (!$res || !$res->fetch()) return $article;
$enable_globally = $this->host->get($this, "enable_globally"); $enable_globally = $this->host->get($this, "enable_globally");
if (!$enable_globally) { if (!$enable_globally &&
$enabled_feeds = $this->host->get($this, "enabled_feeds"); !in_array($article["feed"]["id"],
$key = array_search($article["feed"]["id"], $enabled_feeds); $this->get_stored_array("enabled_feeds"))) {
if ($key === false) return $article;
return $article;
} }
$similarity = (float) $this->host->get($this, "similarity"); $similarity = (float) $this->host->get($this, "similarity", $this->default_similarity);
if ($similarity < 0.01) return $article;
$min_title_length = (int) $this->host->get($this, "min_title_length"); if ($similarity < 0.01) {
if (mb_strlen($article["title"]) < $min_title_length) return $article; Debug::log("af_psql_trgm: similarity is set too low ($similarity)", Debug::$LOG_EXTENDED);
return $article;
}
$min_title_length = (int) $this->host->get($this, "min_title_length", $this->default_min_length);
if (mb_strlen($article["title"]) < $min_title_length) {
Debug::log("af_psql_trgm: article title is too short (min: $min_title_length)", Debug::$LOG_EXTENDED);
return $article;
}
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
$entry_guid = $article["guid_hashed"]; $entry_guid = $article["guid_hashed"];
$title_escaped = $article["title"]; $title_escaped = $article["title"];
// trgm does not return similarity=1 for completely equal strings // trgm does not return similarity=1 for completely equal strings
// this seems to be no longer the case (fixed in upstream?)
$sth = $this->pdo->prepare("SELECT COUNT(id) AS nequal /* $sth = $this->pdo->prepare("SELECT COUNT(id) AS nequal
FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND
date_entered >= NOW() - interval '3 days' AND date_entered >= NOW() - interval '3 days' AND
title = ? AND title = ? AND
@ -303,7 +307,7 @@ class Af_Psql_Trgm extends Plugin {
if ($nequal != 0) { if ($nequal != 0) {
$article["force_catchup"] = true; $article["force_catchup"] = true;
return $article; return $article;
} } */
$sth = $this->pdo->prepare("SELECT MAX(SIMILARITY(title, ?)) AS ms $sth = $this->pdo->prepare("SELECT MAX(SIMILARITY(title, ?)) AS ms
FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND
@ -315,14 +319,15 @@ class Af_Psql_Trgm extends Plugin {
$row = $sth->fetch(); $row = $sth->fetch();
$similarity_result = $row['ms']; $similarity_result = $row['ms'];
Debug::log("af_psql_trgm: similarity result: $similarity_result", Debug::$LOG_EXTENDED); Debug::log("af_psql_trgm: similarity result for $title_escaped: $similarity_result", Debug::$LOG_EXTENDED);
if ($similarity_result >= $similarity) { if ($similarity_result >= $similarity) {
Debug::log("af_psql_trgm: marking article as read ($similarity_result >= $similarity)", Debug::$LOG_EXTENDED);
$article["force_catchup"] = true; $article["force_catchup"] = true;
} }
return $article; return $article;
} }
function api_version() { function api_version() {
@ -345,4 +350,13 @@ class Af_Psql_Trgm extends Plugin {
return $tmp; return $tmp;
} }
private function get_stored_array($name) {
$tmp = $this->host->get($this, $name);
if (!is_array($tmp)) $tmp = [];
return $tmp;
}
} }