From 2ea9bbfd075fc03acceb608746c3989613f768e3 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 9 Jul 2012 19:49:48 +0400 Subject: [PATCH] implement proof of concept postgresql pg_trgm N-gram based marking of similar article titles as read automatically --- include/functions.php | 2 ++ include/rssfuncs.php | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/functions.php b/include/functions.php index 1e527e70d..b14515bdd 100644 --- a/include/functions.php +++ b/include/functions.php @@ -3302,6 +3302,8 @@ "; } + $rv['title'] = $line['title']; + $rv['content'] .= "
" . truncate_string(strip_tags($line['title']), 15) . "
"; diff --git a/include/rssfuncs.php b/include/rssfuncs.php index 12c4a57d2..59fa3d547 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -979,6 +979,27 @@ $published = 'false'; } + // N-grams + + if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { + + $result = db_query($link, "SELECT COUNT(*) AS similar FROM + ttrss_entries,ttrss_user_entries + WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day' + AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD." + AND owner_uid = $owner_uid"); + + $ngram_similar = db_fetch_result($result, 0, "similar"); + + if ($debug_enabled) { + _debug("update_rss_feed: N-gram similar results: $ngram_similar"); + } + + if ($ngram_similar > 0) { + $unread = 'false'; + } + } + $result = db_query($link, "INSERT INTO ttrss_user_entries (ref_id, owner_uid, feed_id, unread, last_read, marked,