From 3dcd00e4c42989ec1834c77157314b315377b1a9 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 17 Jun 2015 18:15:41 +0300 Subject: [PATCH] use ngram tokens instead of whole words for matching --- plugins/af_sort_bayes/init.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/af_sort_bayes/init.php b/plugins/af_sort_bayes/init.php index 7699d2c4d..5419a8f69 100644 --- a/plugins/af_sort_bayes/init.php +++ b/plugins/af_sort_bayes/init.php @@ -16,6 +16,7 @@ class Af_Sort_Bayes extends Plugin { function init($host) { require_once __DIR__ . "/lib/class.naivebayesian.php"; + require_once __DIR__ . "/lib/class.naivebayesian_ngram.php"; require_once __DIR__ . "/lib/class.naivebayesianstorage.php"; $this->host = $host; @@ -36,7 +37,7 @@ class Af_Sort_Bayes extends Plugin { $category = $train_up ? "GOOD" : "NEUTRAL"; $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . $article_id . " AND owner_uid = " . $_SESSION["uid"]); @@ -202,7 +203,7 @@ class Af_Sort_Bayes extends Plugin { $owner_uid = $article["owner_uid"]; $nbs = new NaiveBayesianStorage($owner_uid); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $categories = $nbs->getCategories(); @@ -227,7 +228,7 @@ class Af_Sort_Bayes extends Plugin { $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"])); - if ($count_neutral >= 3000 && $count_good >= 1000) { + if ($count_neutral >= 20000 && $count_good >= 10000) { // enable automatic categorization $result = $nb->categorize($bayes_content); @@ -261,7 +262,7 @@ class Af_Sort_Bayes extends Plugin { $this->dbh->query("COMMIT"); $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $nb->updateProbabilities(); }