more fts stuff for simple index

This commit is contained in:
Andrew Dolgov 2015-08-04 12:52:49 +03:00
parent b4f544d389
commit e854442e1f
6 changed files with 103 additions and 42 deletions

View File

@ -282,17 +282,10 @@
function search_to_sql($search) { function search_to_sql($search) {
/*if (DB_TYPE == "pgsql") {
$search_escaped = db_escape_string($search);
return array("(to_tsvector('english', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
@@ to_tsquery('$search_escaped'))", explode(" ", $search));
}*/
$keywords = str_getcsv($search, " "); $keywords = str_getcsv($search, " ");
$query_keywords = array(); $query_keywords = array();
$search_words = array(); $search_words = array();
$search_query_leftover = ""; $search_query_leftover = array();
foreach ($keywords as $k) { foreach ($keywords as $k) {
if (strpos($k, "-") === 0) { if (strpos($k, "-") === 0) {
@ -390,23 +383,27 @@
array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')"); array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
} else { } else {
$search_query_leftover .= $k . " ";
if (DB_TYPE == "pgsql") {
$k = mb_strtolower($k);
array_push($search_query_leftover, $not ? "!$k" : $k);
} else {
array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
}
if (!$not) array_push($search_words, $k); if (!$not) array_push($search_words, $k);
/*array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
if (!$not) array_push($search_words, $k);*/
} }
} }
} }
if ($search_query_leftover) { if (count($search_query_leftover) > 0) {
$search_query_leftover = db_escape_string($search_query_leftover); $search_query_leftover = db_escape_string(implode(" & ", $search_query_leftover));
if (DB_TYPE == "pgsql") {
array_push($query_keywords, array_push($query_keywords,
"(to_tsvector('simple', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800)) "(tsvector_combined @@ '$search_query_leftover'::tsquery)");
@@ to_tsquery('$search_query_leftover'))"); }
} }
@ -713,6 +710,7 @@
if ($feed == -3) if ($feed == -3)
$first_id_query_strategy_part = "true"; $first_id_query_strategy_part = "true";
if (!$search) {
// if previous topmost article id changed that means our current pagination is no longer valid // if previous topmost article id changed that means our current pagination is no longer valid
$query = "SELECT DISTINCT $query = "SELECT DISTINCT
ttrss_feeds.title, ttrss_feeds.title,
@ -743,12 +741,13 @@
$result = db_query($query); $result = db_query($query);
if ($result && db_num_rows($result) > 0) { if ($result && db_num_rows($result) > 0) {
$first_id = (int) db_fetch_result($result, 0, "id"); $first_id = (int)db_fetch_result($result, 0, "id");
if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) { if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id); return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
} }
} }
}
$query = "SELECT DISTINCT $query = "SELECT DISTINCT
date_entered, date_entered,

View File

@ -984,11 +984,22 @@
_debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled); _debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
// Build the optional SET fragment that refreshes the fulltext search column
// as part of the ttrss_entries UPDATE that follows. Only the PostgreSQL
// branch produces SQL; every other DB_TYPE gets an empty fragment.
if (DB_TYPE == "pgsql") {
// Indexed text is the title plus the tag-stripped content, capped at
// 1000000 characters before escaping.
// NOTE(review): $entry_title and $entry_content are interpolated directly
// into the UPDATE below, so they may already be escaped at this point --
// confirm this does not double-escape the text fed to to_tsvector().
$tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content),
0, 1000000));
// The trailing comma is required: the fragment is spliced in between two
// other SET assignments of the UPDATE statement.
$tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
} else {
$tsvector_qpart = "";
}
db_query("UPDATE ttrss_entries db_query("UPDATE ttrss_entries
SET title = '$entry_title', SET title = '$entry_title',
content = '$entry_content', content = '$entry_content',
content_hash = '$entry_current_hash', content_hash = '$entry_current_hash',
updated = '$entry_timestamp_fmt', updated = '$entry_timestamp_fmt',
$tsvector_qpart
num_comments = '$num_comments', num_comments = '$num_comments',
plugin_data = '$entry_plugin_data', plugin_data = '$entry_plugin_data',
author = '$entry_author', author = '$entry_author',

View File

@ -142,12 +142,14 @@ create table ttrss_entries (id serial not null primary key,
num_comments integer not null default 0, num_comments integer not null default 0,
comments varchar(250) not null default '', comments varchar(250) not null default '',
plugin_data text, plugin_data text,
tsvector_combined tsvector,
lang varchar(2), lang varchar(2),
author varchar(250) not null default ''); author varchar(250) not null default '');
-- create index ttrss_entries_title_index on ttrss_entries(title); -- create index ttrss_entries_title_index on ttrss_entries(title);
create index ttrss_entries_date_entered_index on ttrss_entries(date_entered); create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
create index ttrss_entries_updated_idx on ttrss_entries(updated); create index ttrss_entries_updated_idx on ttrss_entries(updated);
create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
create table ttrss_user_entries ( create table ttrss_user_entries (
int_id serial not null primary key, int_id serial not null primary key,

View File

@ -0,0 +1,5 @@
-- Schema migration to version 128.
-- This variant makes no table changes; it only records the new schema
-- version, inside a single transaction.
BEGIN;
UPDATE ttrss_version SET schema_version = 128;
COMMIT;

View File

@ -0,0 +1,8 @@
-- Schema migration to version 128: fulltext search support.
-- Runs as one transaction so the column, its index and the version bump
-- are applied together.
BEGIN;
-- Nullable tsvector column holding the search document for each entry.
alter table ttrss_entries add column tsvector_combined tsvector;
-- GIN index over the new column.
create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
UPDATE ttrss_version SET schema_version = 128;
COMMIT;

View File

@ -33,6 +33,7 @@
"update-schema", "update-schema",
"convert-filters", "convert-filters",
"force-update", "force-update",
"update-search-idx",
"list-plugins", "list-plugins",
"help"); "help");
@ -80,6 +81,7 @@
print " --log FILE - log messages to FILE\n"; print " --log FILE - log messages to FILE\n";
print " --indexes - recreate missing schema indexes\n"; print " --indexes - recreate missing schema indexes\n";
print " --update-schema - update database schema\n"; print " --update-schema - update database schema\n";
print " --update-search-idx - update PostgreSQL fulltext search index\n";
print " --convert-filters - convert type1 filters to type2\n"; print " --convert-filters - convert type1 filters to type2\n";
print " --force-update - force update of all feeds\n"; print " --force-update - force update of all feeds\n";
print " --list-plugins - list all available plugins\n"; print " --list-plugins - list all available plugins\n";
@ -330,6 +332,40 @@
} }
if (isset($options["update-search-idx"])) {
	// Backfill ttrss_entries.tsvector_combined for every entry that does not
	// have a search document yet (tsvector_combined IS NULL).
	//
	// Batches are paged by the last processed id rather than by OFFSET. The
	// query only returns rows that are still NULL, and every processed row
	// drops out of that set, so advancing an OFFSET as well would skip one
	// batch worth of unindexed rows on every pass. Paging by id also
	// guarantees forward progress if a row were to stay NULL after its UPDATE.
	echo "Generating search index...\n";

	$result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
	$count = db_fetch_result($result, 0, "count");

	// Total number of entries in the table, not the number still unindexed.
	print "Total entries: $count.\n";

	$offset = 0;  // rows handled so far; used for progress output only
	$limit = 1000;  // batch size
	$last_id = 0;  // highest entry id processed so far

	while (true) {
		$result = db_query("SELECT id, title, content FROM ttrss_entries WHERE tsvector_combined IS NULL AND id > $last_id ORDER BY id LIMIT $limit");

		if (db_num_rows($result) == 0) {
			echo "All done.\n";
			break;
		}

		echo "Offset $offset...\n";

		while ($line = db_fetch_assoc($result)) {
			// Cast so that only an integer is ever interpolated into the SQL.
			$last_id = (int) $line["id"];

			// Search document: title plus tag-stripped content, capped at
			// 1000000 characters before escaping.
			$tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
				0, 1000000));

			db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = $last_id");
		}

		$offset += $limit;
	}
}
if (isset($options["list-plugins"])) { if (isset($options["list-plugins"])) {
$tmppluginhost = new PluginHost(); $tmppluginhost = new PluginHost();
$tmppluginhost->load_all($tmppluginhost::KIND_ALL); $tmppluginhost->load_all($tmppluginhost::KIND_ALL);