more fts stuff for simple index
This commit is contained in:
parent
b4f544d389
commit
e854442e1f
|
@ -282,17 +282,10 @@
|
||||||
|
|
||||||
function search_to_sql($search) {
|
function search_to_sql($search) {
|
||||||
|
|
||||||
/*if (DB_TYPE == "pgsql") {
|
|
||||||
$search_escaped = db_escape_string($search);
|
|
||||||
|
|
||||||
return array("(to_tsvector('english', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
|
|
||||||
@@ to_tsquery('$search_escaped'))", explode(" ", $search));
|
|
||||||
}*/
|
|
||||||
|
|
||||||
$keywords = str_getcsv($search, " ");
|
$keywords = str_getcsv($search, " ");
|
||||||
$query_keywords = array();
|
$query_keywords = array();
|
||||||
$search_words = array();
|
$search_words = array();
|
||||||
$search_query_leftover = "";
|
$search_query_leftover = array();
|
||||||
|
|
||||||
foreach ($keywords as $k) {
|
foreach ($keywords as $k) {
|
||||||
if (strpos($k, "-") === 0) {
|
if (strpos($k, "-") === 0) {
|
||||||
|
@ -390,23 +383,27 @@
|
||||||
|
|
||||||
array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
|
array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
|
||||||
} else {
|
} else {
|
||||||
$search_query_leftover .= $k . " ";
|
|
||||||
|
if (DB_TYPE == "pgsql") {
|
||||||
|
$k = mb_strtolower($k);
|
||||||
|
array_push($search_query_leftover, $not ? "!$k" : $k);
|
||||||
|
} else {
|
||||||
|
array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
|
||||||
|
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
|
||||||
|
}
|
||||||
|
|
||||||
if (!$not) array_push($search_words, $k);
|
if (!$not) array_push($search_words, $k);
|
||||||
|
|
||||||
/*array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
|
|
||||||
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
|
|
||||||
if (!$not) array_push($search_words, $k);*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($search_query_leftover) {
|
if (count($search_query_leftover) > 0) {
|
||||||
$search_query_leftover = db_escape_string($search_query_leftover);
|
$search_query_leftover = db_escape_string(implode(" & ", $search_query_leftover));
|
||||||
|
|
||||||
array_push($query_keywords,
|
if (DB_TYPE == "pgsql") {
|
||||||
"(to_tsvector('simple', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
|
array_push($query_keywords,
|
||||||
@@ to_tsquery('$search_query_leftover'))");
|
"(tsvector_combined @@ '$search_query_leftover'::tsquery)");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -713,29 +710,30 @@
|
||||||
if ($feed == -3)
|
if ($feed == -3)
|
||||||
$first_id_query_strategy_part = "true";
|
$first_id_query_strategy_part = "true";
|
||||||
|
|
||||||
// if previous topmost article id changed that means our current pagination is no longer valid
|
if (!$search) {
|
||||||
$query = "SELECT DISTINCT
|
// if previous topmost article id changed that means our current pagination is no longer valid
|
||||||
ttrss_feeds.title,
|
$query = "SELECT DISTINCT
|
||||||
date_entered,
|
ttrss_feeds.title,
|
||||||
guid,
|
date_entered,
|
||||||
ttrss_entries.id,
|
guid,
|
||||||
ttrss_entries.title,
|
ttrss_entries.id,
|
||||||
updated,
|
ttrss_entries.title,
|
||||||
score,
|
updated,
|
||||||
marked,
|
score,
|
||||||
published,
|
marked,
|
||||||
last_marked,
|
published,
|
||||||
last_published
|
last_marked,
|
||||||
FROM
|
last_published
|
||||||
$from_qpart
|
FROM
|
||||||
WHERE
|
$from_qpart
|
||||||
$feed_check_qpart
|
WHERE
|
||||||
ttrss_user_entries.ref_id = ttrss_entries.id AND
|
$feed_check_qpart
|
||||||
ttrss_user_entries.owner_uid = '$owner_uid' AND
|
ttrss_user_entries.ref_id = ttrss_entries.id AND
|
||||||
$search_query_part
|
ttrss_user_entries.owner_uid = '$owner_uid' AND
|
||||||
$start_ts_query_part
|
$search_query_part
|
||||||
$since_id_part
|
$start_ts_query_part
|
||||||
$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
|
$since_id_part
|
||||||
|
$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
|
||||||
|
|
||||||
if ($_REQUEST["debug"]) {
|
if ($_REQUEST["debug"]) {
|
||||||
print $query;
|
print $query;
|
||||||
|
@ -743,12 +741,13 @@
|
||||||
|
|
||||||
$result = db_query($query);
|
$result = db_query($query);
|
||||||
if ($result && db_num_rows($result) > 0) {
|
if ($result && db_num_rows($result) > 0) {
|
||||||
$first_id = (int) db_fetch_result($result, 0, "id");
|
$first_id = (int)db_fetch_result($result, 0, "id");
|
||||||
|
|
||||||
if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
|
if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
|
||||||
return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
|
return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$query = "SELECT DISTINCT
|
$query = "SELECT DISTINCT
|
||||||
date_entered,
|
date_entered,
|
||||||
|
|
|
@ -984,11 +984,22 @@
|
||||||
|
|
||||||
_debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
|
_debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
|
||||||
|
|
||||||
|
// Build the optional "tsvector_combined = ...," fragment that is spliced into
// the UPDATE ttrss_entries statement. It stays empty unless DB_TYPE is
// "pgsql", so the resulting SET list is unchanged for other backends.
$tsvector_qpart = "";

if (DB_TYPE == "pgsql") {
	// Search text is the title plus the tag-stripped content, capped at
	// 1000000 characters before it is handed to to_tsvector().
	// NOTE(review): $entry_title and $entry_content are interpolated directly
	// into the UPDATE statement, so they are presumably already escaped at
	// this point; escaping the combined string again may double-escape it.
	// Confirm against the code that prepares these variables.
	$tsvector_combined = mb_substr($entry_title . ' ' . strip_tags($entry_content), 0, 1000000);
	$tsvector_combined = db_escape_string($tsvector_combined);

	// The trailing comma is intentional: the fragment sits in the middle of
	// the SET list.
	$tsvector_qpart = "tsvector_combined = to_tsvector('simple', '" . $tsvector_combined . "'),";
}
|
||||||
|
|
||||||
db_query("UPDATE ttrss_entries
|
db_query("UPDATE ttrss_entries
|
||||||
SET title = '$entry_title',
|
SET title = '$entry_title',
|
||||||
content = '$entry_content',
|
content = '$entry_content',
|
||||||
content_hash = '$entry_current_hash',
|
content_hash = '$entry_current_hash',
|
||||||
updated = '$entry_timestamp_fmt',
|
updated = '$entry_timestamp_fmt',
|
||||||
|
$tsvector_qpart
|
||||||
num_comments = '$num_comments',
|
num_comments = '$num_comments',
|
||||||
plugin_data = '$entry_plugin_data',
|
plugin_data = '$entry_plugin_data',
|
||||||
author = '$entry_author',
|
author = '$entry_author',
|
||||||
|
|
|
@ -142,12 +142,14 @@ create table ttrss_entries (id serial not null primary key,
|
||||||
num_comments integer not null default 0,
|
num_comments integer not null default 0,
|
||||||
comments varchar(250) not null default '',
|
comments varchar(250) not null default '',
|
||||||
plugin_data text,
|
plugin_data text,
|
||||||
|
tsvector_combined tsvector,
|
||||||
lang varchar(2),
|
lang varchar(2),
|
||||||
author varchar(250) not null default '');
|
author varchar(250) not null default '');
|
||||||
|
|
||||||
-- create index ttrss_entries_title_index on ttrss_entries(title);
|
-- create index ttrss_entries_title_index on ttrss_entries(title);
|
||||||
create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
|
create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
|
||||||
create index ttrss_entries_updated_idx on ttrss_entries(updated);
|
create index ttrss_entries_updated_idx on ttrss_entries(updated);
|
||||||
|
create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
|
||||||
|
|
||||||
create table ttrss_user_entries (
|
create table ttrss_user_entries (
|
||||||
int_id serial not null primary key,
|
int_id serial not null primary key,
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
-- Schema version 128: version bump only, no structural change in this script.
-- NOTE(review): presumably the variant for the backend that does not get the
-- tsvector_combined column; confirm which schema directory this file lives in.
BEGIN;

UPDATE ttrss_version SET schema_version = 128;

COMMIT;
|
|
@ -0,0 +1,8 @@
|
||||||
|
-- Schema version 128: add the precomputed full-text search vector to
-- ttrss_entries and index it, then record the new schema version.
BEGIN;

-- The column is nullable and has no default, so existing rows start out NULL.
ALTER TABLE ttrss_entries ADD COLUMN tsvector_combined tsvector;

-- GIN index over the stored vector.
CREATE INDEX ttrss_entries_tsvector_combined_idx ON ttrss_entries USING gin(tsvector_combined);

UPDATE ttrss_version SET schema_version = 128;

COMMIT;
|
36
update.php
36
update.php
|
@ -33,6 +33,7 @@
|
||||||
"update-schema",
|
"update-schema",
|
||||||
"convert-filters",
|
"convert-filters",
|
||||||
"force-update",
|
"force-update",
|
||||||
|
"update-search-idx",
|
||||||
"list-plugins",
|
"list-plugins",
|
||||||
"help");
|
"help");
|
||||||
|
|
||||||
|
@ -80,6 +81,7 @@
|
||||||
print " --log FILE - log messages to FILE\n";
|
print " --log FILE - log messages to FILE\n";
|
||||||
print " --indexes - recreate missing schema indexes\n";
|
print " --indexes - recreate missing schema indexes\n";
|
||||||
print " --update-schema - update database schema\n";
|
print " --update-schema - update database schema\n";
|
||||||
|
print " --update-search-idx - update PostgreSQL fulltext search index\n";
|
||||||
print " --convert-filters - convert type1 filters to type2\n";
|
print " --convert-filters - convert type1 filters to type2\n";
|
||||||
print " --force-update - force update of all feeds\n";
|
print " --force-update - force update of all feeds\n";
|
||||||
print " --list-plugins - list all available plugins\n";
|
print " --list-plugins - list all available plugins\n";
|
||||||
|
@ -330,6 +332,40 @@
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --update-search-idx: backfill ttrss_entries.tsvector_combined for every
// entry whose search vector is still NULL, working in batches of $limit rows.
// The help text describes this as a PostgreSQL-only operation; the queries
// below use to_tsvector() and the tsvector_combined column.
//
// Batches are walked by primary key (id > $last_id) instead of OFFSET. The
// query only returns rows that are still NULL, and every processed row leaves
// that set, so a growing OFFSET would skip rows that were never indexed.
// Advancing by id also guarantees the loop terminates even if an individual
// UPDATE fails to populate the column.
if (isset($options["update-search-idx"])) {
	echo "Generating search index...\n";

	$result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
	$count = db_fetch_result($result, 0, "count");

	print "Total entries: $count.\n";

	$offset = 0;   // rows processed so far; used for progress output only
	$limit = 1000; // batch size
	$last_id = 0;  // highest entry id handled so far

	while (true) {
		$result = db_query("SELECT id, title, content FROM ttrss_entries
			WHERE tsvector_combined IS NULL AND id > $last_id
			ORDER BY id LIMIT $limit");

		if (db_num_rows($result) == 0) {
			echo "All done.\n";
			break;
		}

		echo "Offset $offset...\n";

		while ($line = db_fetch_assoc($result)) {
			$entry_id = (int) $line["id"];

			// Same recipe as the feed updater: title plus tag-stripped
			// content, capped at 1000000 characters, then escaped for SQL.
			$tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
				0, 1000000));

			db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $entry_id);

			// Rows arrive ordered by id, so the last one seen is the batch maximum.
			$last_id = $entry_id;
			++$offset;
		}
	}
}
|
||||||
|
|
||||||
if (isset($options["list-plugins"])) {
|
if (isset($options["list-plugins"])) {
|
||||||
$tmppluginhost = new PluginHost();
|
$tmppluginhost = new PluginHost();
|
||||||
$tmppluginhost->load_all($tmppluginhost::KIND_ALL);
|
$tmppluginhost->load_all($tmppluginhost::KIND_ALL);
|
||||||
|
|
Loading…
Reference in New Issue