tag-related fixes
1. Move tag sanitization to the feed parser's common item class. 2. Enforce the length limit on tags when parsing. 3. Support multiple tags passed via a single dc:subject (and similar elements) by parsing the value as a comma-separated list. 4. Sort the resulting tag list so its order does not change between feed updates. 5. Remove duplicate code related to tag validation. 6. Allow the + symbol in tags.
This commit is contained in:
parent
ffa3f9309f
commit
304d3a0b88
|
@ -305,19 +305,9 @@ class Article extends Handler_Protected {
|
|||
post_int_id = ? AND owner_uid = ?");
|
||||
$sth->execute([$int_id, $_SESSION['uid']]);
|
||||
|
||||
$tags = FeedItem_Common::normalize_categories($tags);
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
$tag = Article::sanitize_tag($tag);
|
||||
|
||||
if (!Article::tag_is_valid($tag)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (preg_match("/^[0-9]*$/", $tag)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// print "<!-- $id : $int_id : $tag -->";
|
||||
|
||||
if ($tag != '') {
|
||||
$sth = $this->pdo->prepare("INSERT INTO ttrss_tags
|
||||
(post_int_id, owner_uid, tag_name)
|
||||
|
@ -331,7 +321,6 @@ class Article extends Handler_Protected {
|
|||
|
||||
/* update tag cache */
|
||||
|
||||
sort($tags_to_cache);
|
||||
$tags_str = join(",", $tags_to_cache);
|
||||
|
||||
$sth = $this->pdo->prepare("UPDATE ttrss_user_entries
|
||||
|
@ -802,27 +791,6 @@ class Article extends Handler_Protected {
|
|||
return $rv;
|
||||
}
|
||||
|
||||
/**
 * Normalizes a single tag string for storage.
 *
 * The tag is trimmed, lowercased as UTF-8, and stripped of the characters
 * , ' " + > <. When DB_TYPE is "mysql", every code point in the range
 * U+10000..U+10FFFF is additionally replaced with U+FFFD (the bytes
 * EF BF BD) -- presumably because the MySQL column cannot hold 4-byte
 * UTF-8 sequences; confirm against the schema.
 *
 * @param string $tag raw tag text
 * @return string|null sanitized tag (preg_replace() yields null on a regex error)
 */
static function sanitize_tag($tag) {
	$lowered = mb_strtolower(trim($tag), 'utf-8');
	$stripped = preg_replace('/[,\'\"\+\>\<]/', "", $lowered);

	if (DB_TYPE != "mysql") {
		return $stripped;
	}

	return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $stripped);
}
|
||||
|
||||
/**
 * Tells whether a tag is acceptable for storage.
 *
 * A tag is rejected when it is empty or otherwise falsy (this includes
 * the string "0"), when it is numeric, or when it is longer than
 * 250 characters.
 *
 * @param string $tag tag to check
 * @return bool true when the tag passes all checks
 */
static function tag_is_valid($tag) {
	if (!$tag) {
		return false;
	}

	if (is_numeric($tag)) {
		return false;
	}

	return mb_strlen($tag) <= 250;
}
|
||||
|
||||
static function get_article_image($enclosures, $content, $site_url) {
|
||||
|
||||
$article_image = "";
|
||||
|
|
|
@ -103,20 +103,20 @@ class FeedItem_Atom extends FeedItem_Common {
|
|||
|
||||
function get_categories() {
|
||||
$categories = $this->elem->getElementsByTagName("category");
|
||||
$cats = array();
|
||||
$cats = [];
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
if ($cat->hasAttribute("term"))
|
||||
array_push($cats, trim($cat->getAttribute("term")));
|
||||
array_push($cats, $cat->getAttribute("term"));
|
||||
}
|
||||
|
||||
$categories = $this->xpath->query("dc:subject", $this->elem);
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
array_push($cats, clean(trim($cat->nodeValue)));
|
||||
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
return $cats;
|
||||
return $this->normalize_categories($cats);
|
||||
}
|
||||
|
||||
function get_enclosures() {
|
||||
|
|
|
@ -162,4 +162,35 @@ abstract class FeedItem_Common extends FeedItem {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Turns raw category/tag strings into a clean, sorted, de-duplicated list.
 *
 * Each input string is treated as a comma-separated list, so one element
 * may yield several tags. Every resulting tag is:
 *  - lowercased (UTF-8), trimmed and passed through clean()
 *    (defined elsewhere; presumably strips markup -- confirm),
 *  - stripped of the characters , ' ",
 *  - prefixed with "t:" when it is numeric, since numeric tags are not supported,
 *  - on MySQL (DB_TYPE == "mysql"), rewritten so that code points
 *    U+10000..U+10FFFF become U+FFFD,
 *  - truncated to 250 characters.
 *
 * Tags that end up empty (e.g. from "a,,b", a trailing comma, or
 * whitespace-only items) are dropped instead of being returned as "".
 *
 * @param array $cats raw category strings, each possibly comma-separated
 * @return array sequentially indexed list of unique tags in ascending order
 */
static function normalize_categories($cats) {
	$normalized = [];

	foreach ($cats as $rawcat) {
		foreach (explode(",", $rawcat) as $srccat) {
			$cat = clean(trim(mb_strtolower($srccat, 'utf-8')));

			// strip separators/quotes first so that a quoted number such as
			// "2020" is still recognised as numeric below
			$cat = trim((string) preg_replace('/[,\'\"]/', "", $cat));

			// we don't support numeric tags
			if (is_numeric($cat))
				$cat = 't:' . $cat;

			if (DB_TYPE == "mysql") {
				// the cast maps a preg_replace() failure (null, e.g. on invalid
				// UTF-8) to an empty string, which is discarded below
				$cat = (string) preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);
			}

			if (mb_strlen($cat) > 250)
				$cat = mb_substr($cat, 0, 250);

			// explode() produces empty items for adjacent or trailing commas
			if ($cat === '')
				continue;

			$normalized[] = $cat;
		}
	}

	$normalized = array_unique($normalized);

	// sort() also reindexes, so callers get a plain list with a stable order
	sort($normalized);

	return $normalized;
}
|
||||
}
|
||||
|
|
|
@ -97,19 +97,19 @@ class FeedItem_RSS extends FeedItem_Common {
|
|||
|
||||
function get_categories() {
|
||||
$categories = $this->elem->getElementsByTagName("category");
|
||||
$cats = array();
|
||||
$cats = [];
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
array_push($cats, trim($cat->nodeValue));
|
||||
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
$categories = $this->xpath->query("dc:subject", $this->elem);
|
||||
|
||||
foreach ($categories as $cat) {
|
||||
array_push($cats, clean(trim($cat->nodeValue)));
|
||||
array_push($cats, $cat->nodeValue);
|
||||
}
|
||||
|
||||
return $cats;
|
||||
return $this->normalize_categories($cats);
|
||||
}
|
||||
|
||||
function get_enclosures() {
|
||||
|
|
|
@ -626,28 +626,8 @@ class RSSUtils {
|
|||
Debug::log("author $entry_author", Debug::$LOG_VERBOSE);
|
||||
Debug::log("looking for tags...", Debug::$LOG_VERBOSE);
|
||||
|
||||
// parse <category> entries into tags
|
||||
|
||||
$additional_tags = array();
|
||||
|
||||
$additional_tags_src = $item->get_categories();
|
||||
|
||||
if (is_array($additional_tags_src)) {
|
||||
foreach ($additional_tags_src as $tobj) {
|
||||
array_push($additional_tags, $tobj);
|
||||
}
|
||||
}
|
||||
|
||||
$entry_tags = array_unique($additional_tags);
|
||||
|
||||
for ($i = 0; $i < count($entry_tags); $i++) {
|
||||
$entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
|
||||
|
||||
// we don't support numeric tags, let's prefix them
|
||||
if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
|
||||
}
|
||||
|
||||
Debug::log("tags found: " . join(",", $entry_tags), Debug::$LOG_VERBOSE);
|
||||
$entry_tags = $item->get_categories();
|
||||
Debug::log("tags found: " . join(", ", $entry_tags), Debug::$LOG_VERBOSE);
|
||||
|
||||
Debug::log("done collecting data.", Debug::$LOG_VERBOSE);
|
||||
|
||||
|
@ -1107,9 +1087,7 @@ class RSSUtils {
|
|||
$manual_tags = trim_array(explode(",", $f["param"]));
|
||||
|
||||
foreach ($manual_tags as $tag) {
|
||||
if (Article::tag_is_valid($tag)) {
|
||||
array_push($entry_tags, $tag);
|
||||
}
|
||||
array_push($entry_tags, $tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1122,19 +1100,17 @@ class RSSUtils {
|
|||
$filtered_tags = array();
|
||||
$tags_to_cache = array();
|
||||
|
||||
if ($entry_tags && is_array($entry_tags)) {
|
||||
foreach ($entry_tags as $tag) {
|
||||
if (array_search($tag, $boring_tags) === false) {
|
||||
array_push($filtered_tags, $tag);
|
||||
}
|
||||
foreach ($entry_tags as $tag) {
|
||||
if (array_search($tag, $boring_tags) === false) {
|
||||
array_push($filtered_tags, $tag);
|
||||
}
|
||||
}
|
||||
|
||||
$filtered_tags = array_unique($filtered_tags);
|
||||
|
||||
if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
|
||||
Debug::log("filtered article tags:", Debug::$LOG_VERBOSE);
|
||||
print_r($filtered_tags);
|
||||
if (Debug::get_loglevel() >= Debug::$LOG_VERBOSE) {
|
||||
Debug::log("filtered tags: " . implode(", ", $filtered_tags), Debug::$LOG_VERBOSE);
|
||||
|
||||
}
|
||||
|
||||
// Save article tags in the database
|
||||
|
@ -1149,12 +1125,9 @@ class RSSUtils {
|
|||
(owner_uid,tag_name,post_int_id)
|
||||
VALUES (?, ?, ?)");
|
||||
|
||||
$filtered_tags = FeedItem_Common::normalize_categories($filtered_tags);
|
||||
|
||||
foreach ($filtered_tags as $tag) {
|
||||
|
||||
$tag = Article::sanitize_tag($tag);
|
||||
|
||||
if (!Article::tag_is_valid($tag)) continue;
|
||||
|
||||
$tsth->execute([$tag, $entry_int_id, $owner_uid]);
|
||||
|
||||
if (!$tsth->fetch()) {
|
||||
|
@ -1165,9 +1138,6 @@ class RSSUtils {
|
|||
}
|
||||
|
||||
/* update the cache */
|
||||
|
||||
$tags_to_cache = array_unique($tags_to_cache);
|
||||
|
||||
$tags_str = join(",", $tags_to_cache);
|
||||
|
||||
$tsth = $pdo->prepare("UPDATE ttrss_user_entries
|
||||
|
|
Loading…
Reference in New Issue