parser: clean() attribute values by default (except content)

This commit is contained in:
Andrew Dolgov 2018-12-26 10:16:11 +03:00
parent 949bfa3457
commit 55ef85adc0
4 changed files with 55 additions and 55 deletions

View File

@ -8,7 +8,7 @@ class FeedItem_Atom extends FeedItem_Common {
if ($id) { if ($id) {
return $id->nodeValue; return $id->nodeValue;
} else { } else {
return $this->get_link(); return clean($this->get_link());
} }
} }
@ -44,9 +44,9 @@ class FeedItem_Atom extends FeedItem_Common {
$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link); $base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
if ($base) if ($base)
return rewrite_relative_url($base, trim($link->getAttribute("href"))); return rewrite_relative_url($base, clean(trim($link->getAttribute("href"))));
else else
return trim($link->getAttribute("href")); return clean(trim($link->getAttribute("href")));
} }
} }
@ -56,7 +56,7 @@ class FeedItem_Atom extends FeedItem_Common {
$title = $this->elem->getElementsByTagName("title")->item(0); $title = $this->elem->getElementsByTagName("title")->item(0);
if ($title) { if ($title) {
return trim($title->nodeValue); return clean(trim($title->nodeValue));
} }
} }
@ -113,7 +113,7 @@ class FeedItem_Atom extends FeedItem_Common {
$categories = $this->xpath->query("dc:subject", $this->elem); $categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) { foreach ($categories as $cat) {
array_push($cats, trim($cat->nodeValue)); array_push($cats, clean(trim($cat->nodeValue)));
} }
return $cats; return $cats;
@ -129,9 +129,9 @@ class FeedItem_Atom extends FeedItem_Common {
if ($link->getAttribute("rel") == "enclosure") { if ($link->getAttribute("rel") == "enclosure") {
$enc = new FeedEnclosure(); $enc = new FeedEnclosure();
$enc->type = $link->getAttribute("type"); $enc->type = clean($link->getAttribute("type"));
$enc->link = $link->getAttribute("href"); $enc->link = clean($link->getAttribute("href"));
$enc->length = $link->getAttribute("length"); $enc->length = clean($link->getAttribute("length"));
array_push($encs, $enc); array_push($encs, $enc);
} }
@ -147,12 +147,12 @@ class FeedItem_Atom extends FeedItem_Common {
$lang = $this->elem->getAttributeNS(self::NS_XML, "lang"); $lang = $this->elem->getAttributeNS(self::NS_XML, "lang");
if (!empty($lang)) { if (!empty($lang)) {
return $lang; return clean($lang);
} else { } else {
// Fall back to the language declared on the feed, if any. // Fall back to the language declared on the feed, if any.
foreach ($this->doc->childNodes as $child) { foreach ($this->doc->childNodes as $child) {
if (method_exists($child, "getAttributeNS")) { if (method_exists($child, "getAttributeNS")) {
return $child->getAttributeNS(self::NS_XML, "lang"); return clean($child->getAttributeNS(self::NS_XML, "lang"));
} }
} }
} }

View File

@ -31,20 +31,20 @@ abstract class FeedItem_Common extends FeedItem {
if ($author) { if ($author) {
$name = $author->getElementsByTagName("name")->item(0); $name = $author->getElementsByTagName("name")->item(0);
if ($name) return $name->nodeValue; if ($name) return clean($name->nodeValue);
$email = $author->getElementsByTagName("email")->item(0); $email = $author->getElementsByTagName("email")->item(0);
if ($email) return $email->nodeValue; if ($email) return clean($email->nodeValue);
if ($author->nodeValue) if ($author->nodeValue)
return $author->nodeValue; return clean($author->nodeValue);
} }
$author = $this->xpath->query("dc:creator", $this->elem)->item(0); $author = $this->xpath->query("dc:creator", $this->elem)->item(0);
if ($author) { if ($author) {
return $author->nodeValue; return clean($author->nodeValue);
} }
} }
@ -54,14 +54,14 @@ abstract class FeedItem_Common extends FeedItem {
$com_url = $this->xpath->query("comments", $this->elem)->item(0); $com_url = $this->xpath->query("comments", $this->elem)->item(0);
if ($com_url) if ($com_url)
return $com_url->nodeValue; return clean($com_url->nodeValue);
//Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common. //Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common.
//'text/html' for type is too restrictive? //'text/html' for type is too restrictive?
$com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0); $com_url = $this->xpath->query("atom:link[@rel='replies' and contains(@type,'text/html')]/@href", $this->elem)->item(0);
if ($com_url) if ($com_url)
return $com_url->nodeValue; return clean($com_url->nodeValue);
} }
function get_comments_count() { function get_comments_count() {
@ -70,7 +70,7 @@ abstract class FeedItem_Common extends FeedItem {
$comments = $this->xpath->query($query, $this->elem)->item(0); $comments = $this->xpath->query($query, $this->elem)->item(0);
if ($comments) { if ($comments) {
return $comments->nodeValue; return clean($comments->nodeValue);
} }
} }
@ -83,19 +83,19 @@ abstract class FeedItem_Common extends FeedItem {
foreach ($enclosures as $enclosure) { foreach ($enclosures as $enclosure) {
$enc = new FeedEnclosure(); $enc = new FeedEnclosure();
$enc->type = $enclosure->getAttribute("type"); $enc->type = clean($enclosure->getAttribute("type"));
$enc->link = $enclosure->getAttribute("url"); $enc->link = clean($enclosure->getAttribute("url"));
$enc->length = $enclosure->getAttribute("length"); $enc->length = clean($enclosure->getAttribute("length"));
$enc->height = $enclosure->getAttribute("height"); $enc->height = clean($enclosure->getAttribute("height"));
$enc->width = $enclosure->getAttribute("width"); $enc->width = clean($enclosure->getAttribute("width"));
$medium = $enclosure->getAttribute("medium"); $medium = clean($enclosure->getAttribute("medium"));
if (!$enc->type && $medium) { if (!$enc->type && $medium) {
$enc->type = strtolower("$medium/generic"); $enc->type = strtolower("$medium/generic");
} }
$desc = $this->xpath->query("media:description", $enclosure)->item(0); $desc = $this->xpath->query("media:description", $enclosure)->item(0);
if ($desc) $enc->title = strip_tags($desc->nodeValue); if ($desc) $enc->title = clean($desc->nodeValue);
array_push($encs, $enc); array_push($encs, $enc);
} }
@ -108,23 +108,23 @@ abstract class FeedItem_Common extends FeedItem {
$content = $this->xpath->query("media:content", $enclosure)->item(0); $content = $this->xpath->query("media:content", $enclosure)->item(0);
if ($content) { if ($content) {
$enc->type = $content->getAttribute("type"); $enc->type = clean($content->getAttribute("type"));
$enc->link = $content->getAttribute("url"); $enc->link = clean($content->getAttribute("url"));
$enc->length = $content->getAttribute("length"); $enc->length = clean($content->getAttribute("length"));
$enc->height = $content->getAttribute("height"); $enc->height = clean($content->getAttribute("height"));
$enc->width = $content->getAttribute("width"); $enc->width = clean($content->getAttribute("width"));
$medium = $content->getAttribute("medium"); $medium = clean($content->getAttribute("medium"));
if (!$enc->type && $medium) { if (!$enc->type && $medium) {
$enc->type = strtolower("$medium/generic"); $enc->type = strtolower("$medium/generic");
} }
$desc = $this->xpath->query("media:description", $content)->item(0); $desc = $this->xpath->query("media:description", $content)->item(0);
if ($desc) { if ($desc) {
$enc->title = strip_tags($desc->nodeValue); $enc->title = clean($desc->nodeValue);
} else { } else {
$desc = $this->xpath->query("media:description", $enclosure)->item(0); $desc = $this->xpath->query("media:description", $enclosure)->item(0);
if ($desc) $enc->title = strip_tags($desc->nodeValue); if ($desc) $enc->title = clean($desc->nodeValue);
} }
array_push($encs, $enc); array_push($encs, $enc);
@ -137,9 +137,9 @@ abstract class FeedItem_Common extends FeedItem {
$enc = new FeedEnclosure(); $enc = new FeedEnclosure();
$enc->type = "image/generic"; $enc->type = "image/generic";
$enc->link = $enclosure->getAttribute("url"); $enc->link = clean($enclosure->getAttribute("url"));
$enc->height = $enclosure->getAttribute("height"); $enc->height = clean($enclosure->getAttribute("height"));
$enc->width = $enclosure->getAttribute("width"); $enc->width = clean($enclosure->getAttribute("width"));
array_push($encs, $enc); array_push($encs, $enc);
} }

View File

@ -4,9 +4,9 @@ class FeedItem_RSS extends FeedItem_Common {
$id = $this->elem->getElementsByTagName("guid")->item(0); $id = $this->elem->getElementsByTagName("guid")->item(0);
if ($id) { if ($id) {
return $id->nodeValue; return clean($id->nodeValue);
} else { } else {
return $this->get_link(); return clean($this->get_link());
} }
} }
@ -33,20 +33,20 @@ class FeedItem_RSS extends FeedItem_Common {
|| $link->getAttribute("rel") == "alternate" || $link->getAttribute("rel") == "alternate"
|| $link->getAttribute("rel") == "standout")) { || $link->getAttribute("rel") == "standout")) {
return trim($link->getAttribute("href")); return clean(trim($link->getAttribute("href")));
} }
} }
$link = $this->elem->getElementsByTagName("guid")->item(0); $link = $this->elem->getElementsByTagName("guid")->item(0);
if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") { if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") {
return trim($link->nodeValue); return clean(trim($link->nodeValue));
} }
$link = $this->elem->getElementsByTagName("link")->item(0); $link = $this->elem->getElementsByTagName("link")->item(0);
if ($link) { if ($link) {
return trim($link->nodeValue); return clean(trim($link->nodeValue));
} }
} }
@ -54,7 +54,7 @@ class FeedItem_RSS extends FeedItem_Common {
$title = $this->xpath->query("title", $this->elem)->item(0); $title = $this->xpath->query("title", $this->elem)->item(0);
if ($title) { if ($title) {
return trim($title->nodeValue); return clean(trim($title->nodeValue));
} }
// if the document has a default namespace then querying for // if the document has a default namespace then querying for
@ -62,7 +62,7 @@ class FeedItem_RSS extends FeedItem_Common {
$title = $this->elem->getElementsByTagName("title")->item(0); $title = $this->elem->getElementsByTagName("title")->item(0);
if ($title) { if ($title) {
return trim($title->nodeValue); return clean(trim($title->nodeValue));
} }
} }
@ -106,7 +106,7 @@ class FeedItem_RSS extends FeedItem_Common {
$categories = $this->xpath->query("dc:subject", $this->elem); $categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) { foreach ($categories as $cat) {
array_push($cats, trim($cat->nodeValue)); array_push($cats, clean(trim($cat->nodeValue)));
} }
return $cats; return $cats;
@ -120,11 +120,11 @@ class FeedItem_RSS extends FeedItem_Common {
foreach ($enclosures as $enclosure) { foreach ($enclosures as $enclosure) {
$enc = new FeedEnclosure(); $enc = new FeedEnclosure();
$enc->type = $enclosure->getAttribute("type"); $enc->type = clean($enclosure->getAttribute("type"));
$enc->link = $enclosure->getAttribute("url"); $enc->link = clean($enclosure->getAttribute("url"));
$enc->length = $enclosure->getAttribute("length"); $enc->length = clean($enclosure->getAttribute("length"));
$enc->height = $enclosure->getAttribute("height"); $enc->height = clean($enclosure->getAttribute("height"));
$enc->width = $enclosure->getAttribute("width"); $enc->width = clean($enclosure->getAttribute("width"));
array_push($encs, $enc); array_push($encs, $enc);
} }
@ -141,7 +141,7 @@ class FeedItem_RSS extends FeedItem_Common {
return ""; return "";
} }
return $languages[0]->textContent; return clean($languages[0]->textContent);
} }
} }

View File

@ -246,11 +246,11 @@ class FeedParser {
} }
function get_link() { function get_link() {
return $this->link; return clean($this->link);
} }
function get_title() { function get_title() {
return $this->title; return clean($this->title);
} }
function get_items() { function get_items() {
@ -266,7 +266,7 @@ class FeedParser {
foreach ($links as $link) { foreach ($links as $link) {
if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
array_push($rv, trim($link->getAttribute('href'))); array_push($rv, clean(trim($link->getAttribute('href'))));
} }
} }
break; break;
@ -275,7 +275,7 @@ class FeedParser {
foreach ($links as $link) { foreach ($links as $link) {
if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) { if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
array_push($rv, trim($link->getAttribute('href'))); array_push($rv, clean(trim($link->getAttribute('href'))));
} }
} }
break; break;