2013-05-01 17:06:48 +00:00
|
|
|
<?php
|
|
|
|
abstract class FeedItem_Common extends FeedItem {
|
2021-11-15 02:40:45 +00:00
|
|
|
/** @var DOMElement */
|
2013-05-01 17:06:48 +00:00
|
|
|
protected $elem;
|
2021-11-15 02:40:45 +00:00
|
|
|
|
|
|
|
/** @var DOMDocument */
|
2013-05-01 17:06:48 +00:00
|
|
|
protected $doc;
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/** @var DOMXPath */
|
|
|
|
protected $xpath;
|
|
|
|
|
|
|
|
/**
 * Stores the DOM handles for this item and drops the item's own <source> child.
 *
 * @param DOMElement $elem element this feed item wraps
 * @param DOMDocument $doc document handed in by the caller (stored as-is)
 * @param DOMXPath $xpath XPath evaluator used for the prefixed lookups in this class
 */
function __construct(DOMElement $elem, DOMDocument $doc, DOMXPath $xpath) {
	$this->elem = $elem;
	$this->xpath = $xpath;
	$this->doc = $doc;

	// we don't need <source> element
	//
	// Only direct children are inspected. getElementsByTagName() searches the whole
	// subtree, so a <source> nested deeper could be returned first; removeChild()
	// rejects a node that is not a direct child, which left the item's own <source>
	// in place.
	foreach ($elem->childNodes as $child) {
		if ($child instanceof DOMElement && $child->localName === "source") {
			try {
				$elem->removeChild($child);
			} catch (DOMException $e) {
				// best effort, as before: a failed removal is ignored
			}

			// stop right away: the live child list was just modified
			break;
		}
	}
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Gives access to the DOM element this item was constructed with.
 *
 * @return DOMElement the same node instance held by this object (not a copy)
 */
function get_element(): DOMElement {
	$wrapped = $this->elem;

	return $wrapped;
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Returns the author of this item as text.
 *
 * The first <author> element is consulted first: its <name>, then its <email>, then
 * its own text. If that yields nothing, every dc:creator value is joined with ", ".
 *
 * @return string value passed through clean(); empty string when nothing was found
 */
function get_author(): string {
	/** @var DOMElement|null $author_elem */
	$author_elem = $this->elem->getElementsByTagName("author")->item(0);

	if ($author_elem) {
		// structured sub-elements win over the raw text of <author>
		foreach (["name", "email"] as $tag) {
			$node = $author_elem->getElementsByTagName($tag)->item(0);

			if ($node)
				return clean($node->nodeValue);
		}

		if ($author_elem->nodeValue)
			return clean($author_elem->nodeValue);
	}

	$creators = [];

	foreach ($this->xpath->query("dc:creator", $this->elem) as $creator) {
		$creators[] = clean($creator->nodeValue);
	}

	return implode(", ", $creators);
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Looks up the URL of the comments page for this item.
 *
 * The lookups are tried in order and the first match wins.
 *
 * @return string matched value passed through clean(), or '' when nothing matches
 */
function get_comments_url(): string {
	$lookups = [
		//RSS only. Use a query here to avoid namespace clashes (e.g. with slash).
		//might give a wrong result if a default namespace was declared (possible with XPath 2.0)
		"comments",

		//Atom Threading Extension (RFC 4685) stuff. Could be used in RSS feeds, so it's in common.
		//'text/html' for type is too restrictive?
		"atom:link[@rel='replies' and contains(@type,'text/html')]/@href",
	];

	foreach ($lookups as $expression) {
		$match = $this->xpath->query($expression, $this->elem)->item(0);

		if ($match)
			return clean($match->nodeValue);
	}

	return '';
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Reads the number of comments advertised for this item.
 *
 * @return int first matching value cast to int, or 0 when there is no match or the
 *             matched text is not numeric
 */
function get_comments_count(): int {
	//also query for ATE stuff here
	$node = $this->xpath->query(
		"slash:comments|thread:total|atom:link[@rel='replies']/@thread:count",
		$this->elem
	)->item(0);

	if (!$node || !is_numeric($node->nodeValue))
		return 0;

	return (int) clean($node->nodeValue);
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * this is common for both Atom and RSS types and deals with various 'media:' elements
 *
 * Enclosures are collected in this order: media:content children, the first
 * media:content of every media:group child, then media:thumbnail children
 * (always typed "image/generic").
 *
 * @return array<int, FeedEnclosure>
 */
function get_enclosures(): array {
	$encs = [];

	foreach ($this->xpath->query("media:content", $this->elem) as $content) {
		$encs[] = $this->media_content_to_enclosure($content);
	}

	foreach ($this->xpath->query("media:group", $this->elem) as $group) {
		/** @var DOMElement|null $content */
		$content = $this->xpath->query("media:content", $group)->item(0);

		// a group without any media:content contributes nothing
		if ($content)
			$encs[] = $this->media_content_to_enclosure($content, $group);
	}

	foreach ($this->xpath->query("media:thumbnail", $this->elem) as $thumbnail) {
		$enc = new FeedEnclosure();

		$enc->type = "image/generic";
		$enc->link = clean($thumbnail->getAttribute("url"));
		$enc->height = clean($thumbnail->getAttribute("height"));
		$enc->width = clean($thumbnail->getAttribute("width"));

		$encs[] = $enc;
	}

	return $encs;
}

/**
 * Builds an enclosure from one media:content element.
 *
 * @param DOMElement $content media:content element whose attributes are read
 * @param DOMElement|null $desc_fallback element searched for media:description when
 *                                       $content itself has none (the enclosing group)
 *
 * @return FeedEnclosure enclosure with type, link, length, height, width and, when a
 *                       description was found, title filled in
 */
private function media_content_to_enclosure(DOMElement $content, ?DOMElement $desc_fallback = null): FeedEnclosure {
	$enc = new FeedEnclosure();

	$enc->type = clean($content->getAttribute("type"));
	$enc->link = clean($content->getAttribute("url"));
	// NOTE(review): the Media RSS spec appears to name the size attribute "fileSize";
	// "length" is read unchanged here - confirm against real feeds before changing.
	$enc->length = clean($content->getAttribute("length"));
	$enc->height = clean($content->getAttribute("height"));
	$enc->width = clean($content->getAttribute("width"));

	$medium = clean($content->getAttribute("medium"));

	// no explicit MIME type: derive a generic one from the declared medium
	if (!$enc->type && $medium) {
		$enc->type = strtolower("$medium/generic");
	}

	$desc = $this->xpath->query("media:description", $content)->item(0);

	if (!$desc && $desc_fallback) {
		$desc = $this->xpath->query("media:description", $desc_fallback)->item(0);
	}

	if ($desc)
		$enc->title = clean($desc->nodeValue);

	return $enc;
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Counts the elements nested anywhere below $node.
 *
 * Despite the name this is not limited to direct children: the "*" lookup matches
 * every descendant element. Text nodes are not counted.
 *
 * @param DOMElement $node element whose subtree is inspected
 *
 * @return int number of descendant elements
 */
function count_children(DOMElement $node): int {
	$descendants = $node->getElementsByTagName("*");

	return $descendants->length;
}
|
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Returns the node's text when it contains no nested elements, otherwise the
 * canonical XML (c14n) serialization of the node.
 *
 * @param DOMElement $node element to render
 *
 * @return false|string false on failure, otherwise string contents
 */
function subtree_or_text(DOMElement $node) {
	$has_nested_elements = $this->count_children($node) != 0;

	return $has_nested_elements ? $node->c14n() : $node->nodeValue;
}
|
2013-05-01 17:06:48 +00:00
|
|
|
|
2021-11-15 02:40:45 +00:00
|
|
|
/**
 * Splits raw category strings on commas, normalizes every piece and returns the
 * sorted, de-duplicated result.
 *
 * Each piece is lowercased, trimmed and passed through clean(); numeric values get a
 * "t:" prefix; commas and quotes are stripped; on MySQL, characters above U+FFFF are
 * replaced with U+FFFD; the result is capped at 250 characters. Empty values are
 * dropped. Keys are preserved through filtering and sorting, so they are not
 * guaranteed to be consecutive.
 *
 * @param array<int, string> $cats
 *
 * @return array<int, string>
 */
static function normalize_categories(array $cats): array {

	$tmp = [];

	foreach ($cats as $rawcat) {
		// append in place; array_merge() here would re-copy the accumulated list on every pass
		array_push($tmp, ...explode(",", $rawcat));
	}

	// loop-invariant, so looked up once instead of per category
	$is_mysql = Config::get(Config::DB_TYPE) == "mysql";

	$tmp = array_map(function($srccat) use ($is_mysql) {
		$cat = clean(trim(mb_strtolower($srccat)));

		// we don't support numeric tags
		if (is_numeric($cat))
			$cat = 't:' . $cat;

		// preg_replace() returns null on failure; treat that as an empty value so it
		// is dropped below instead of reaching mb_strlen()/strlen() as null
		$cat = preg_replace('/[,\'\"]/', "", $cat) ?? '';

		if ($is_mysql) {
			$cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat) ?? '';
		}

		if (mb_strlen($cat) > 250)
			$cat = mb_substr($cat, 0, 250);

		return $cat;
	}, $tmp);

	// remove empty values
	$tmp = array_filter($tmp, function($cat) {
		return strlen((string) $cat) > 0;
	});

	asort($tmp);

	return array_unique($tmp);
}
|
2018-08-21 04:01:26 +00:00
|
|
|
}
|