feeditem_atom: support xml:base for enclosures and entry content
UrlHelper::rewrite_relative: use the base URL's path when the relative URL's path is not absolute (experimental)
This commit is contained in:
parent
d09a64d6f9
commit
dff479af64
|
@ -60,43 +60,76 @@ class FeedItem_Atom extends FeedItem_Common {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
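/** $base is optional (returns $content unchanged if $base or $content is empty, or if $content cannot be loaded as HTML); $content is an HTML string whose href/src attributes are rewritten against $base */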
|
||||||
|
private function rewrite_content_to_base($base, $content) {
|
||||||
|
|
||||||
|
if (!empty($base) && !empty($content)) {
|
||||||
|
|
||||||
|
$tmpdoc = new DOMDocument();
|
||||||
|
if (@$tmpdoc->loadHTML('<?xml encoding="UTF-8">' . $content)) {
|
||||||
|
$tmpxpath = new DOMXPath($tmpdoc);
|
||||||
|
|
||||||
|
$elems = $tmpxpath->query("(//*[@href]|//*[@src])");
|
||||||
|
|
||||||
|
foreach ($elems as $elem) {
|
||||||
|
if ($elem->hasAttribute("href")) {
|
||||||
|
$elem->setAttribute("href",
|
||||||
|
UrlHelper::rewrite_relative($base, $elem->getAttribute("href")));
|
||||||
|
} else if ($elem->hasAttribute("src")) {
|
||||||
|
$elem->setAttribute("src",
|
||||||
|
UrlHelper::rewrite_relative($base, $elem->getAttribute("src")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $tmpdoc->saveXML();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
function get_content() {
|
function get_content() {
|
||||||
$content = $this->elem->getElementsByTagName("content")->item(0);
|
$content = $this->elem->getElementsByTagName("content")->item(0);
|
||||||
|
|
||||||
if ($content) {
|
if ($content) {
|
||||||
|
$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $content);
|
||||||
|
|
||||||
if ($content->hasAttribute('type')) {
|
if ($content->hasAttribute('type')) {
|
||||||
if ($content->getAttribute('type') == 'xhtml') {
|
if ($content->getAttribute('type') == 'xhtml') {
|
||||||
for ($i = 0; $i < $content->childNodes->length; $i++) {
|
for ($i = 0; $i < $content->childNodes->length; $i++) {
|
||||||
$child = $content->childNodes->item($i);
|
$child = $content->childNodes->item($i);
|
||||||
|
|
||||||
if ($child->hasChildNodes()) {
|
if ($child->hasChildNodes()) {
|
||||||
return $this->doc->saveHTML($child);
|
return $this->rewrite_content_to_base($base, $this->doc->saveHTML($child));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->subtree_or_text($content);
|
return $this->rewrite_content_to_base($base, $this->subtree_or_text($content));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: duplicate code should be merged with get_content()
|
||||||
function get_description() {
|
function get_description() {
|
||||||
$content = $this->elem->getElementsByTagName("summary")->item(0);
|
$content = $this->elem->getElementsByTagName("summary")->item(0);
|
||||||
|
|
||||||
if ($content) {
|
if ($content) {
|
||||||
|
$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $content);
|
||||||
|
|
||||||
if ($content->hasAttribute('type')) {
|
if ($content->hasAttribute('type')) {
|
||||||
if ($content->getAttribute('type') == 'xhtml') {
|
if ($content->getAttribute('type') == 'xhtml') {
|
||||||
for ($i = 0; $i < $content->childNodes->length; $i++) {
|
for ($i = 0; $i < $content->childNodes->length; $i++) {
|
||||||
$child = $content->childNodes->item($i);
|
$child = $content->childNodes->item($i);
|
||||||
|
|
||||||
if ($child->hasChildNodes()) {
|
if ($child->hasChildNodes()) {
|
||||||
return $this->doc->saveHTML($child);
|
return $this->rewrite_content_to_base($base, $this->doc->saveHTML($child));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->subtree_or_text($content);
|
return $this->rewrite_content_to_base($base, $this->subtree_or_text($content));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -122,16 +155,22 @@ class FeedItem_Atom extends FeedItem_Common {
|
||||||
function get_enclosures() {
|
function get_enclosures() {
|
||||||
$links = $this->elem->getElementsByTagName("link");
|
$links = $this->elem->getElementsByTagName("link");
|
||||||
|
|
||||||
$encs = array();
|
$encs = [];
|
||||||
|
|
||||||
foreach ($links as $link) {
|
foreach ($links as $link) {
|
||||||
if ($link && $link->hasAttribute("href") && $link->hasAttribute("rel")) {
|
if ($link && $link->hasAttribute("href") && $link->hasAttribute("rel")) {
|
||||||
|
$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
|
||||||
|
|
||||||
if ($link->getAttribute("rel") == "enclosure") {
|
if ($link->getAttribute("rel") == "enclosure") {
|
||||||
$enc = new FeedEnclosure();
|
$enc = new FeedEnclosure();
|
||||||
|
|
||||||
$enc->type = clean($link->getAttribute("type"));
|
$enc->type = clean($link->getAttribute("type"));
|
||||||
$enc->link = clean($link->getAttribute("href"));
|
|
||||||
$enc->length = clean($link->getAttribute("length"));
|
$enc->length = clean($link->getAttribute("length"));
|
||||||
|
$enc->link = clean($link->getAttribute("href"));
|
||||||
|
|
||||||
|
if (!empty($base)) {
|
||||||
|
$enc->link = UrlHelper::rewrite_relative($base, $enc->link);
|
||||||
|
}
|
||||||
|
|
||||||
array_push($encs, $enc);
|
array_push($encs, $enc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,14 +20,14 @@ class UrlHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a (possibly) relative URL to an absolute one.
|
* Converts a (possibly) relative URL to an absolute one, using the provided base URL.
|
||||||
*
|
*
|
||||||
* @param string $url Base URL (i.e. from where the document is)
|
* @param string $base_url Base URL (i.e. from where the document is)
|
||||||
* @param string $rel_url Possibly relative URL in the document
|
* @param string $rel_url Possibly relative URL in the document
|
||||||
*
|
*
|
||||||
* @return string Absolute URL
|
* @return string Absolute URL
|
||||||
*/
|
*/
|
||||||
public static function rewrite_relative($url, $rel_url) {
|
public static function rewrite_relative($base_url, $rel_url) {
|
||||||
|
|
||||||
$rel_parts = parse_url($rel_url);
|
$rel_parts = parse_url($rel_url);
|
||||||
|
|
||||||
|
@ -40,14 +40,19 @@ class UrlHelper {
|
||||||
# allow magnet links
|
# allow magnet links
|
||||||
return $rel_url;
|
return $rel_url;
|
||||||
} else {
|
} else {
|
||||||
$parts = parse_url($url);
|
$base_parts = parse_url($base_url);
|
||||||
|
|
||||||
$rel_parts['host'] = $parts['host'];
|
$rel_parts['host'] = $base_parts['host'];
|
||||||
$rel_parts['scheme'] = $parts['scheme'];
|
$rel_parts['scheme'] = $base_parts['scheme'];
|
||||||
|
|
||||||
if (isset($rel_parts['path'])) {
|
if (isset($rel_parts['path'])) {
|
||||||
if (strpos($rel_parts['path'], '/') !== 0)
|
|
||||||
$rel_parts['path'] = '/' . $rel_parts['path'];
|
// experimental: if the relative URL's path is not absolute (i.e. does not start with /), append it to the base URL's path
|
||||||
|
// (I'm not sure whether this is a good idea)
|
||||||
|
|
||||||
|
if (strpos($rel_parts['path'], '/') !== 0) {
|
||||||
|
$rel_parts['path'] = with_trailing_slash($base_parts['path']) . $rel_parts['path'];
|
||||||
|
}
|
||||||
|
|
||||||
$rel_parts['path'] = str_replace("/./", "/", $rel_parts['path']);
|
$rel_parts['path'] = str_replace("/./", "/", $rel_parts['path']);
|
||||||
$rel_parts['path'] = str_replace("//", "/", $rel_parts['path']);
|
$rel_parts['path'] = str_replace("//", "/", $rel_parts['path']);
|
||||||
|
|
|
@ -193,8 +193,8 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
/** function is @deprecated */
|
/** function is @deprecated */
|
||||||
function rewrite_relative_url($url, $rel_url) {
|
function rewrite_relative_url($base_url, $rel_url) {
|
||||||
return UrlHelper::rewrite_relative($url, $rel_url);
|
return UrlHelper::rewrite_relative($base_url, $rel_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** function is @deprecated */
|
/** function is @deprecated */
|
||||||
|
|
Loading…
Reference in New Issue