Add "extractfeedurls" RPC action that extracts the feed URLs from an HTML page
This commit is contained in:
parent
f2c6c0088b
commit
f0266f51ab
|
@ -2943,15 +2943,9 @@
|
||||||
function subscribe_to_feed($link, $url, $cat_id = 0,
|
function subscribe_to_feed($link, $url, $cat_id = 0,
|
||||||
$auth_login = '', $auth_pass = '') {
|
$auth_login = '', $auth_pass = '') {
|
||||||
|
|
||||||
$url = fix_url($url);
|
$url = fix_url($url);
|
||||||
$parts = parse_url($url);
|
|
||||||
|
|
||||||
if (!validate_feed_url($url)) return 2;
|
if (!validate_feed_url($url)) return 2;
|
||||||
|
|
||||||
if ($parts['scheme'] == 'feed') $parts['scheme'] = 'http';
|
|
||||||
|
|
||||||
$url = make_url_from_parts($parts);
|
|
||||||
|
|
||||||
if ($cat_id == "0" || !$cat_id) {
|
if ($cat_id == "0" || !$cat_id) {
|
||||||
$cat_qpart = "NULL";
|
$cat_qpart = "NULL";
|
||||||
} else {
|
} else {
|
||||||
|
@ -6674,6 +6668,8 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fixes incomplete URLs by prepending "http://".
|
* Fixes incomplete URLs by prepending "http://".
|
||||||
|
* Also replaces feed:// with http://, and
|
||||||
|
* appends a trailing slash if the URL is a domain name only.
|
||||||
*
|
*
|
||||||
* @param string $url Possibly incomplete URL
|
* @param string $url Possibly incomplete URL
|
||||||
*
|
*
|
||||||
|
@ -6682,6 +6678,14 @@
|
||||||
function fix_url($url) {
	// Normalises a possibly incomplete URL:
	//  - input without any scheme is assumed to be plain HTTP,
	//  - the "feed:" pseudo-scheme is replaced by the real transport scheme,
	//  - a URL consisting of scheme and host only gets a trailing slash.
	if (strpos($url, '://') === false) {
		$url = 'http://' . $url;
	} else if (strncasecmp($url, 'feed:', 5) == 0) {
		$rest = substr($url, 5);

		// Two spellings exist in the wild:
		//   "feed://host/path"       -> "http://host/path"
		//   "feed:https://host/path" -> "https://host/path"
		// In the second form the wrapped URL already carries its own scheme,
		// so it must be used as-is instead of being prefixed with "http:".
		if (strpos($rest, '//') === 0) {
			$url = 'http:' . $rest;
		} else {
			$url = $rest;
		}
	}

	// Append a slash if the URL has no path at all:
	// "http://www.example" -> "http://www.example/"
	// The search has to start right after "://" (whose length depends on the
	// scheme) rather than at a fixed offset, otherwise "https://host" is
	// mistaken for already having a path.
	$authority_start = strpos($url, '://') + 3;

	if (strpos($url, '/', $authority_start) === false) {
		$url .= '/';
	}

	return $url;
}
|
||||||
|
@ -6973,4 +6977,39 @@
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts RSS/Atom feed URLs from the given HTML URL.
 *
 * The page is fetched and its <head> is searched for
 * <link rel="alternate"> elements whose type mentions "rss" or "atom".
 * Relative hrefs are converted to absolute URLs based on the page URL.
 * Only http:// and https:// pages are fetched; for any other scheme, or
 * when the page cannot be loaded, an empty array is returned.
 *
 * @param string $url HTML page URL
 *
 * @return array Array of feeds. Key is the full URL, value the title
 */
function get_feeds_from_html($url)
{
	$url = fix_url($url);

	// The URL comes from the user. Without this check loadHTMLFile() would
	// also accept other stream wrappers such as file:// or php://.
	$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
	if ($scheme != 'http' && $scheme != 'https') {
		return array();
	}

	// Real-world HTML is rarely well-formed; collect libxml's parse errors
	// internally instead of letting them be emitted as PHP warnings.
	$previous = libxml_use_internal_errors(true);
	$doc      = new DOMDocument();
	$loaded   = $doc->loadHTMLFile($url);
	libxml_clear_errors();
	libxml_use_internal_errors($previous);

	if (!$loaded) {
		return array();
	}

	$xpath    = new DOMXPath($doc);
	$entries  = $xpath->query('/html/head/link[@rel="alternate"]');
	$feedUrls = array();

	foreach ($entries as $entry) {
		$href = trim($entry->getAttribute('href'));
		if ($href == '') {
			continue;
		}

		// rel="alternate" is also used for translations, mobile versions
		// etc.; only keep links that announce an RSS or Atom document.
		$type = $entry->getAttribute('type');
		if (stripos($type, 'rss') === false && stripos($type, 'atom') === false) {
			continue;
		}

		$title = $entry->getAttribute('title');
		if ($title == '') {
			$title = $type;
		}

		$feedUrls[_feed_href_to_absolute($url, $href)] = $title;
	}

	return $feedUrls;
}

/**
 * Turns the href of a feed <link> into an absolute URL.
 *
 * Handles absolute URLs (returned unchanged), protocol-relative hrefs
 * ("//host/feed"), host-relative hrefs ("/feed") and hrefs relative to the
 * directory of the page ("feed.xml"). "." and ".." segments are not
 * collapsed.
 *
 * @param string $pageUrl Absolute URL of the page containing the link
 * @param string $href    Value of the link's href attribute
 *
 * @return string Absolute feed URL
 */
function _feed_href_to_absolute($pageUrl, $href)
{
	// Anything starting with "scheme:" is already absolute
	if (preg_match('!^[a-z][a-z0-9+.-]*:!i', $href)) {
		return $href;
	}

	// Query string and fragment of the page play no part in resolving a
	// relative reference, and may contain slashes that would confuse the
	// directory detection below.
	$page           = preg_replace('![?#].*$!s', '', $pageUrl);
	$authorityStart = strpos($page, '://') + 3;
	$pathStart      = strpos($page, '/', $authorityStart);

	if ($pathStart === false) {
		// "http://host" without any path
		$origin    = $page;
		$directory = $page . '/';
	} else {
		$origin    = substr($page, 0, $pathStart);
		$directory = substr($page, 0, strrpos($page, '/') + 1);
	}

	if (substr($href, 0, 2) == '//') {
		// protocol-relative: reuse the page's "scheme:"
		return substr($page, 0, $authorityStart - 2) . $href;
	}

	if (substr($href, 0, 1) == '/') {
		// host-relative
		return $origin . $href;
	}

	return $directory . $href;
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
|
|
@ -123,6 +123,16 @@
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RPC action "extractfeedurls": replies with a JSON object, wrapped in
// <urls>, that maps the feed URLs found on the requested HTML page to
// their titles.
if ($subop == "extractfeedurls") {
	print "<rpc-reply>";

	// The "url" request parameter may be missing; do not try to fetch
	// anything in that case.
	$page_url = isset($_REQUEST['url']) ? trim($_REQUEST['url']) : '';
	$urls = ($page_url != '') ? get_feeds_from_html($page_url) : array();

	// Feed titles come from a foreign page and may contain "]]>", which
	// would end the CDATA section prematurely. Splitting the sequence over
	// two adjacent CDATA sections keeps the reply well-formed while the
	// client still sees the unchanged JSON text.
	$payload = str_replace(']]>', ']]]]><![CDATA[>', json_encode($urls));
	print "<urls><![CDATA[" . $payload . "]]></urls>";

	print "</rpc-reply>";
	return;
}
|
||||||
|
|
||||||
if ($subop == "togglepref") {
|
if ($subop == "togglepref") {
|
||||||
print "<rpc-reply>";
|
print "<rpc-reply>";
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue