subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails

This commit is contained in:
Andrew Dolgov 2013-04-19 13:17:28 +04:00
parent 1367bc3f5e
commit ebec81a6fb
3 changed files with 52 additions and 34 deletions

View File

@ -1558,6 +1558,7 @@
* Here you should call extractfeedurls in rpc-backend * Here you should call extractfeedurls in rpc-backend
* to get all possible feeds. * to get all possible feeds.
* 5 - Couldn't download the URL content. * 5 - Couldn't download the URL content.
* 6 - Content is not valid XML.
*/ */
function subscribe_to_feed($url, $cat_id = 0, function subscribe_to_feed($url, $cat_id = 0,
$auth_login = '', $auth_pass = '') { $auth_login = '', $auth_pass = '') {
@ -1588,6 +1589,18 @@
$url = key($feedUrls); $url = key($feedUrls);
} }
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadXML(html_entity_decode($contents));
$error = libxml_get_last_error();
libxml_clear_errors();
if ($error) {
$error_message = format_libxml_error($error);
return array("code" => 6, "message" => $error_message);
}
if ($cat_id == "0" || !$cat_id) { if ($cat_id == "0" || !$cat_id) {
$cat_qpart = "NULL"; $cat_qpart = "NULL";
} else { } else {
@ -4203,4 +4216,10 @@
return LABEL_BASE_INDEX - 1 + abs($feed); return LABEL_BASE_INDEX - 1 + abs($feed);
} }
/**
 * Build a one-line, human-readable description of a libxml error.
 *
 * @param LibXMLError $error error object, e.g. the value returned by
 *                           libxml_get_last_error()
 * @return string message produced by T_sprintf() containing the error code,
 *                line, column and libxml's own description
 */
function format_libxml_error($error) {
	// libxml terminates its messages with a newline; strip surrounding
	// whitespace so the text embeds cleanly in alerts and JSON replies.
	return T_sprintf("LibXML error %s at line %d (column %d): %s",
		$error->code, $error->line, $error->column,
		trim($error->message));
}
?> ?>

View File

@ -316,6 +316,25 @@
_debug("update_rss_feed: fetch done."); _debug("update_rss_feed: fetch done.");
} }
$error = verify_feed_xml($feed_data);
if ($error) {
if ($debug_enabled) {
_debug("update_rss_feed: error verifying XML, code: " . $error->code);
}
if ($error->code == 26) {
if ($debug_enabled) {
_debug("update_rss_feed: got error 26, trying to decode entities...");
}
$feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8');
$error = verify_feed_xml($feed_data);
if ($error) $feed_data = '';
}
}
} }
if (!$feed_data) { if (!$feed_data) {
@ -559,7 +578,7 @@
_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]"); _debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
} }
$entry_title = html_entity_decode($item->get_title()); $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
$entry_link = rewrite_relative_url($site_url, $item->get_link()); $entry_link = rewrite_relative_url($site_url, $item->get_link());
@ -1421,5 +1440,13 @@
mb_strtolower(strip_tags($title), 'utf-8')); mb_strtolower(strip_tags($title), 'utf-8'));
} }
/**
 * Check whether a feed payload parses as well-formed XML.
 *
 * Switches libxml to internal error collection (and leaves it enabled), so
 * parse problems are reported through the return value rather than as PHP
 * warnings.
 *
 * @param string $feed_data raw feed document
 * @return LibXMLError|false the last libxml error raised while parsing, or
 *                           false when no error was recorded
 */
function verify_feed_xml($feed_data) {
	libxml_use_internal_errors(true);

	// Discard anything an earlier parse left in the buffer; otherwise a stale
	// error could be reported for a document that is actually well-formed.
	libxml_clear_errors();

	// DOMDocument::loadXML() refuses an empty string (warning on older PHP,
	// ValueError on PHP 8). No libxml error is produced in that case, so
	// report "no error" directly and let the caller's own empty check decide.
	if ((string) $feed_data === '') {
		return false;
	}

	$doc = new DOMDocument();
	$doc->loadXML($feed_data);

	$error = libxml_get_last_error();
	libxml_clear_errors();

	return $error;
}
?> ?>

View File

@ -816,39 +816,6 @@ function quickAddFeed() {
alert(__("Specified URL doesn't seem to contain any feeds.")); alert(__("Specified URL doesn't seem to contain any feeds."));
break; break;
case 4: case 4:
/* notify_progress("Searching for feed urls...", true);
new Ajax.Request("backend.php", {
parameters: 'op=rpc&method=extractfeedurls&url=' + param_escape(feed_url),
onComplete: function(transport, dialog, feed_url) {
notify('');
var reply = JSON.parse(transport.responseText);
var feeds = reply['urls'];
console.log(transport.responseText);
var select = dijit.byId("feedDlg_feedContainerSelect");
while (select.getOptions().length > 0)
select.removeOption(0);
var count = 0;
for (var feedUrl in feeds) {
select.addOption({value: feedUrl, label: feeds[feedUrl]});
count++;
}
// if (count > 5) count = 5;
// select.size = count;
Effect.Appear('feedDlg_feedsContainer', {duration : 0.5});
}
});
break; */
feeds = rc['feeds']; feeds = rc['feeds'];
var select = dijit.byId("feedDlg_feedContainerSelect"); var select = dijit.byId("feedDlg_feedContainerSelect");
@ -871,6 +838,11 @@ function quickAddFeed() {
alert(__("Couldn't download the specified URL: %s"). alert(__("Couldn't download the specified URL: %s").
replace("%s", rc['message'])); replace("%s", rc['message']));
break; break;
case 6:
alert(__("XML validation failed: %s").
replace("%s", rc['message']));
break;
break;
case 0: case 0:
alert(__("You are already subscribed to this feed.")); alert(__("You are already subscribed to this feed."));
break; break;