Try to force-convert feed data to UTF-8

This commit is contained in:
Andrew Dolgov 2013-05-17 20:09:43 +04:00
parent 175dd0794e
commit d1f3fa9791
2 changed files with 20 additions and 5 deletions

View File

@ -17,7 +17,22 @@ class FeedParser {
libxml_clear_errors(); libxml_clear_errors();
$this->doc = new DOMDocument(); $this->doc = new DOMDocument();
$this->doc->loadXML($data); $this->doc->loadXML($data);
$this->error = $this->format_error(libxml_get_last_error());
$error = libxml_get_last_error();
if ($error && $error->code == 9) {
libxml_clear_errors();
// we might want to try guessing input encoding here too
$data = iconv("UTF-8", "UTF-8//IGNORE", $data);
$this->doc = new DOMDocument();
$this->doc->loadXML($data);
$error = libxml_get_last_error();
}
$this->error = $this->format_error($error);
libxml_clear_errors(); libxml_clear_errors();
$this->items = array(); $this->items = array();

View File

@ -291,7 +291,7 @@
_debug("fetch done.", $debug_enabled); _debug("fetch done.", $debug_enabled);
if ($feed_data) { /* if ($feed_data) {
$error = verify_feed_xml($feed_data); $error = verify_feed_xml($feed_data);
if ($error) { if ($error) {
@ -307,7 +307,7 @@
if ($error) $feed_data = ''; if ($error) $feed_data = '';
} }
} }
} } */
} }
if (!$feed_data) { if (!$feed_data) {
@ -1349,14 +1349,14 @@
mb_strtolower(strip_tags($title), 'utf-8')); mb_strtolower(strip_tags($title), 'utf-8'));
} }
function verify_feed_xml($feed_data) { /* function verify_feed_xml($feed_data) {
libxml_use_internal_errors(true); libxml_use_internal_errors(true);
$doc = new DOMDocument(); $doc = new DOMDocument();
$doc->loadXML($feed_data); $doc->loadXML($feed_data);
$error = libxml_get_last_error(); $error = libxml_get_last_error();
libxml_clear_errors(); libxml_clear_errors();
return $error; return $error;
} } */
function housekeeping_common($debug) { function housekeeping_common($debug) {
expire_cached_files($debug); expire_cached_files($debug);