parser: add charset recoding hack for systems where libxml is built without support for iconv (handles libxml error 32)

This commit is contained in:
Andrew Dolgov 2013-07-11 15:40:09 +04:00
parent d272da9120
commit 4f00f55ca2
1 changed files with 18 additions and 0 deletions

View File

@ -20,6 +20,24 @@ class FeedParser {
$error = libxml_get_last_error();
// libxml compiled without iconv?
if ($error && $error->code == 32) {
if (preg_match('/^(<\\?xml .*?)encoding="(.+?)"(.*?\\?>)/', $data, $matches) === 1) {
libxml_clear_errors();
$enc = $matches[2];
$data = iconv($enc, 'UTF-8//IGNORE', $data);
$data = preg_replace('/^<\\?xml .*?\\?>/', $matches[1] . $matches[3] , $data);
$this->doc = new DOMDocument();
$this->doc->loadXML($data);
$error = libxml_get_last_error();
}
}
// some terrible invalid unicode entity?
if ($error && $error->code == 9) {
libxml_clear_errors();