Feedparser/encoding change: More general regular expression, set encoding to UTF-8 explicitly

This commit is contained in:
wltb 2013-07-15 00:25:45 +02:00
parent 9e2d513dbd
commit f8160106af
1 changed file with 3 additions and 3 deletions

View File

@@ -22,13 +22,13 @@ class FeedParser {
// libxml compiled without iconv? // libxml compiled without iconv?
if ($error && $error->code == 32) { if ($error && $error->code == 32) {
if (preg_match('/^(<\\?xml .*?)encoding="(.+?)"(.*?\\?>)/', $data, $matches) === 1) { if (preg_match('/^(<\?xml[\t\n\r ].*?encoding=["\'])(.+?)(["\'].*?\?>)/s', $data, $matches) === 1) {
libxml_clear_errors(); libxml_clear_errors();
$enc = $matches[2]; $enc = $matches[2];
$data = iconv($enc, 'UTF-8//IGNORE', $data); $data = iconv($enc, 'UTF-8//IGNORE', $data);
$data = preg_replace('/^<\\?xml .*?\\?>/', $matches[1] . $matches[3] , $data); $data = preg_replace('/^<\?xml[\t\n\r ].*?\?>/s', $matches[1] . "UTF-8" . $matches[3] , $data);
$this->doc = new DOMDocument(); $this->doc = new DOMDocument();
$this->doc->loadXML($data); $this->doc->loadXML($data);