diff --git a/classes/handler/public.php b/classes/handler/public.php index f73427cbf..318cecd72 100755 --- a/classes/handler/public.php +++ b/classes/handler/public.php @@ -329,7 +329,7 @@ class Handler_Public extends Handler { if (!$og_image) { $tmpdoc = new DOMDocument(); - if (@$tmpdoc->loadHTML(mb_substr($content, 0, 131070))) { + if (@$tmpdoc->loadHTML('' . mb_substr($content, 0, 131070))) { $tmpxpath = new DOMXPath($tmpdoc); $imgs = $tmpxpath->query("//img"); diff --git a/classes/rssutils.php b/classes/rssutils.php index 82a6963e8..5aff2f8a3 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -1200,12 +1200,8 @@ class RSSUtils { static function cache_media($html, $site_url) { libxml_use_internal_errors(true); - $charset_hack = '
- - '; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $html); + $doc->loadHTML('' . $html); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])'); diff --git a/include/functions.php b/include/functions.php index 5ebd4e0ff..d59e79126 100755 --- a/include/functions.php +++ b/include/functions.php @@ -562,7 +562,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($html); + $doc->loadHTML('' . $html); $xpath = new DOMXPath($doc); $base = $xpath->query('/html/head/base[@href]'); @@ -1518,14 +1518,10 @@ // plugins work on original source URLs used before caching function rewrite_cached_urls($str) { - $charset_hack = ' - - '; - $res = trim($str); if (!$res) return ''; $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])'); @@ -1580,16 +1576,10 @@ $res = trim($str); if (!$res) return ''; - $charset_hack = ' - - '; - - $res = trim($res); if (!$res) return ''; - libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); @@ -2115,7 +2105,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($content); + $doc->loadHTML('' . $content); $xpath = new DOMXPath($doc); $entries = $xpath->query('/html/head/link[@rel="alternate" and '. '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]'); @@ -2136,7 +2126,7 @@ } function is_html($content) { - return preg_match("/ - - '; - - @$doc->loadHTML($charset_hack . $article["content"]); + @$doc->loadHTML('' . $article["content"]); if ($doc) { $xpath = new DOMXPath($doc); diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 117646c30..32c54a2c7 100755 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -172,14 +172,10 @@ class Af_Readability extends Plugin { if (!$tmpdoc->loadHTML($tmp)) return false; + // this is the worst hack yet :( if (strtolower($tmpdoc->encoding) != 'utf-8') { - $tmpxpath = new DOMXPath($tmpdoc); - - foreach ($tmpxpath->query("//meta") as $elem) { - $elem->parentNode->removeChild($elem); - } - - $tmp = $tmpdoc->saveHTML(); + $tmp = preg_replace("/