From 39ede9862f1df94b24cbe476ec66eca99a1d1a2f Mon Sep 17 00:00:00 2001
From: Andrew Dolgov
Date: Mon, 29 Apr 2013 16:59:36 +0400
Subject: [PATCH] experimental: decode numerical utf entities on import in entry title

---
Review notes (free text, ignored by git am):
 - hex entity pattern fixed: [a-fA-F0-7] -> [a-fA-F0-9] (8 and 9 were rejected,
   so e.g. &#x2019; was never decoded)
 - /e modifier (deprecated in PHP 5.5, removed in PHP 7.0) replaced by
   preg_replace_callback() with named callbacks
 - on a PCRE failure the undecoded input is returned instead of NULL
 - context-line whitespace was reconstructed (tabs assumed); the hashes on the
   "From" and "index" lines still refer to the original commit

 include/rssfuncs.php | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 0ecab6a25..a5d3898ce 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -553,6 +553,7 @@
 				_debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
 
 				$entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
+				$entry_title = decode_numeric_entities($entry_title);
 
 				$entry_link = rewrite_relative_url($site_url, $item->get_link());
 
@@ -1388,4 +1389,52 @@
 
 		_debug("Cleaned $rc cached tags.");
 	}
+
+	/**
+	 * Decode one decimal numeric character reference ("&#NNNN;") to UTF-8.
+	 *
+	 * @param string $entity A single decimal entity, e.g. "&#8217;".
+	 * @return string Result of mb_decode_numericentity() for that entity.
+	 */
+	function utf8_entity_decode($entity) {
+		// convmap = (first code point, last code point, offset, mask):
+		// only code points 0x0..0x10000 are decoded, anything else is left as is.
+		$convmap = array(0x0, 0x10000, 0, 0xfffff);
+		return mb_decode_numericentity($entity, $convmap, 'UTF-8');
+	}
+
+	/** preg_replace_callback() handler for decimal entities ("&#NNNN;"). */
+	function decode_numeric_entities_dec_cb($matches) {
+		return utf8_entity_decode($matches[0]);
+	}
+
+	/** preg_replace_callback() handler for hex entities; $matches[1] holds the hex digits. */
+	function decode_numeric_entities_hex_cb($matches) {
+		// Rewrite as a decimal entity so one convmap/decoder handles both forms.
+		return utf8_entity_decode('&#' . hexdec($matches[1]) . ';');
+	}
+
+	/**
+	 * Replace numeric character references in $body with UTF-8 characters.
+	 *
+	 * Decimal references of 2-5 digits are decoded first, then hexadecimal
+	 * references of 2-8 digits.
+	 *
+	 * @param string $body UTF-8 text, possibly containing numeric entities.
+	 * @return string Decoded text, or the input unchanged if PCRE rejects it
+	 *                (e.g. invalid UTF-8 with the /u modifier).
+	 */
+	function decode_numeric_entities($body) {
+		// preg_replace_callback() instead of the /e modifier: /e is deprecated
+		// since PHP 5.5 and removed in PHP 7.0.
+		$decoded = preg_replace_callback('/&#\d{2,5};/u',
+			'decode_numeric_entities_dec_cb', $body);
+		if ($decoded === null) return $body;
+
+		// Hex class must include 8 and 9 (was 0-7), otherwise e.g. &#x2019; is skipped.
+		$result = preg_replace_callback('/&#x([a-fA-F0-9]{2,8});/u',
+			'decode_numeric_entities_hex_cb', $decoded);
+
+		return $result === null ? $decoded : $result;
+	}
 ?>