diff --git a/backend.php b/backend.php index cf80e0e37..9ed250cf5 100644 --- a/backend.php +++ b/backend.php @@ -210,6 +210,13 @@ array_push($articles, format_article($link, $id, false)); } else if ($mode == "zoom") { array_push($articles, format_article($link, $id, false, true, true)); + } else if ($mode == "raw") { + if ($_REQUEST['html']) header("Content-Type: text/html"); + + $article = format_article($link, $id, false); + print $article['id'] . "\n\n"; + print $article['content']; + return; } else { catchupArticleById($link, $id, 0); } diff --git a/functions.php b/functions.php index 8a44e8bee..f8ea2503e 100644 --- a/functions.php +++ b/functions.php @@ -114,9 +114,13 @@ $config = HTMLPurifier_Config::createDefault(); - $allowed = "p,a[href],i,em,b,strong,code,pre,blockquote,br,img[src|alt|title],ul,ol,li,h1,h2,h3,h4,s"; + $allowed = "p,a[href],i,em,b,strong,code,pre,blockquote,br,img[src|alt|title],ul,ol,li,h1,h2,h3,h4,s,object[classid|type|id|name|width|height|codebase],param[name|value]"; + $config->set('HTML.SafeObject', true); $config->set('HTML', 'Allowed', $allowed); + $config->set('Output.FlashCompat', true); + $config->set('Attr.EnableID', true); + $purifier = new HTMLPurifier($config); /** diff --git a/lib/htmlpurifier/CREDITS b/lib/htmlpurifier/CREDITS old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/LICENSE b/lib/htmlpurifier/LICENSE old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier.auto.php b/lib/htmlpurifier/library/HTMLPurifier.auto.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier.autoload.php b/lib/htmlpurifier/library/HTMLPurifier.autoload.php old mode 100755 new mode 100644 index 8d4017640..62da5b60d --- a/lib/htmlpurifier/library/HTMLPurifier.autoload.php +++ b/lib/htmlpurifier/library/HTMLPurifier.autoload.php @@ -3,6 +3,7 @@ /** * @file * Convenience file that registers autoload handler for HTML Purifier. + * It also does some sanity checks. 
*/ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) { @@ -18,4 +19,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un } } +if (ini_get('zend.ze1_compatibility_mode')) { + trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR); +} + // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier.func.php b/lib/htmlpurifier/library/HTMLPurifier.func.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier.includes.php b/lib/htmlpurifier/library/HTMLPurifier.includes.php old mode 100755 new mode 100644 index 944f0893b..b9baf8f0a --- a/lib/htmlpurifier/library/HTMLPurifier.includes.php +++ b/lib/htmlpurifier/library/HTMLPurifier.includes.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 3.3.0 + * @version 4.3.0 * * @warning * You must *not* include any other HTML Purifier files before this file, @@ -98,6 +98,8 @@ require 'HTMLPurifier/AttrDef/CSS/Percentage.php'; require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require 'HTMLPurifier/AttrDef/CSS/URI.php'; require 'HTMLPurifier/AttrDef/HTML/Bool.php'; +require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php'; +require 'HTMLPurifier/AttrDef/HTML/Class.php'; require 'HTMLPurifier/AttrDef/HTML/Color.php'; require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require 'HTMLPurifier/AttrDef/HTML/ID.php'; @@ -105,7 +107,6 @@ require 'HTMLPurifier/AttrDef/HTML/Pixels.php'; require 'HTMLPurifier/AttrDef/HTML/Length.php'; require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require 'HTMLPurifier/AttrDef/HTML/MultiLength.php'; -require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php'; require 'HTMLPurifier/AttrDef/URI/Email.php'; require 'HTMLPurifier/AttrDef/URI/Host.php'; require 'HTMLPurifier/AttrDef/URI/IPv4.php'; @@ -123,6 +124,8 @@ 
require 'HTMLPurifier/AttrTransform/Input.php'; require 'HTMLPurifier/AttrTransform/Lang.php'; require 'HTMLPurifier/AttrTransform/Length.php'; require 'HTMLPurifier/AttrTransform/Name.php'; +require 'HTMLPurifier/AttrTransform/NameSync.php'; +require 'HTMLPurifier/AttrTransform/Nofollow.php'; require 'HTMLPurifier/AttrTransform/SafeEmbed.php'; require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php'; @@ -149,6 +152,7 @@ require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/List.php'; require 'HTMLPurifier/HTMLModule/Name.php'; +require 'HTMLPurifier/HTMLModule/Nofollow.php'; require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/Object.php'; require 'HTMLPurifier/HTMLModule/Presentation.php'; @@ -174,6 +178,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php'; require 'HTMLPurifier/Injector/Linkify.php'; require 'HTMLPurifier/Injector/PurifierLinkify.php'; require 'HTMLPurifier/Injector/RemoveEmpty.php'; +require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php'; require 'HTMLPurifier/Injector/SafeObject.php'; require 'HTMLPurifier/Lexer/DOMLex.php'; require 'HTMLPurifier/Lexer/DirectLex.php'; @@ -193,9 +198,12 @@ require 'HTMLPurifier/Token/Start.php'; require 'HTMLPurifier/Token/Text.php'; require 'HTMLPurifier/URIFilter/DisableExternal.php'; require 'HTMLPurifier/URIFilter/DisableExternalResources.php'; +require 'HTMLPurifier/URIFilter/DisableResources.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/Munge.php'; +require 'HTMLPurifier/URIScheme/data.php'; +require 'HTMLPurifier/URIScheme/file.php'; require 'HTMLPurifier/URIScheme/ftp.php'; require 'HTMLPurifier/URIScheme/http.php'; require 'HTMLPurifier/URIScheme/https.php'; diff --git a/lib/htmlpurifier/library/HTMLPurifier.kses.php 
b/lib/htmlpurifier/library/HTMLPurifier.kses.php old mode 100755 new mode 100644 index 24bef74a5..3143feb17 --- a/lib/htmlpurifier/library/HTMLPurifier.kses.php +++ b/lib/htmlpurifier/library/HTMLPurifier.kses.php @@ -17,11 +17,11 @@ function kses($string, $allowed_html, $allowed_protocols = null) { $allowed_attributes["$element.$attribute"] = true; } } - $config->set('HTML', 'AllowedElements', $allowed_elements); - $config->set('HTML', 'AllowedAttributes', $allowed_attributes); + $config->set('HTML.AllowedElements', $allowed_elements); + $config->set('HTML.AllowedAttributes', $allowed_attributes); $allowed_schemes = array(); if ($allowed_protocols !== null) { - $config->set('URI', 'AllowedSchemes', $allowed_protocols); + $config->set('URI.AllowedSchemes', $allowed_protocols); } $purifier = new HTMLPurifier($config); return $purifier->purify($string); diff --git a/lib/htmlpurifier/library/HTMLPurifier.path.php b/lib/htmlpurifier/library/HTMLPurifier.path.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier.php b/lib/htmlpurifier/library/HTMLPurifier.php old mode 100755 new mode 100644 index 4b1eddecb..914ba25ae --- a/lib/htmlpurifier/library/HTMLPurifier.php +++ b/lib/htmlpurifier/library/HTMLPurifier.php @@ -19,7 +19,7 @@ */ /* - HTML Purifier 3.3.0 - Standards Compliant HTML Filtering + HTML Purifier 4.3.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. 
Yang This library is free software; you can redistribute it and/or @@ -55,10 +55,10 @@ class HTMLPurifier { /** Version of HTML Purifier */ - public $version = '3.3.0'; + public $version = '4.3.0'; /** Constant with version of HTML Purifier */ - const VERSION = '3.3.0'; + const VERSION = '4.3.0'; /** Global configuration object */ public $config; @@ -128,7 +128,7 @@ class HTMLPurifier $context->register('Generator', $this->generator); // set up global context variables - if ($config->get('Core', 'CollectErrors')) { + if ($config->get('Core.CollectErrors')) { // may get moved out if other facilities use it $language_factory = HTMLPurifier_LanguageFactory::instance(); $language = $language_factory->create($config, $context); @@ -152,6 +152,7 @@ class HTMLPurifier $filters = array(); foreach ($filter_flags as $filter => $flag) { if (!$flag) continue; + if (strpos($filter, '.') !== false) continue; $class = "HTMLPurifier_Filter_$filter"; $filters[] = new $class; } diff --git a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php old mode 100755 new mode 100644 index 7d393036a..a5c0d5bb8 --- a/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php +++ b/lib/htmlpurifier/library/HTMLPurifier.safe-includes.php @@ -92,6 +92,8 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php'; +require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php'; +require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php'; @@ -99,7 +101,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php'; require_once $__dir . 
'/HTMLPurifier/AttrDef/HTML/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php'; -require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php'; @@ -117,6 +118,8 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php'; +require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php'; +require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; @@ -143,6 +146,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php'; +require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php'; @@ -168,6 +172,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php'; require_once $__dir . '/HTMLPurifier/Injector/Linkify.php'; require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php'; require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php'; +require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php'; require_once $__dir . 
'/HTMLPurifier/Injector/SafeObject.php'; require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php'; @@ -187,9 +192,12 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php'; require_once $__dir . '/HTMLPurifier/Token/Text.php'; require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php'; require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php'; +require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; +require_once $__dir . '/HTMLPurifier/URIScheme/data.php'; +require_once $__dir . '/HTMLPurifier/URIScheme/file.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/http.php'; require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrCollections.php b/lib/htmlpurifier/library/HTMLPurifier/AttrCollections.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef.php old mode 100755 new mode 100644 index d32fa62d6..b2e4f36c5 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef.php @@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); } + /** + * Parses a possibly escaped CSS string and returns the "pure" + * version of it. 
+ */ + protected function expandCSSEscape($string) { + // flexibly parse it + $ret = ''; + for ($i = 0, $c = strlen($string); $i < $c; $i++) { + if ($string[$i] === '\\') { + $i++; + if ($i >= $c) { + $ret .= '\\'; + break; + } + if (ctype_xdigit($string[$i])) { + $code = $string[$i]; + for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { + if (!ctype_xdigit($string[$i])) break; + $code .= $string[$i]; + } + // We have to be extremely careful when adding + // new characters, to make sure we're not breaking + // the encoding. + $char = HTMLPurifier_Encoder::unichr(hexdec($code)); + if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; + $ret .= $char; + if ($i < $c && trim($string[$i]) !== '') $i--; + continue; + } + if ($string[$i] === "\n") continue; + } + $ret .= $string[$i]; + } + return $ret; + } + } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php old mode 100755 new mode 100644 index 35df3985e..fae82eaec --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef $keywords = array(); $keywords['h'] = false; // left, right $keywords['v'] = false; // top, bottom - $keywords['c'] = false; // center + $keywords['ch'] = false; // center 
(first word) + $keywords['cv'] = false; // center (second word) $measures = array(); $i = 0; @@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef $lbit = ctype_lower($bit) ? $bit : strtolower($bit); if (isset($lookup[$lbit])) { $status = $lookup[$lbit]; + if ($status == 'c') { + if ($i == 0) { + $status = 'ch'; + } else { + $status = 'cv'; + } + } $keywords[$status] = $lbit; $i++; } @@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef if (!$i) return false; // no valid values were caught - $ret = array(); // first keyword if ($keywords['h']) $ret[] = $keywords['h']; - elseif (count($measures)) $ret[] = array_shift($measures); - elseif ($keywords['c']) { - $ret[] = $keywords['c']; - $keywords['c'] = false; // prevent re-use: center = center center + elseif ($keywords['ch']) { + $ret[] = $keywords['ch']; + $keywords['cv'] = false; // prevent re-use: center = center center } + elseif (count($measures)) $ret[] = array_shift($measures); if ($keywords['v']) $ret[] = $keywords['v']; + elseif ($keywords['cv']) $ret[] = $keywords['cv']; elseif (count($measures)) $ret[] = array_shift($measures); - elseif ($keywords['c']) $ret[] = $keywords['c']; if (empty($ret)) return false; return implode(' ', $ret); diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php old mode 100755 new mode 100644 index 14c6594b6..07f95a671 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php @@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef public function validate($color, $config, $context) { static $colors = null; - if ($colors === null) $colors = 
$config->get('Core', 'ColorKeywords'); + if ($colors === null) $colors = $config->get('Core.ColorKeywords'); $color = trim($color); if ($color === '') return false; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php old mode 100755 new mode 100644 index 705ac893d..0d9a4e12c --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -2,11 +2,43 @@ /** * Validates a font family list according to CSS spec - * @todo whitelisting allowed fonts would be nice */ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef { + protected $mask = null; + + public function __construct() { + $this->mask = '- '; + for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c; + for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c; + for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine + // special bytes used by UTF-8 + for ($i = 0x80; $i <= 0xFF; $i++) { + // We don't bother excluding invalid bytes in this range, + // because our restriction to well-formed UTF-8 will + // prevent these from ever occurring. 
+ $this->mask .= chr($i); + } + + /* + PHP's internal strcspn implementation is + O(length of string * length of mask), making it inefficient + for large masks. However, it's still faster than + preg_match 8) + for (p = s1;;) { + spanp = s2; + do { + if (*spanp == c || p == s1_end) { + return p - s1; + } + } while (spanp++ < (s2_end - 1)); + c = *++p; + } + */ + // possible optimization: invert the mask. + } + public function validate($string, $config, $context) { static $generic_names = array( 'serif' => true, @@ -15,6 +47,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef 'fantasy' => true, 'cursive' => true ); + $allowed_fonts = $config->get('CSS.AllowedFonts'); // assume that no font names contain commas in them $fonts = explode(',', $string); @@ -24,7 +57,9 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef if ($font === '') continue; // match a generic name if (isset($generic_names[$font])) { - $final .= $font . ', '; + if ($allowed_fonts === null || isset($allowed_fonts[$font])) { + $final .= $font . ', '; + } continue; } // match a quoted name @@ -34,50 +69,122 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $quote = $font[0]; if ($font[$length - 1] !== $quote) continue; $font = substr($font, 1, $length - 2); - - $new_font = ''; - for ($i = 0, $c = strlen($font); $i < $c; $i++) { - if ($font[$i] === '\\') { - $i++; - if ($i >= $c) { - $new_font .= '\\'; - break; - } - if (ctype_xdigit($font[$i])) { - $code = $font[$i]; - for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { - if (!ctype_xdigit($font[$i])) break; - $code .= $font[$i]; - } - // We have to be extremely careful when adding - // new characters, to make sure we're not breaking - // the encoding. 
- $char = HTMLPurifier_Encoder::unichr(hexdec($code)); - if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; - $new_font .= $char; - if ($i < $c && trim($font[$i]) !== '') $i--; - continue; - } - if ($font[$i] === "\n") continue; - } - $new_font .= $font[$i]; - } - - $font = $new_font; } + + $font = $this->expandCSSEscape($font); + // $font is a pure representation of the font name + if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) { + continue; + } + if (ctype_alnum($font) && $font !== '') { // very simple font, allow it in unharmed $final .= $font . ', '; continue; } - // complicated font, requires quoting + // bugger out on whitespace. form feed (0C) really + // shouldn't show up regardless + $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); - // armor single quotes and new lines - $font = str_replace("\\", "\\\\", $font); - $font = str_replace("'", "\\'", $font); + // Here, there are various classes of characters which need + // to be treated differently: + // - Alphanumeric characters are essentially safe. We + // handled these above. + // - Spaces require quoting, though most parsers will do + // the right thing if there aren't any characters that + // can be misinterpreted + // - Dashes rarely occur, but they are fairly unproblematic + // for parsing/rendering purposes. + // The above characters cover the majority of Western font + // names. + // - Arbitrary Unicode characters not in ASCII. Because + // most parsers give little thought to Unicode, treatment + // of these codepoints is basically uniform, even for + // punctuation-like codepoints. These characters can + // show up in non-Western pages and are supported by most + // major browsers, for example: "MS 明朝" is a + // legitimate font-name + // . 
See + // the CSS3 spec for more examples: + // + // You can see live samples of these on the Internet: + // + // However, most of these fonts have ASCII equivalents: + // for example, 'MS Mincho', and it's considered + // professional to use ASCII font names instead of + // Unicode font names. Thanks Takeshi Terada for + // providing this information. + // The following characters, to my knowledge, have not been + // used to name font names. + // - Single quote. While theoretically you might find a + // font name that has a single quote in its name (serving + // as an apostrophe, e.g. Dave's Scribble), I haven't + // been able to find any actual examples of this. + // Internet Explorer's cssText translation (which I + // believe is invoked by innerHTML) normalizes any + // quoting to single quotes, and fails to escape single + // quotes. (Note that this is not IE's behavior for all + // CSS properties, just some sort of special casing for + // font-family). So a single quote *cannot* be used + // safely in the font-family context if there will be an + // innerHTML/cssText translation. Note that Firefox 3.x + // does this too. + // - Double quote. In IE, these get normalized to + // single-quotes, no matter what the encoding. (Fun + // fact, in IE8, the 'content' CSS property gained + // support, where they special cased to preserve encoded + // double quotes, but still translate unadorned double + // quotes into single quotes.) So, because their + // fixpoint behavior is identical to single quotes, they + // cannot be allowed either. Firefox 3.x displays + // single-quote style behavior. + // - Backslashes are reduced by one (so \\ -> \) every + // iteration, so they cannot be used safely. This shows + // up in IE7, IE8 and FF3 + // - Semicolons, commas and backticks are handled properly. + // - The rest of the ASCII punctuation is handled properly. 
+ // We haven't checked what browsers do to unadorned + // versions, but this is not important as long as the + // browser doesn't /remove/ surrounding quotes (as IE does + // for HTML). + // + // With these results in hand, we conclude that there are + // various levels of safety: + // - Paranoid: alphanumeric, spaces and dashes(?) + // - International: Paranoid + non-ASCII Unicode + // - Edgy: Everything except quotes, backslashes + // - NoJS: Standards compliance, e.g. sod IE. Note that + // with some judicious character escaping (since certain + // types of escaping don't work) this is theoretically + // OK as long as innerHTML/cssText is not called. + // We believe that international is a reasonable default + // (that we will implement now), and once we do more + // extensive research, we may feel comfortable with dropping + // it down to edgy. + + // International: alphanumeric, spaces, dashes and Unicode. Use of + // str(c)spn assumes that the string was already well formed + // Unicode (which of course it is). + if (strspn($font, $this->mask) !== strlen($font)) { + continue; + } + + // Historical: + // In the absence of innerHTML/cssText, these ugly + // transforms don't pose a security risk (as \\ and \" + // might--these escapes are not supported by most browsers). + // We could try to be clever and use single-quote wrapping + // when there is a double quote present, but I have chosen + // not to implement that. 
(NOTE: you can reduce the amount + // of escapes by one depending on what quoting style you use) + // $font = str_replace('\\', '\\5C ', $font); + // $font = str_replace('"', '\\22 ', $font); + // $font = str_replace("'", '\\27 ', $font); + + // font possibly with spaces, requires quoting $final .= "'$font', "; } $final = rtrim($final, ', '); diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php old mode 100755 new mode 100644 index 435d7930b..c2f767e57 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php @@ -34,20 +34,25 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI $uri = 
substr($uri, 1, $new_length - 1); } - $keys = array( '(', ')', ',', ' ', '"', "'"); - $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'"); - $uri = str_replace($values, $keys, $uri); + $uri = $this->expandCSSEscape($uri); $result = parent::validate($uri, $config, $context); if ($result === false) return false; - // escape necessary characters according to CSS spec - // except for the comma, none of these should appear in the - // URI at all - $result = str_replace($keys, $values, $result); + // extra sanity check; should have been done by URI + $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); - return "url($result)"; + // suspicious characters are ()'; we're going to percent encode + // them for safety. + $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result); + + // there's an extra bug where ampersands lose their escaping on + // an innerHTML cycle, so a very unlucky query parameter could + // then change the meaning of the URL. Unfortunately, there's + // not much we can do about that... 
+ + return "url(\"$result\")"; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php new file mode 100644 index 000000000..370068d97 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php @@ -0,0 +1,34 @@ +getDefinition('HTML')->doctype->name; + if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { + return parent::split($string, $config, $context); + } else { + return preg_split('/\s+/', $string); + } + } + protected function filter($tokens, $config, $context) { + $allowed = $config->get('Attr.AllowedClasses'); + $forbidden = $config->get('Attr.ForbiddenClasses'); + $ret = array(); + foreach ($tokens as $token) { + if ( + ($allowed === null || isset($allowed[$token])) && + !isset($forbidden[$token]) && + // We need this O(n) check because of PHP's array + // implementation that casts -0 to 0. 
+ !in_array($token, $ret, true) + ) { + $ret[] = $token; + } + } + return $ret; + } +} diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php old mode 100755 new mode 100644 index 5311a3c61..d01e20454 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef public function validate($string, $config, $context) { static $colors = null; - if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); + if ($colors === null) $colors = $config->get('Core.ColorKeywords'); $string = trim($string); diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php old mode 100755 new mode 100644 index bd281a89f..ae6ea7c01 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -12,7 +12,7 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum public function __construct() {} public function validate($string, $config, $context) { - if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets'); + if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets'); return parent::validate($string, $config, $context); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php old mode 100755 new mode 100644 index 7c5c169c2..81d03762d --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -17,18 +17,18 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef public function validate($id, $config, $context) { - 
if (!$config->get('Attr', 'EnableID')) return false; + if (!$config->get('Attr.EnableID')) return false; $id = trim($id); // trim it first if ($id === '') return false; - $prefix = $config->get('Attr', 'IDPrefix'); + $prefix = $config->get('Attr.IDPrefix'); if ($prefix !== '') { - $prefix .= $config->get('Attr', 'IDPrefixLocal'); + $prefix .= $config->get('Attr.IDPrefixLocal'); // prevent re-appending the prefix if (strpos($id, $prefix) !== 0) $id = $prefix . $id; - } elseif ($config->get('Attr', 'IDPrefixLocal') !== '') { + } elseif ($config->get('Attr.IDPrefixLocal') !== '') { trigger_error('%Attr.IDPrefixLocal cannot be used unless '. '%Attr.IDPrefix is set', E_USER_WARNING); } @@ -51,7 +51,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef $result = ($trim === ''); } - $regexp = $config->get('Attr', 'IDBlacklistRegexp'); + $regexp = $config->get('Attr.IDBlacklistRegexp'); if ($regexp && preg_match($regexp, $id)) { return false; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php old mode 100755 new mode 100644 index 8a0da0c89..76d25ed08 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -27,7 +27,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef public function validate($string, $config, $context) { - $allowed = $config->get('Attr', $this->name); + $allowed = $config->get('Attr.' . 
$this->name); if (empty($allowed)) return false; $string = $this->parseCDATA($string); diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php old mode 100755 new mode 100644 index 55035c4d0..aa34120bd --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -2,10 +2,6 @@ /** * Validates contents based on NMTOKENS attribute type. - * @note The only current use for this is the class attribute in HTML - * @note Could have some functionality factored out into Nmtoken class - * @warning We cannot assume this class will be used only for 'class' - * attributes. Not sure how to hook in magic behavior, then. */ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef { @@ -17,6 +13,17 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef // early abort: '' and '0' (strings that convert to false) are invalid if (!$string) return false; + $tokens = $this->split($string, $config, $context); + $tokens = $this->filter($tokens, $config, $context); + if (empty($tokens)) return false; + return implode(' ', $tokens); + + } + + /** + * Splits a space separated list of tokens into its constituent parts. + */ + protected function split($string, $config, $context) { // OPTIMIZABLE! // do the preg_match, capture all subpatterns for reformulation @@ -24,23 +31,20 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef // escaping because I don't know how to do that with regexps // and plus it would complicate optimization efforts (you never // see that anyway). - $matches = array(); $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. 
'(?:(?=\s)|\z)/'; // look ahead for space or string end preg_match_all($pattern, $string, $matches); + return $matches[1]; + } - if (empty($matches[1])) return false; - - // reconstruct string - $new_string = ''; - foreach ($matches[1] as $token) { - $new_string .= $token . ' '; - } - $new_string = rtrim($new_string); - - return $new_string; - + /** + * Template method for removing certain tokens based on arbitrary criteria. + * @note If we wanted to be really functional, we'd do an array_filter + * with a callback. But... we're not. + */ + protected function filter($tokens, $config, $context) { + return $tokens; } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php old mode 100755 new mode 100644 index 93d2f0bbf..01a6d83e9 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php @@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef public function validate($uri, $config, $context) { - if ($config->get('URI', 'Disable')) return false; + if ($config->get('URI.Disable')) return false; $uri = $this->parseCDATA($uri); 
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php old mode 100755 new mode 100644 index 2156c10c6..feca469d7 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php @@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef public function validate($string, $config, $context) { $length = strlen($string); + // empty hostname is OK; it's usually semantically equivalent: + // the default host as defined by a URI scheme is used: + // + // If the URI scheme defines a default for host, then that + // default applies when the host subcomponent is undefined + // or when the registered name is empty (zero length). 
if ($string === '') return ''; if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { //IPv6 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Background.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Background.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BdoDir.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BdoDir.php old mode 100755 new mode 100644 index 40310b914..4d1a05665 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BdoDir.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BdoDir.php @@ -10,7 +10,7 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform public function transform($attr, $config, $context) { if (isset($attr['dir'])) return $attr; - $attr['dir'] = $config->get('Attr', 'DefaultTextDir'); + $attr['dir'] = $config->get('Attr.DefaultTextDir'); return $attr; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BgColor.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BgColor.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BoolToCSS.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/BoolToCSS.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Border.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Border.php old mode 100755 new mode 100644 diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/EnumToCSS.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/EnumToCSS.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php old mode 100755 new mode 100644 index 25c9403c2..7f0e4b7a5 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php @@ -15,21 +15,22 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform $src = true; if (!isset($attr['src'])) { - if ($config->get('Core', 'RemoveInvalidImg')) return $attr; - $attr['src'] = $config->get('Attr', 'DefaultInvalidImage'); + if ($config->get('Core.RemoveInvalidImg')) return $attr; + $attr['src'] = $config->get('Attr.DefaultInvalidImage'); $src = false; } if (!isset($attr['alt'])) { if ($src) { - $alt = $config->get('Attr', 'DefaultImageAlt'); + $alt = $config->get('Attr.DefaultImageAlt'); if ($alt === null) { - $attr['alt'] = basename($attr['src']); + // truncate if the alt is too long + $attr['alt'] = substr(basename($attr['src']),0,40); } else { $attr['alt'] = $alt; } } else { - $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt'); + $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt'); } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgSpace.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgSpace.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Input.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Input.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Lang.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Lang.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Length.php 
b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Length.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Name.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Name.php old mode 100755 new mode 100644 index e6f93aee3..15315bc73 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Name.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Name.php @@ -7,6 +7,8 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform { public function transform($attr, $config, $context) { + // Abort early if we're using relaxed definition of name + if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr; if (!isset($attr['name'])) return $attr; $id = $this->confiscateAttr($attr, 'name'); if ( isset($attr['id'])) return $attr; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/NameSync.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/NameSync.php new file mode 100644 index 000000000..a95638c14 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/NameSync.php @@ -0,0 +1,27 @@ +idDef = new HTMLPurifier_AttrDef_HTML_ID(); + } + + public function transform($attr, $config, $context) { + if (!isset($attr['name'])) return $attr; + $name = $attr['name']; + if (isset($attr['id']) && $attr['id'] === $name) return $attr; + $result = $this->idDef->validate($name, $config, $context); + if ($result === false) unset($attr['name']); + else $attr['name'] = $result; + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php new file mode 100644 index 000000000..573b42c9c --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php @@ -0,0 +1,41 @@ +parser = new HTMLPurifier_URIParser(); + } + + public function transform($attr, $config, $context) { + + if (!isset($attr['href'])) { + return $attr; + } + + // 
XXX Kind of inefficient + $url = $this->parser->parse($attr['href']); + $scheme = $url->getSchemeObj($config, $context); + + if (!is_null($url->host) && $scheme !== false && $scheme->browsable) { + if (isset($attr['rel'])) { + $attr['rel'] .= ' nofollow'; + } else { + $attr['rel'] = 'nofollow'; + } + } + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeEmbed.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeEmbed.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeObject.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeObject.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php old mode 100755 new mode 100644 index 94e8052a9..bd86a7455 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeParam.php @@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform public function __construct() { $this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded + $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent')); } public function transform($attr, $config, $context) { @@ -33,12 +34,25 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform case 'allowNetworking': $attr['value'] = 'internal'; break; + case 'allowFullScreen': + if ($config->get('HTML.FlashAllowFullScreen')) { + $attr['value'] = ($attr['value'] == 'true') ? 
'true' : 'false'; + } else { + $attr['value'] = 'false'; + } + break; case 'wmode': - $attr['value'] = 'window'; + $attr['value'] = $this->wmode->validate($attr['value'], $config, $context); break; case 'movie': + case 'src': + $attr['name'] = "movie"; $attr['value'] = $this->uri->validate($attr['value'], $config, $context); break; + case 'flashvars': + // we're going to allow arbitrary inputs to the SWF, on + // the reasoning that it could only hack the SWF, not us. + break; // add other cases to support other param name/value pairs default: $attr['name'] = $attr['value'] = null; diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ScriptRequired.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/ScriptRequired.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Textarea.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTransform/Textarea.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php old mode 100755 new mode 100644 index 6c624bb0b..fc2ea4e58 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrTypes.php @@ -36,6 +36,9 @@ class HTMLPurifier_AttrTypes $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text(); $this->info['Character'] = new HTMLPurifier_AttrDef_Text(); + // "proprietary" types + $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); + // number is really a positive integer (one or more digits) // FIXME: ^^ not always, see start and value of list items $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrValidator.php b/lib/htmlpurifier/library/HTMLPurifier/AttrValidator.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php b/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php old mode 100755 new 
mode 100644 index 559f61a23..607c5b188 --- a/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Bootstrap.php @@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap public static function autoload($class) { $file = HTMLPurifier_Bootstrap::getPath($class); if (!$file) return false; - require HTMLPURIFIER_PREFIX . '/' . $file; + // Technically speaking, it should be ok and more efficient to + // just do 'require', but Antonio Parraga reports that with + // Zend extensions such as Zend debugger and APC, this invariant + // may be broken. Since we have efficient alternatives, pay + // the cost here and avoid the bug. + require_once HTMLPURIFIER_PREFIX . '/' . $file; return true; } @@ -65,10 +70,11 @@ class HTMLPurifier_Bootstrap if ( ($funcs = spl_autoload_functions()) === false ) { spl_autoload_register($autoload); } elseif (function_exists('spl_autoload_unregister')) { + $buggy = version_compare(PHP_VERSION, '5.2.11', '<'); $compat = version_compare(PHP_VERSION, '5.1.2', '<=') && version_compare(PHP_VERSION, '5.1.0', '>='); foreach ($funcs as $func) { - if (is_array($func)) { + if ($buggy && is_array($func)) { // :TRICKY: There are some compatibility issues and some // places where we need to error out $reflector = new ReflectionMethod($func[0], $func[1]); diff --git a/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php old mode 100755 new mode 100644 index 1a1805733..91619f5d3 --- a/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/CSSDefinition.php @@ -154,7 +154,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('auto')) )); - $max = $config->get('CSS', 'MaxImgLength'); + $max = $config->get('CSS.MaxImgLength'); $this->info['width'] = $this->info['height'] = @@ -211,15 +211,19 @@ class HTMLPurifier_CSSDefinition 
extends HTMLPurifier_Definition // partial support $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap')); - if ($config->get('CSS', 'Proprietary')) { + if ($config->get('CSS.Proprietary')) { $this->doSetupProprietary($config); } - if ($config->get('CSS', 'AllowTricky')) { + if ($config->get('CSS.AllowTricky')) { $this->doSetupTricky($config); } - $allow_important = $config->get('CSS', 'AllowImportant'); + if ($config->get('CSS.Trusted')) { + $this->doSetupTrusted($config); + } + + $allow_important = $config->get('CSS.AllowImportant'); // wrap all attr-defs with decorator that handles !important foreach ($this->info as $k => $v) { $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); @@ -260,6 +264,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); } + protected function doSetupTrusted($config) { + $this->info['position'] = new HTMLPurifier_AttrDef_Enum(array( + 'static', 'relative', 'absolute', 'fixed' + )); + $this->info['top'] = + $this->info['left'] = + $this->info['right'] = + $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_Enum(array('auto')), + )); + $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Integer(), + new HTMLPurifier_AttrDef_Enum(array('auto')), + )); + } /** * Performs extra config-based processing. Based off of @@ -272,20 +293,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition // setup allowed elements $support = "(for information on implementing this, see the ". 
"support forums) "; - $allowed_attributes = $config->get('CSS', 'AllowedProperties'); - if ($allowed_attributes !== null) { + $allowed_properties = $config->get('CSS.AllowedProperties'); + if ($allowed_properties !== null) { foreach ($this->info as $name => $d) { - if(!isset($allowed_attributes[$name])) unset($this->info[$name]); - unset($allowed_attributes[$name]); + if(!isset($allowed_properties[$name])) unset($this->info[$name]); + unset($allowed_properties[$name]); } // emit errors - foreach ($allowed_attributes as $name => $d) { + foreach ($allowed_properties as $name => $d) { // :TODO: Is this htmlspecialchars() call really necessary? $name = htmlspecialchars($name); trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); } } + $forbidden_properties = $config->get('CSS.ForbiddenProperties'); + if ($forbidden_properties !== null) { + foreach ($this->info as $name => $d) { + if (isset($forbidden_properties[$name])) { + unset($this->info[$name]); + } + } + } + } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php old mode 100755 new mode 100644 index 
c3e748b26..4889f249b --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php @@ -59,7 +59,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $all_whitespace = true; // some configuration - $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren'); + $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); // generator $gen = new HTMLPurifier_Generator($config, $context); diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Config.php b/lib/htmlpurifier/library/HTMLPurifier/Config.php old mode 100755 new mode 100644 index f8e1f7804..b6551398f --- a/lib/htmlpurifier/library/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Config.php @@ -20,7 +20,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - public $version = '3.3.0'; + public $version = '4.3.0'; /** * Bool indicator whether or not to automatically finalize @@ -68,12 +68,31 @@ class HTMLPurifier_Config */ protected $plist; + /** + * Whether or not a set is taking place due to an + * alias lookup. + */ + private $aliasMode; + + /** + * Set to false if you do not want line and file numbers in errors + * (useful when unit testing). This will also compress some errors + * and exceptions. + */ + public $chatty = true; + + /** + * Current lock; only gets to this namespace are allowed. + */ + private $lock; + /** * @param $definition HTMLPurifier_ConfigSchema that defines what directives * are allowed. 
*/ - public function __construct($definition) { - $this->plist = new HTMLPurifier_PropertyList($definition->defaultPlist); + public function __construct($definition, $parent = null) { + $parent = $parent ? $parent : $definition->defaultPlist; + $this->plist = new HTMLPurifier_PropertyList($parent); $this->def = $definition; // keep a copy around for checking $this->parser = new HTMLPurifier_VarParser_Flexible(); } @@ -102,6 +121,16 @@ class HTMLPurifier_Config return $ret; } + /** + * Creates a new config object that inherits from a previous one. + * @param HTMLPurifier_Config $config Configuration object to inherit + * from. + * @return HTMLPurifier_Config object with $config as its parent. + */ + public static function inherit(HTMLPurifier_Config $config) { + return new HTMLPurifier_Config($config->def, $config->plist); + } + /** * Convenience constructor that creates a default configuration object. * @return Default HTMLPurifier_Config object. @@ -114,24 +143,34 @@ class HTMLPurifier_Config /** * Retreives a value from the configuration. - * @param $namespace String namespace * @param $key String key */ - public function get($namespace, $key) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); - if (!isset($this->def->info[$namespace][$key])) { + public function get($key, $a = null) { + if ($a !== null) { + $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); + $key = "$key.$a"; + } + if (!$this->finalized) $this->autoFinalize(); + if (!isset($this->def->info[$key])) { // can't add % due to SimpleTest bug - trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"), + $this->triggerError('Cannot retrieve value of undefined directive ' . 
htmlspecialchars($key), E_USER_WARNING); return; } - if (isset($this->def->info[$namespace][$key]->isAlias)) { - $d = $this->def->info[$namespace][$key]; - trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name, + if (isset($this->def->info[$key]->isAlias)) { + $d = $this->def->info[$key]; + $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, E_USER_ERROR); return; } - return $this->plist->get("$namespace.$key"); + if ($this->lock) { + list($ns) = explode('.', $key); + if ($ns !== $this->lock) { + $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); + return; + } + } + return $this->plist->get($key); } /** @@ -139,13 +178,13 @@ class HTMLPurifier_Config * @param $namespace String namespace */ public function getBatch($namespace) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); - if (!isset($this->def->info[$namespace])) { - trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), + if (!$this->finalized) $this->autoFinalize(); + $full = $this->getAll(); + if (!isset($full[$namespace])) { + $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), E_USER_WARNING); return; } - $full = $this->getAll(); return $full[$namespace]; } @@ -178,9 +217,10 @@ class HTMLPurifier_Config /** * Retrieves all directives, organized by namespace + * @warning This is a pretty inefficient function, avoid if you can */ public function getAll() { - if (!$this->finalized) $this->autoFinalize ? 
$this->finalize() : $this->plist->squash(true); + if (!$this->finalized) $this->autoFinalize(); $ret = array(); foreach ($this->plist->squash() as $name => $value) { list($ns, $key) = explode('.', $name, 2); @@ -191,29 +231,37 @@ class HTMLPurifier_Config /** * Sets a value to configuration. - * @param $namespace String namespace * @param $key String key * @param $value Mixed value */ - public function set($namespace, $key, $value, $from_alias = false) { + public function set($key, $value, $a = null) { + if (strpos($key, '.') === false) { + $namespace = $key; + $directive = $value; + $value = $a; + $key = "$key.$directive"; + $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); + } else { + list($namespace) = explode('.', $key); + } if ($this->isFinalized('Cannot set directive after finalization')) return; - if (!isset($this->def->info[$namespace][$key])) { - trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value', + if (!isset($this->def->info[$key])) { + $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', E_USER_WARNING); return; } - $def = $this->def->info[$namespace][$key]; + $def = $this->def->info[$key]; if (isset($def->isAlias)) { - if ($from_alias) { - trigger_error('Double-aliases not allowed, please fix '. - 'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR); + if ($this->aliasMode) { + $this->triggerError('Double-aliases not allowed, please fix '. + 'ConfigSchema bug with' . 
$key, E_USER_ERROR); return; } - $this->set($new_ns = $def->namespace, - $new_dir = $def->name, - $value, true); - trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE); + $this->aliasMode = true; + $this->set($def->key, $value); + $this->aliasMode = false; + $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); return; } @@ -231,7 +279,7 @@ class HTMLPurifier_Config try { $value = $this->parser->parse($value, $type, $allow_null); } catch (HTMLPurifier_VarParserException $e) { - trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); + $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); return; } if (is_string($value) && is_object($def)) { @@ -241,17 +289,17 @@ class HTMLPurifier_Config } // check to see if the value is allowed if (isset($def->allowed) && !isset($def->allowed[$value])) { - trigger_error('Value not supported, valid values are: ' . + $this->triggerError('Value not supported, valid values are: ' . $this->_listify($def->allowed), E_USER_WARNING); return; } } - $this->plist->set("$namespace.$key", $value); + $this->plist->set($key, $value); // reset definitions if the directives they depend on changed // this is a very costly process, so it's discouraged // with finalization - if ($namespace == 'HTML' || $namespace == 'CSS') { + if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { $this->definitions[$namespace] = null; } @@ -271,74 +319,203 @@ class HTMLPurifier_Config * Retrieves object reference to the HTML definition. * @param $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. 
+ * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawHTMLDefinition, which is more explicitly + * named, instead. */ - public function getHTMLDefinition($raw = false) { - return $this->getDefinition('HTML', $raw); + public function getHTMLDefinition($raw = false, $optimized = false) { + return $this->getDefinition('HTML', $raw, $optimized); } /** * Retrieves object reference to the CSS definition * @param $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. + * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawCSSDefinition, which is more explicitly + * named, instead. */ - public function getCSSDefinition($raw = false) { - return $this->getDefinition('CSS', $raw); + public function getCSSDefinition($raw = false, $optimized = false) { + return $this->getDefinition('CSS', $raw, $optimized); + } + + /** + * Retrieves object reference to the URI definition + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + * @param $optimized If true, this method may return null, to + * indicate that a cached version of the modified + * definition object is available and no further edits + * are necessary. Consider using + * maybeGetRawURIDefinition, which is more explicitly + * named, instead. + */ + public function getURIDefinition($raw = false, $optimized = false) { + return $this->getDefinition('URI', $raw, $optimized); } /** * Retrieves a definition * @param $type Type of definition: HTML, CSS, etc * @param $raw Whether or not definition should be returned raw + * @param $optimized Only has an effect when $raw is true. 
Whether + * or not to return null if the result is already present in + * the cache. This is off by default for backwards + * compatibility reasons, but you need to do things this + * way in order to ensure that caching is done properly. + * Check out enduser-customize.html for more details. + * We probably won't ever change this default, as much as the + * maybe semantics is the "right thing to do." */ - public function getDefinition($type, $raw = false) { - if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true); + public function getDefinition($type, $raw = false, $optimized = false) { + if ($optimized && !$raw) { + throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); + } + if (!$this->finalized) $this->autoFinalize(); + // temporarily suspend locks, so we can handle recursive definition calls + $lock = $this->lock; + $this->lock = null; $factory = HTMLPurifier_DefinitionCacheFactory::instance(); $cache = $factory->create($type, $this); + $this->lock = $lock; if (!$raw) { - // see if we can quickly supply a definition + // full definition + // --------------- + // check if definition is in memory if (!empty($this->definitions[$type])) { - if (!$this->definitions[$type]->setup) { - $this->definitions[$type]->setup($this); - $cache->set($this->definitions[$type], $this); + $def = $this->definitions[$type]; + // check if the definition is setup + if ($def->setup) { + return $def; + } else { + $def->setup($this); + if ($def->optimized) $cache->add($def, $this); + return $def; } - return $this->definitions[$type]; } - // memory check missed, try cache - $this->definitions[$type] = $cache->get($this); - if ($this->definitions[$type]) { - // definition in cache, return it - return $this->definitions[$type]; + // check if definition is in cache + $def = $cache->get($this); + if ($def) { + // definition in cache, save to memory and return it + $this->definitions[$type] = $def; + return $def; } - } elseif ( 
- !empty($this->definitions[$type]) && - !$this->definitions[$type]->setup - ) { - // raw requested, raw in memory, quick return - return $this->definitions[$type]; + // initialize it + $def = $this->initDefinition($type); + // set it up + $this->lock = $type; + $def->setup($this); + $this->lock = null; + // save in cache + $cache->add($def, $this); + // return it + return $def; + } else { + // raw definition + // -------------- + // check preconditions + $def = null; + if ($optimized) { + if (is_null($this->get($type . '.DefinitionID'))) { + // fatally error out if definition ID not set + throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); + } + } + if (!empty($this->definitions[$type])) { + $def = $this->definitions[$type]; + if ($def->setup && !$optimized) { + $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; + throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); + } + if ($def->optimized === null) { + $extra = $this->chatty ? " (try flushing your cache)" : ""; + throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); + } + if ($def->optimized !== $optimized) { + $msg = $optimized ? "optimized" : "unoptimized"; + $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; + throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . 
$extra); + } + } + // check if definition was in memory + if ($def) { + if ($def->setup) { + // invariant: $optimized === true (checked above) + return null; + } else { + return $def; + } + } + // if optimized, check if definition was in cache + // (because we do the memory check first, this formulation + // is prone to cache slamming, but I think + // guaranteeing that either /all/ of the raw + // setup code or /none/ of it is run is more important.) + if ($optimized) { + // This code path only gets run once; once we put + // something in $definitions (which is guaranteed by the + // trailing code), we always short-circuit above. + $def = $cache->get($this); + if ($def) { + // save the full definition for later, but don't + // return it yet + $this->definitions[$type] = $def; + return null; + } + } + // check invariants for creation + if (!$optimized) { + if (!is_null($this->get($type . '.DefinitionID'))) { + if ($this->chatty) { + $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). 
See Customize for more details", E_USER_WARNING); + } else { + $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING); + } + } + } + // initialize it + $def = $this->initDefinition($type); + $def->optimized = $optimized; + return $def; } + throw new HTMLPurifier_Exception("The impossible happened!"); + } + + private function initDefinition($type) { // quick checks failed, let's create the object if ($type == 'HTML') { - $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); + $def = new HTMLPurifier_HTMLDefinition(); } elseif ($type == 'CSS') { - $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + $def = new HTMLPurifier_CSSDefinition(); } elseif ($type == 'URI') { - $this->definitions[$type] = new HTMLPurifier_URIDefinition(); + $def = new HTMLPurifier_URIDefinition(); } else { throw new HTMLPurifier_Exception("Definition of $type type not supported"); } - // quick abort if raw - if ($raw) { - if (is_null($this->get($type, 'DefinitionID'))) { - // fatally error out if definition ID not set - throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); - } - return $this->definitions[$type]; - } - // set it up - $this->definitions[$type]->setup($this); - // save in cache - $cache->set($this->definitions[$type], $this); - return $this->definitions[$type]; + $this->definitions[$type] = $def; + return $def; + } + + public function maybeGetRawDefinition($name) { + return $this->getDefinition($name, true, true); + } + + public function maybeGetRawHTMLDefinition() { + return $this->getDefinition('HTML', true, true); + } + + public function maybeGetRawCSSDefinition() { + return $this->getDefinition('CSS', true, true); + } + + public function maybeGetRawURIDefinition() { + return $this->getDefinition('URI', true, true); } /** @@ -351,14 +528,12 @@ class HTMLPurifier_Config foreach ($config_array as $key => $value) { $key = str_replace('_', '.', $key); if (strpos($key, '.') !== false) { - // 
condensed form - list($namespace, $directive) = explode('.', $key); - $this->set($namespace, $directive, $value); + $this->set($key, $value); } else { $namespace = $key; $namespace_values = $value; foreach ($namespace_values as $directive => $value) { - $this->set($namespace, $directive, $value); + $this->set($namespace .'.'. $directive, $value); } } } @@ -394,16 +569,15 @@ class HTMLPurifier_Config } } $ret = array(); - foreach ($schema->info as $ns => $keypairs) { - foreach ($keypairs as $directive => $def) { - if ($allowed !== true) { - if (isset($blacklisted_directives["$ns.$directive"])) continue; - if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; - } - if (isset($def->isAlias)) continue; - if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; - $ret[] = array($ns, $directive); + foreach ($schema->info as $key => $def) { + list($ns, $directive) = explode('.', $key, 2); + if ($allowed !== true) { + if (isset($blacklisted_directives["$ns.$directive"])) continue; + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; } + if (isset($def->isAlias)) continue; + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; + $ret[] = array($ns, $directive); } return $ret; } @@ -472,7 +646,7 @@ class HTMLPurifier_Config */ public function isFinalized($error = false) { if ($this->finalized && $error) { - trigger_error($error, E_USER_ERROR); + $this->triggerError($error, E_USER_ERROR); } return $this->finalized; } @@ -482,7 +656,11 @@ class HTMLPurifier_Config * already finalized */ public function autoFinalize() { - if (!$this->finalized && $this->autoFinalize) $this->finalize(); + if ($this->autoFinalize) { + $this->finalize(); + } else { + $this->plist->squash(true); + } } /** @@ -490,6 +668,40 @@ class HTMLPurifier_Config */ public function finalize() { $this->finalized = true; + unset($this->parser); + } + + /** + * Produces a nicely formatted error 
message by supplying the + * stack frame information OUTSIDE of HTMLPurifier_Config. + */ + protected function triggerError($msg, $no) { + // determine previous stack frame + $extra = ''; + if ($this->chatty) { + $trace = debug_backtrace(); + // zip(tail(trace), trace) -- but PHP is not Haskell har har + for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { + if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { + continue; + } + $frame = $trace[$i]; + $extra = " invoked on line {$frame['line']} in file {$frame['file']}"; + break; + } + } + trigger_error($msg . $extra, $no); + } + + /** + * Returns a serialized form of the configuration object that can + * be reconstituted. + */ + public function serialize() { + $this->getDefinition('HTML'); + $this->getDefinition('CSS'); + $this->getDefinition('URI'); + return serialize($this); } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php old mode 100755 new mode 100644 index 340ed7dbc..fadf7a589 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema.php @@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema { * Unserializes the default ConfigSchema. */ public static function makeFromSerial() { - return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser')); + $contents = file_get_contents(HTMLPURIFIER_PREFIX . 
'/HTMLPurifier/ConfigSchema/schema.ser'); + $r = unserialize($contents); + if (!$r) { + $hash = sha1($contents); + trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR); + } + return $r; } /** @@ -87,24 +93,13 @@ class HTMLPurifier_ConfigSchema { * HTMLPurifier_DirectiveDef::$type for allowed values * @param $allow_null Whether or not to allow null values */ - public function add($namespace, $name, $default, $type, $allow_null) { + public function add($key, $default, $type, $allow_null) { $obj = new stdclass(); $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type]; if ($allow_null) $obj->allow_null = true; - $this->info[$namespace][$name] = $obj; - $this->defaults[$namespace][$name] = $default; - $this->defaultPlist->set("$namespace.$name", $default); - } - - /** - * Defines a namespace for directives to be put into. - * @warning This is slightly different from the corresponding static - * method. - * @param $namespace Namespace's name - */ - public function addNamespace($namespace) { - $this->info[$namespace] = array(); - $this->defaults[$namespace] = array(); + $this->info[$key] = $obj; + $this->defaults[$key] = $default; + $this->defaultPlist->set($key, $default); } /** @@ -116,12 +111,12 @@ class HTMLPurifier_ConfigSchema { * @param $name Name of Directive * @param $aliases Hash of aliased values to the real alias */ - public function addValueAliases($namespace, $name, $aliases) { - if (!isset($this->info[$namespace][$name]->aliases)) { - $this->info[$namespace][$name]->aliases = array(); + public function addValueAliases($key, $aliases) { + if (!isset($this->info[$key]->aliases)) { + $this->info[$key]->aliases = array(); } foreach ($aliases as $alias => $real) { - $this->info[$namespace][$name]->aliases[$alias] = $real; + $this->info[$key]->aliases[$alias] = $real; } } @@ -133,8 +128,8 @@ class HTMLPurifier_ConfigSchema { * @param $name Name of directive * @param $allowed Lookup array of 
allowed values */ - public function addAllowedValues($namespace, $name, $allowed) { - $this->info[$namespace][$name]->allowed = $allowed; + public function addAllowedValues($key, $allowed) { + $this->info[$key]->allowed = $allowed; } /** @@ -144,88 +139,26 @@ class HTMLPurifier_ConfigSchema { * @param $new_namespace * @param $new_name Directive that the alias will be to */ - public function addAlias($namespace, $name, $new_namespace, $new_name) { + public function addAlias($key, $new_key) { $obj = new stdclass; - $obj->namespace = $new_namespace; - $obj->name = $new_name; + $obj->key = $new_key; $obj->isAlias = true; - $this->info[$namespace][$name] = $obj; + $this->info[$key] = $obj; } /** * Replaces any stdclass that only has the type property with type integer. */ public function postProcess() { - foreach ($this->info as $namespace => $info) { - foreach ($info as $directive => $v) { - if (count((array) $v) == 1) { - $this->info[$namespace][$directive] = $v->type; - } elseif (count((array) $v) == 2 && isset($v->allow_null)) { - $this->info[$namespace][$directive] = -$v->type; - } + foreach ($this->info as $key => $v) { + if (count((array) $v) == 1) { + $this->info[$key] = $v->type; + } elseif (count((array) $v) == 2 && isset($v->allow_null)) { + $this->info[$key] = -$v->type; } } } - // DEPRECATED METHODS - - /** @see HTMLPurifier_ConfigSchema->set() */ - public static function define($namespace, $name, $default, $type, $description) { - HTMLPurifier_ConfigSchema::deprecated(__METHOD__); - $type_values = explode('/', $type, 2); - $type = $type_values[0]; - $modifier = isset($type_values[1]) ? 
$type_values[1] : false; - $allow_null = ($modifier === 'null'); - $def = HTMLPurifier_ConfigSchema::instance(); - $def->add($namespace, $name, $default, $type, $allow_null); - } - - /** @see HTMLPurifier_ConfigSchema->addNamespace() */ - public static function defineNamespace($namespace, $description) { - HTMLPurifier_ConfigSchema::deprecated(__METHOD__); - $def = HTMLPurifier_ConfigSchema::instance(); - $def->addNamespace($namespace); - } - - /** @see HTMLPurifier_ConfigSchema->addValueAliases() */ - public static function defineValueAliases($namespace, $name, $aliases) { - HTMLPurifier_ConfigSchema::deprecated(__METHOD__); - $def = HTMLPurifier_ConfigSchema::instance(); - $def->addValueAliases($namespace, $name, $aliases); - } - - /** @see HTMLPurifier_ConfigSchema->addAllowedValues() */ - public static function defineAllowedValues($namespace, $name, $allowed_values) { - HTMLPurifier_ConfigSchema::deprecated(__METHOD__); - $allowed = array(); - foreach ($allowed_values as $value) { - $allowed[$value] = true; - } - $def = HTMLPurifier_ConfigSchema::instance(); - $def->addAllowedValues($namespace, $name, $allowed); - } - - /** @see HTMLPurifier_ConfigSchema->addAlias() */ - public static function defineAlias($namespace, $name, $new_namespace, $new_name) { - HTMLPurifier_ConfigSchema::deprecated(__METHOD__); - $def = HTMLPurifier_ConfigSchema::instance(); - $def->addAlias($namespace, $name, $new_namespace, $new_name); - } - - /** @deprecated, use HTMLPurifier_VarParser->parse() */ - public function validate($a, $b, $c = false) { - trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE); - $parser = new HTMLPurifier_VarParser(); - return $parser->parse($a, $b, $c); - } - - /** - * Throws an E_USER_NOTICE stating that a method is deprecated. 
- */ - private static function deprecated($method) { - trigger_error("Static HTMLPurifier_ConfigSchema::$method deprecated, use add*() method instead", E_USER_NOTICE); - } - } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php old mode 100755 new mode 100644 index 987f547bc..c05668a70 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php @@ -9,36 +9,28 @@ class HTMLPurifier_ConfigSchema_Builder_ConfigSchema public function build($interchange) { $schema = new HTMLPurifier_ConfigSchema(); - foreach ($interchange->namespaces as $n) { - $schema->addNamespace($n->namespace); - } foreach ($interchange->directives as $d) { $schema->add( - $d->id->namespace, - $d->id->directive, + $d->id->key, $d->default, $d->type, $d->typeAllowsNull ); if ($d->allowed !== null) { $schema->addAllowedValues( - $d->id->namespace, - $d->id->directive, + $d->id->key, $d->allowed ); } foreach ($d->aliases as $alias) { $schema->addAlias( - $alias->namespace, - $alias->directive, - $d->id->namespace, - $d->id->directive + $alias->key, + $d->id->key ); } if ($d->valueAliases !== null) { $schema->addValueAliases( - $d->id->namespace, - $d->id->directive, + $d->id->key, $d->valueAliases ); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php old mode 100755 new mode 100644 index 51bcab78c..244561a37 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Builder/Xml.php @@ -8,6 +8,7 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter { protected $interchange; + private $namespace; protected function writeHTMLDiv($html) { $this->startElement('div'); @@ -34,36 +35,33 @@ class 
HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter $this->startElement('configdoc'); $this->writeElement('title', $interchange->name); - foreach ($interchange->namespaces as $namespace) { - $this->buildNamespace($namespace); + foreach ($interchange->directives as $directive) { + $this->buildDirective($directive); } + if ($this->namespace) $this->endElement(); // namespace + $this->endElement(); // configdoc $this->flush(); } - public function buildNamespace($namespace) { - $this->startElement('namespace'); - $this->writeAttribute('id', $namespace->namespace); + public function buildDirective($directive) { - $this->writeElement('name', $namespace->namespace); - $this->startElement('description'); - $this->writeHTMLDiv($namespace->description); - $this->endElement(); // description - - foreach ($this->interchange->directives as $directive) { - if ($directive->id->namespace !== $namespace->namespace) continue; - $this->buildDirective($directive); + // Kludge, although I suppose having a notion of a "root namespace" + // certainly makes things look nicer when documentation is built. + // Depends on things being sorted. 
+ if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) { + if ($this->namespace) $this->endElement(); // namespace + $this->namespace = $directive->id->getRootNamespace(); + $this->startElement('namespace'); + $this->writeAttribute('id', $this->namespace); + $this->writeElement('name', $this->namespace); } - $this->endElement(); // namespace - } - - public function buildDirective($directive) { $this->startElement('directive'); $this->writeAttribute('id', $directive->id->toString()); - $this->writeElement('name', $directive->id->directive); + $this->writeElement('name', $directive->id->getDirective()); $this->startElement('aliases'); foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString()); diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Exception.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Exception.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php old mode 100755 new mode 100644 index 365c66357..91a5aa730 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange.php @@ -13,26 +13,11 @@ class HTMLPurifier_ConfigSchema_Interchange */ public $name; - /** - * Array of Namespace ID => array(namespace info) - */ - public $namespaces = array(); - /** * Array of Directive ID => array(directive info) */ public $directives = array(); - /** - * Adds a namespace array to $namespaces - */ - public function addNamespace($namespace) { - if (isset($this->namespaces[$i = $namespace->namespace])) { - throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$i'"); - } - $this->namespaces[$i] = $namespace; - } - /** * Adds a directive array to $directives */ diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php 
b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Directive.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php old mode 100755 new mode 100644 index ec01589b6..b9b3c6f5c --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Interchange/Id.php @@ -6,11 +6,10 @@ class HTMLPurifier_ConfigSchema_Interchange_Id { - public $namespace, $directive; + public $key; - public function __construct($namespace, $directive) { - $this->namespace = $namespace; - $this->directive = $directive; + public function __construct($key) { + $this->key = $key; } /** @@ -18,12 +17,19 @@ class HTMLPurifier_ConfigSchema_Interchange_Id * cause problems for PHP 5.0 support. */ public function toString() { - return $this->namespace . '.' . $this->directive; + return $this->key; + } + + public function getRootNamespace() { + return substr($this->key, 0, strpos($this->key, ".")); + } + + public function getDirective() { + return substr($this->key, strpos($this->key, ".") + 1); } public static function make($id) { - list($namespace, $directive) = explode('.', $id); - return new HTMLPurifier_ConfigSchema_Interchange_Id($namespace, $directive); + return new HTMLPurifier_ConfigSchema_Interchange_Id($id); } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php old mode 100755 new mode 100644 index a1a24eacb..785b72ce8 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php @@ -13,13 +13,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder } public static function buildFromDirectory($dir = null) { - $parser = new HTMLPurifier_StringHashParser(); $builder = new 
HTMLPurifier_ConfigSchema_InterchangeBuilder(); $interchange = new HTMLPurifier_ConfigSchema_Interchange(); + return $builder->buildDir($interchange, $dir); + } - if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/'; - $info = parse_ini_file($dir . 'info.ini'); - $interchange->name = $info['name']; + public function buildDir($interchange, $dir = null) { + if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema'; + if (file_exists($dir . '/info.ini')) { + $info = parse_ini_file($dir . '/info.ini'); + $interchange->name = $info['name']; + } $files = array(); $dh = opendir($dir); @@ -33,15 +37,20 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder sort($files); foreach ($files as $file) { - $builder->build( - $interchange, - new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) ) - ); + $this->buildFile($interchange, $dir . '/' . $file); } return $interchange; } + public function buildFile($interchange, $file) { + $parser = new HTMLPurifier_StringHashParser(); + $this->build( + $interchange, + new HTMLPurifier_StringHash( $parser->parseFile($file) ) + ); + } + /** * Builds an interchange object based on a hash. 
* @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build @@ -55,22 +64,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID'); } if (strpos($hash['ID'], '.') === false) { - $this->buildNamespace($interchange, $hash); + if (count($hash) == 2 && isset($hash['DESCRIPTION'])) { + $hash->offsetGet('DESCRIPTION'); // prevent complaining + } else { + throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace'); + } } else { $this->buildDirective($interchange, $hash); } $this->_findUnused($hash); } - public function buildNamespace($interchange, $hash) { - $namespace = new HTMLPurifier_ConfigSchema_Interchange_Namespace(); - $namespace->namespace = $hash->offsetGet('ID'); - if (isset($hash['DESCRIPTION'])) { - $namespace->description = $hash->offsetGet('DESCRIPTION'); - } - $interchange->addNamespace($namespace); - } - public function buildDirective($interchange, $hash) { $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive(); diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php old mode 100755 new mode 100644 index 2dfd37bae..f374f6a02 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/Validator.php @@ -39,10 +39,6 @@ class HTMLPurifier_ConfigSchema_Validator $this->aliases = array(); // PHP is a bit lax with integer <=> string conversions in // arrays, so we don't use the identical !== comparison - foreach ($interchange->namespaces as $i => $namespace) { - if ($i != $namespace->namespace) $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'"); - $this->validateNamespace($namespace); - } foreach ($interchange->directives as $i => $directive) { $id = $directive->id->toString(); if ($i != $id) $this->error(false, "Integrity 
violation: key '$i' does not match internal id '$id'"); @@ -51,20 +47,6 @@ class HTMLPurifier_ConfigSchema_Validator return true; } - /** - * Validates a HTMLPurifier_ConfigSchema_Interchange_Namespace object. - */ - public function validateNamespace($n) { - $this->context[] = "namespace '{$n->namespace}'"; - $this->with($n, 'namespace') - ->assertNotEmpty() - ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder - $this->with($n, 'description') - ->assertNotEmpty() - ->assertIsString(); // handled by InterchangeBuilder - array_pop($this->context); - } - /** * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object. */ @@ -75,12 +57,11 @@ class HTMLPurifier_ConfigSchema_Validator // handled by InterchangeBuilder $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id'); } - if (!isset($this->interchange->namespaces[$id->namespace])) { - $this->error('namespace', 'does not exist'); // assumes that the namespace was validated already - } - $this->with($id, 'directive') + // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.) 
+ // we probably should check that it has at least one namespace + $this->with($id, 'key') ->assertNotEmpty() - ->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder + ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder array_pop($this->context); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser old mode 100755 new mode 100644 index 1eaecd11f..245ba5d2d Binary files a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser and b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser differ diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt new file mode 100644 index 000000000..0517fed0a --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt @@ -0,0 +1,8 @@ +Attr.AllowedClasses +TYPE: lookup/null +VERSION: 4.0.0 +DEFAULT: null +--DESCRIPTION-- +List of allowed class values in the class attribute. By default, this is null, +which means all classes are allowed. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt new file mode 100644 index 000000000..e774b823b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt @@ -0,0 +1,19 @@ +Attr.ClassUseCDATA +TYPE: bool/null +DEFAULT: null +VERSION: 4.0.0 +--DESCRIPTION-- +If null, class will auto-detect the doctype and, if matching XHTML 1.1 or +XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise, +it will use a relaxed CDATA definition. If true, the relaxed CDATA definition +is forced; if false, the NMTOKENS definition is forced. To get behavior +of HTML Purifier prior to 4.0.0, set this directive to false. + +Some rational behind the auto-detection: +in previous versions of HTML Purifier, it was assumed that the form of +class was NMTOKENS, as specified by the XHTML Modularization (representing +XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however +specify class as CDATA. HTML 5 effectively defines it as CDATA, but +with the additional constraint that each name should be unique (this is not +explicitly outlined in previous specifications). 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt new file mode 100644 index 000000000..f31d226f5 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt @@ -0,0 +1,8 @@ +Attr.ForbiddenClasses +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array() +--DESCRIPTION-- +List of forbidden class values in the class attribute. By default, this is +empty, which means that no classes are forbidden. See also %Attr.AllowedClasses. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt new file mode 100644 index 000000000..db58b1346 --- /dev/null +++ 
b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt @@ -0,0 +1,12 @@ +AutoFormat.PurifierLinkify.DocURL +TYPE: string +VERSION: 2.0.1 +DEFAULT: '#%s' +ALIASES: AutoFormatParam.PurifierLinkifyDocURL +--DESCRIPTION-- +

+ Location of configuration documentation to link to, let %s substitute + into the configuration's namespace and directive names sans the percent + sign. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt new file mode 100644 index 000000000..35c393b4e --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array('td' => true, 'th' => true) +--DESCRIPTION-- +

+ When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp + are enabled, this directive defines what HTML elements should not be + removed if they have only a non-breaking space in them. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt new file mode 100644 index 000000000..ca17eb1dc --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt @@ -0,0 +1,15 @@ +AutoFormat.RemoveEmpty.RemoveNbsp +TYPE: bool +VERSION: 4.0.0 +DEFAULT: false +--DESCRIPTION-- +

+ When enabled, HTML Purifier will treat any elements that contain only + non-breaking spaces as well as regular whitespace as empty, and remove + them when %AutoFormat.RemoveEmpty is enabled. +

+

+ See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements + that don't have this behavior applied to them. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt old mode 100755 new mode 100644 index aaede47d6..34657ba47 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt @@ -31,7 +31,8 @@ DEFAULT: false

Elements that contain only whitespace will be treated as empty. Non-breaking - spaces, however, do not count as whitespace. + spaces, however, do not count as whitespace. See + %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.

This algorithm is not perfect; you may still notice some empty tags, @@ -39,7 +40,7 @@ DEFAULT: false because they were not permitted in that context, or tags that, after being auto-closed by another tag, where empty. This is for safety reasons to prevent clever code from breaking validation. The general rule of thumb: - if a tag looked empty on the way end, it will get removed; if HTML Purifier + if a tag looked empty on the way in, it will get removed; if HTML Purifier made it empty, it will stay.

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt new file mode 100644 index 000000000..dde990ab2 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveSpansWithoutAttributes +TYPE: bool +VERSION: 4.0.1 +DEFAULT: false +--DESCRIPTION-- +

+ This directive causes span tags without any attributes + to be removed. It will also remove spans that had all attributes + removed during processing. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt new file mode 100644 index 000000000..3fd465406 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt @@ -0,0 +1,12 @@ +CSS.AllowedFonts +TYPE: lookup/null +VERSION: 4.3.0 +DEFAULT: NULL +--DESCRIPTION-- +

+ Allows you to manually specify a set of allowed fonts. If + NULL, all fonts are allowed. This directive + affects generic names (serif, sans-serif, monospace, cursive, + fantasy) as well as specific font families. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt new file mode 100644 index 000000000..f1f5c5f12 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt @@ -0,0 +1,13 @@ +CSS.ForbiddenProperties +TYPE: lookup +VERSION: 4.2.0 +DEFAULT: array() +--DESCRIPTION-- +

+ This is the logical inverse of %CSS.AllowedProperties, and it will + override that directive or any other directive. If possible, + %CSS.AllowedProperties is recommended over this directive, + because it can sometimes be difficult to tell whether or not you've + forbidden all of the CSS properties you truly would like to disallow. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt new file mode 100644 index 000000000..e733a61e8 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt @@ -0,0 +1,9 @@ +CSS.Trusted +TYPE: bool +VERSION: 4.2.1 +DEFAULT: false +--DESCRIPTION-- +Indicates whether or not the user's CSS input is trusted or not. If the +input is trusted, a more expansive set of allowed properties. See +also %HTML.Trusted. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt new file mode 100644 index 000000000..b2b83d9ab --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt @@ -0,0 +1,11 @@ +Cache.SerializerPermissions +TYPE: int +VERSION: 4.3.0 +DEFAULT: 0755 +--DESCRIPTION-- + +

+ Directory permissions of the files and directories created inside + the DefinitionCache/Serializer or other custom serializer path. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt old mode 100755 new 
mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.Language.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.Language.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt new file mode 100644 index 000000000..d77f5360d --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt @@ -0,0 +1,11 @@ +Core.NormalizeNewlines +TYPE: bool +VERSION: 4.2.0 +DEFAULT: true +--DESCRIPTION-- +

+ Whether or not to normalize newlines to the operating + system default. When false, HTML Purifier + will attempt to preserve mixed newline files. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt new file mode 100644 index 000000000..3397d9f71 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt @@ -0,0 +1,11 @@ +Core.RemoveProcessingInstructions +TYPE: bool +VERSION: 4.2.0 +DEFAULT: false +--DESCRIPTION-- +Instead of escaping processing instructions in the form <? ... +?>, remove it out-right. This may be useful if the HTML +you are validating contains XML processing instruction gunk, however, +it can also be user-unfriendly for people attempting to post PHP +snippets. +--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt new file mode 100644 index 000000000..16829bcda --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt @@ -0,0 +1,14 @@ +Filter.ExtractStyleBlocks.Escaping +TYPE: bool +VERSION: 3.0.0 +DEFAULT: true +ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping 
+--DESCRIPTION-- + +

+ Whether or not to escape the dangerous characters <, > and & + as \3C, \3E and \26, respectively. This can be safely set to false + if the contents of StyleBlocks will be placed in an external stylesheet, + where there is no risk of it being interpreted as HTML. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt new file mode 100644 index 000000000..7f95f54d1 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt @@ -0,0 +1,29 @@ +Filter.ExtractStyleBlocks.Scope +TYPE: string/null +VERSION: 3.0.0 +DEFAULT: NULL +ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope +--DESCRIPTION-- + +

+ If you would like users to be able to define external stylesheets, but + only allow them to specify CSS declarations for a specific node and + prevent them from fiddling with other elements, use this directive. + It accepts any valid CSS selector, and will prepend this to any + CSS declaration extracted from the document. For example, if this + directive is set to #user-content and a user uses the + selector a:hover, the final selector will be + #user-content a:hover. +

+

+ The comma shorthand may be used; consider the above example, with + #user-content, #user-content2, the final selector will + be #user-content a:hover, #user-content2 a:hover. +

+

+ Warning: It is possible for users to bypass this measure + using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML + Purifier, and I am working to get it fixed. Until then, HTML Purifier + performs a basic check to prevent this. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt new file mode 100644 index 000000000..6c231b2d7 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt @@ -0,0 +1,16 @@ +Filter.ExtractStyleBlocks.TidyImpl +TYPE: mixed/null +VERSION: 3.1.0 +DEFAULT: NULL +ALIASES: FilterParam.ExtractStyleBlocksTidyImpl +--DESCRIPTION-- +

+ If left NULL, HTML Purifier will attempt to instantiate a csstidy + class to use for internal cleaning. This will usually be good enough. +

+

+ However, for trusted user input, you can set this to false to + disable cleaning. In addition, you can supply your own concrete implementation + of Tidy's interface to use, although I don't know why you'd want to do that. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt old mode 100755 new mode 100644 index 7fa6536b2..321eaa2d8 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt @@ -3,6 +3,11 @@ TYPE: bool VERSION: 3.1.0 DEFAULT: false --DESCRIPTION-- +

+ Warning: Deprecated in favor of %HTML.SafeObject and + %Output.FlashCompat (turn both on to allow YouTube videos and other + Flash content). +

This directive enables YouTube video embedding in HTML Purifier. Check this document diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt old mode 100755 new mode 100644 index 3e231d2d1..0b2c106da --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt @@ -5,11 +5,14 @@ DEFAULT: NULL --DESCRIPTION--

- This is a convenience directive that rolls the functionality of - %HTML.AllowedElements and %HTML.AllowedAttributes into one directive. + This is a preferred convenience directive that combines + %HTML.AllowedElements and %HTML.AllowedAttributes. Specify elements and attributes that are allowed using: - element1[attr1|attr2],element2.... You can also use - newlines instead of commas to separate elements. + element1[attr1|attr2],element2.... For example, + if you would like to only allow paragraphs and links, specify + a[href],p. You can specify attributes that apply + to all elements using an asterisk, e.g. *[lang]. + You can also use newlines instead of commas to separate elements.

Warning: diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt old mode 100755 new mode 100644 index 888d55819..1d3fa7907 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt @@ -4,12 +4,17 @@ VERSION: 1.3.0 DEFAULT: NULL --DESCRIPTION--

- If HTML Purifier's tag set is unsatisfactory for your needs, you - can overload it with your own list of tags to allow. Note that this - method is subtractive: it does its job by taking away from HTML Purifier - usual feature set, so you cannot add a tag that HTML Purifier never - supported in the first place (like embed, form or head). If you - change this, you probably also want to change %HTML.AllowedAttributes. + If HTML Purifier's tag set is unsatisfactory for your needs, you can + overload it with your own list of tags to allow. If you change + this, you probably also want to change %HTML.AllowedAttributes; see + also %HTML.Allowed which lets you set allowed elements and + attributes at the same time. +

+

+ If you attempt to allow an element that HTML Purifier does not know + about, HTML Purifier will raise an error. You will need to manually + tell HTML Purifier about this element by using the + advanced customization features.

Warning: If another directive conflicts with the diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt new file mode 100644 index 000000000..151fb7b82 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt @@ -0,0 +1,11 @@ +HTML.Attr.Name.UseCDATA +TYPE: bool +DEFAULT: false +VERSION: 4.0.0 +--DESCRIPTION-- +The W3C specification DTD defines the name attribute to be CDATA, not ID, due +to limitations of DTD. In certain documents, this relaxed behavior is desired, +whether it is to specify duplicate names, or to specify names that would be +illegal IDs (for example, names that begin with a digit.) Set this configuration +directive to true to use the relaxed parsing rules. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt new file mode 100644 index 000000000..7878dc0bf --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt @@ -0,0 +1,11 @@ +HTML.FlashAllowFullScreen +TYPE: bool +VERSION: 4.2.0 +DEFAULT: false +--DESCRIPTION-- +

+ Whether or not to permit embedded Flash content from + %HTML.SafeObject to expand to the full screen. Corresponds to + the allowFullScreen parameter. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt new file mode 100644 index 000000000..700b30924 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt @@ -0,0 +1,7 @@ +HTML.Nofollow +TYPE: bool +VERSION: 4.3.0 +DEFAULT: FALSE +--DESCRIPTION-- +If enabled, nofollow rel attributes are added to all outgoing links. 
+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt old mode 100755 new mode 100644 index f635a6854..cdda09a4c --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt @@ -7,8 +7,7 @@ DEFAULT: false Whether or not to permit embed tags in documents, with a number of extra security features added to prevent script execution. This is similar to what websites like MySpace do to embed tags. Embed is a proprietary - element and will cause your website to stop validating. You probably want - to enable this with %HTML.SafeObject. - Highly experimental. -

+ element and will cause your website to stop validating; you should + see if you can use %Output.FlashCompat with %HTML.SafeObject instead + first.

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt old mode 100755 new mode 100644 index 32967b88f..ceb342e22 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt @@ -6,9 +6,8 @@ DEFAULT: false

Whether or not to permit object tags in documents, with a number of extra security features added to prevent script execution. This is similar to - what websites like MySpace do to object tags. You may also want to - enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer, - although embed tags will cause your website to stop validating. - Highly experimental. + what websites like MySpace do to object tags. You should also enable + %Output.FlashCompat in order to generate Internet Explorer + compatibility code for your object tags.

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt old mode 100755 new mode 100644 index 89133b1a3..1db9237e9 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt @@ -5,4 +5,5 @@ DEFAULT: false --DESCRIPTION-- Indicates whether or not the user input is trusted or not. If the input is trusted, a more expansive set of allowed tags and attributes will be used. +See also %CSS.Trusted. 
--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt new file mode 100644 index 000000000..d6f0d9f29 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt @@ -0,0 +1,15 @@ +Output.FixInnerHTML +TYPE: bool +VERSION: 4.3.0 +DEFAULT: true +--DESCRIPTION-- +

+ If true, HTML Purifier will protect against Internet Explorer's + mishandling of the innerHTML attribute by appending + a space to any attribute that does not contain angled brackets, spaces + or quotes, but contains a backtick. This slightly changes the + semantics of any given attribute, so if this is unacceptable and + you do not use innerHTML on any of your pages, you can + turn this directive off. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt new file mode 100644 index 000000000..93398e859 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt @@ -0,0 +1,11 @@ +Output.FlashCompat +TYPE: bool +VERSION: 4.1.0 +DEFAULT: false +--DESCRIPTION-- +

+ If true, HTML Purifier will generate Internet Explorer compatibility + code for all object code. This is highly recommended if you enable + %HTML.SafeObject. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt old mode 100755 new mode 100644 index 98fdfe922..666635a5f --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -12,4 +12,6 @@ array ( --DESCRIPTION-- Whitelist that defines the schemes that a URI is allowed to have. This prevents XSS attacks from using pseudo-schemes like javascript or mocha. +There is also support for the data and file +URI schemes, but they are not enabled by default. 
--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Base.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Base.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt old mode 100755 new mode 100644 index 51e6ea91f..f891de499 --- a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt +++ b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt @@ -1,12 +1,15 @@ URI.DisableResources TYPE: bool -VERSION: 1.3.0 +VERSION: 4.2.0 DEFAULT: false --DESCRIPTION-- -

Disables embedding resources, essentially meaning no pictures. You can still link to them though. See %URI.DisableExternalResources for why this might be a good idea.

+

+ Note: While this directive has been available since 1.3.0, + it didn't actually start doing anything until 4.2.0. +

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Host.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Host.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/info.ini b/lib/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/info.ini old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ContentSets.php b/lib/htmlpurifier/library/HTMLPurifier/ContentSets.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Context.php b/lib/htmlpurifier/library/HTMLPurifier/Context.php old mode 100755 new mode 100644 diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/Definition.php b/lib/htmlpurifier/library/HTMLPurifier/Definition.php old mode 100755 new mode 100644 index a7408c974..c7f82eba4 --- a/lib/htmlpurifier/library/HTMLPurifier/Definition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Definition.php @@ -12,6 +12,17 @@ abstract class HTMLPurifier_Definition */ public $setup = false; + /** + * If true, write out the final definition object to the cache after + * setup. This will be true only if all invocations to get a raw + * definition object are also optimized. This does not cause file + * system thrashing because on subsequent calls the cached object + * is used and any writes to the raw definition object are short + * circuited. See enduser-customize.html for the high-level + * picture. + */ + public $optimized = null; + /** * What type of definition is it? */ diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache.php old mode 100755 new mode 100644 index fa5a087bb..c6e1e388c --- a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache.php +++ b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache.php @@ -28,7 +28,7 @@ abstract class HTMLPurifier_DefinitionCache public function generateKey($config) { return $config->version . ',' . // possibly replace with function calls $config->getBatchSerial($this->type) . ',' . - $config->get($this->type, 'DefinitionRev'); + $config->get($this->type . '.DefinitionRev'); } /** @@ -46,7 +46,7 @@ abstract class HTMLPurifier_DefinitionCache // versions match, ids match, check revision number if ( $hash == $config->getBatchSerial($this->type) && - $revision < $config->get($this->type, 'DefinitionRev') + $revision < $config->get($this->type . 
'.DefinitionRev') ) return true; return false; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Memory.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Memory.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Null.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Null.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php old mode 100755 new mode 100644 index acbbe2c3b..73d5e90a6 --- a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php +++ b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php @@ -9,14 +9,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends $file = $this->generateFilePath($config); if (file_exists($file)) return false; if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); + return $this->_write($file, serialize($def), $config); } public function set($def, $config) { if (!$this->checkDefType($def)) return; $file = $this->generateFilePath($config); if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); + return $this->_write($file, serialize($def), $config); } public function replace($def, $config) { @@ 
-24,7 +24,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends $file = $this->generateFilePath($config); if (!file_exists($file)) return false; if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); + return $this->_write($file, serialize($def), $config); } public function get($config) { @@ -88,7 +88,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends * @todo Make protected */ public function generateBaseDirectoryPath($config) { - $base = $config->get('Cache', 'SerializerPath'); + $base = $config->get('Cache.SerializerPath'); $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base; return $base; } @@ -97,18 +97,34 @@ class HTMLPurifier_DefinitionCache_Serializer extends * Convenience wrapper function for file_put_contents * @param $file File name to write to * @param $data Data to write into file + * @param $config Config object * @return Number of bytes written if success, or false if failure. 
*/ - private function _write($file, $data) { - return file_put_contents($file, $data); + private function _write($file, $data, $config) { + $result = file_put_contents($file, $data); + if ($result !== false) { + // set permissions of the new file (no execute) + $chmod = $config->get('Cache.SerializerPermissions'); + if (!$chmod) { + $chmod = 0644; // invalid config or simpletest + } + $chmod = $chmod & 0666; + chmod($file, $chmod); + } + return $result; } /** * Prepares the directory that this type stores the serials in + * @param $config Config object * @return True if successful */ private function _prepareDir($config) { $directory = $this->generateDirectoryPath($config); + $chmod = $config->get('Cache.SerializerPermissions'); + if (!$chmod) { + $chmod = 0755; // invalid config or simpletest + } if (!is_dir($directory)) { $base = $this->generateBaseDirectoryPath($config); if (!is_dir($base)) { @@ -116,13 +132,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends please create or change using %Cache.SerializerPath', E_USER_WARNING); return false; - } elseif (!$this->_testPermissions($base)) { + } elseif (!$this->_testPermissions($base, $chmod)) { return false; } - $old = umask(0022); // disable group and world writes - mkdir($directory); + $old = umask(0000); + mkdir($directory, $chmod); umask($old); - } elseif (!$this->_testPermissions($directory)) { + } elseif (!$this->_testPermissions($directory, $chmod)) { return false; } return true; @@ -131,8 +147,11 @@ class HTMLPurifier_DefinitionCache_Serializer extends /** * Tests permissions on a directory and throws out friendly * error messages and attempts to chmod it itself if possible + * @param $dir Directory path + * @param $chmod Permissions + * @return True if directory writable */ - private function _testPermissions($dir) { + private function _testPermissions($dir, $chmod) { // early abort, if it is writable, everything is hunky-dory if (is_writable($dir)) return true; if (!is_dir($dir)) { @@ -146,17 
+165,17 @@ class HTMLPurifier_DefinitionCache_Serializer extends // POSIX system, we can give more specific advice if (fileowner($dir) === posix_getuid()) { // we can chmod it ourselves - chmod($dir, 0755); - return true; + $chmod = $chmod | 0700; + if (chmod($dir, $chmod)) return true; } elseif (filegroup($dir) === posix_getgid()) { - $chmod = '775'; + $chmod = $chmod | 0070; } else { // PHP's probably running as nobody, so we'll // need to give global permissions - $chmod = '777'; + $chmod = $chmod | 0777; } trigger_error('Directory '.$dir.' not writable, '. - 'please chmod to ' . $chmod, + 'please chmod to ' . decoct($chmod), E_USER_WARNING); } else { // generic error message diff --git a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCacheFactory.php b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCacheFactory.php old mode 100755 new mode 100644 index 3adefbb66..a6ead6281 --- a/lib/htmlpurifier/library/HTMLPurifier/DefinitionCacheFactory.php +++ b/lib/htmlpurifier/library/HTMLPurifier/DefinitionCacheFactory.php @@ -46,7 +46,7 @@ class HTMLPurifier_DefinitionCacheFactory * @param $config Instance of HTMLPurifier_Config */ public function create($type, $config) { - $method = $config->get('Cache', 'DefinitionImpl'); + $method = $config->get('Cache.DefinitionImpl'); if ($method === null) { return new HTMLPurifier_DefinitionCache_Null($type); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Doctype.php b/lib/htmlpurifier/library/HTMLPurifier/Doctype.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/DoctypeRegistry.php b/lib/htmlpurifier/library/HTMLPurifier/DoctypeRegistry.php old mode 100755 new mode 100644 index d6552aa55..86049e939 --- a/lib/htmlpurifier/library/HTMLPurifier/DoctypeRegistry.php +++ b/lib/htmlpurifier/library/HTMLPurifier/DoctypeRegistry.php @@ -80,17 +80,17 @@ class HTMLPurifier_DoctypeRegistry */ public function getDoctypeFromConfig($config) { // recommended test - $doctype = $config->get('HTML', 
'Doctype'); + $doctype = $config->get('HTML.Doctype'); if (!empty($doctype)) return $doctype; - $doctype = $config->get('HTML', 'CustomDoctype'); + $doctype = $config->get('HTML.CustomDoctype'); if (!empty($doctype)) return $doctype; // backwards-compatibility - if ($config->get('HTML', 'XHTML')) { + if ($config->get('HTML.XHTML')) { $doctype = 'XHTML 1.0'; } else { $doctype = 'HTML 4.01'; } - if ($config->get('HTML', 'Strict')) { + if ($config->get('HTML.Strict')) { $doctype .= ' Strict'; } else { $doctype .= ' Transitional'; diff --git a/lib/htmlpurifier/library/HTMLPurifier/ElementDef.php b/lib/htmlpurifier/library/HTMLPurifier/ElementDef.php old mode 100755 new mode 100644 index b55c7bd79..5498d9567 --- a/lib/htmlpurifier/library/HTMLPurifier/ElementDef.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ElementDef.php @@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef */ public $autoclose = array(); + /** + * If a foreign element is found in this element, test if it is + * allowed by this sub-element; if it is, instead of closing the + * current element, place it inside this element. + */ + public $wrap; + /** * Whether or not this is a formatting element affected by the * "Active Formatting Elements" algorithm. @@ -142,7 +149,8 @@ class HTMLPurifier_ElementDef $this->_mergeAssocArray($this->excludes, $def->excludes); if(!empty($def->content_model)) { - $this->content_model .= ' | ' . $def->content_model; + $this->content_model = + str_replace("#SUPER", $this->content_model, $def->content_model); $this->child = false; } if(!empty($def->content_model_type)) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php old mode 100755 new mode 100644 index 79a42d26e..2b3140caa --- a/lib/htmlpurifier/library/HTMLPurifier/Encoder.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Encoder.php @@ -17,7 +17,7 @@ class HTMLPurifier_Encoder /** * Error-handler that mutes errors, alternative to shut-up operator. 
*/ - private static function muteErrorHandler() {} + public static function muteErrorHandler() {} /** * Cleans a UTF-8 string for well-formedness and SGML validity @@ -264,12 +264,12 @@ class HTMLPurifier_Encoder * Converts a string to UTF-8 based on configuration. */ public static function convertToUTF8($str, $config, $context) { - $encoding = $config->get('Core', 'Encoding'); + $encoding = $config->get('Core.Encoding'); if ($encoding === 'utf-8') return $str; static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); - if ($iconv && !$config->get('Test', 'ForceNoIconv')) { + if ($iconv && !$config->get('Test.ForceNoIconv')) { $str = iconv($encoding, 'utf-8//IGNORE', $str); if ($str === false) { // $encoding is not a valid encoding @@ -297,15 +297,15 @@ class HTMLPurifier_Encoder * characters being omitted. */ public static function convertFromUTF8($str, $config, $context) { - $encoding = $config->get('Core', 'Encoding'); + $encoding = $config->get('Core.Encoding'); if ($encoding === 'utf-8') return $str; static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); - if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) { + if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); } set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); - if ($iconv && !$config->get('Test', 'ForceNoIconv')) { + if ($iconv && !$config->get('Test.ForceNoIconv')) { // Undo our previous fix in convertToUTF8, otherwise iconv will barf $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); if (!$escape && !empty($ascii_fix)) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/EntityLookup.php b/lib/htmlpurifier/library/HTMLPurifier/EntityLookup.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/EntityLookup/entities.ser 
b/lib/htmlpurifier/library/HTMLPurifier/EntityLookup/entities.ser old mode 100755 new mode 100644 index f2b8b8f2d..e8b08128b --- a/lib/htmlpurifier/library/HTMLPurifier/EntityLookup/entities.ser +++ b/lib/htmlpurifier/library/HTMLPurifier/EntityLookup/entities.ser @@ -1 +1 @@ -a:246:{s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc"
;s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:
"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";} \ No newline at end of file 
+a:253:{s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:6:"there4";s:3:"∴";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge
";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"nbsp";s:2:" 
";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:4:"sup2";s:2:"²";s:4:"sup3";s:2:"³";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"sup1";s:2:"¹";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"frac14";s:2:"¼";s:6:"frac12";s:2:"½";s:6:"frac34";s:2:"¾";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";} \ No newline at end of file diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/EntityParser.php b/lib/htmlpurifier/library/HTMLPurifier/EntityParser.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ErrorCollector.php b/lib/htmlpurifier/library/HTMLPurifier/ErrorCollector.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ErrorStruct.php b/lib/htmlpurifier/library/HTMLPurifier/ErrorStruct.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Exception.php b/lib/htmlpurifier/library/HTMLPurifier/Exception.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter.php b/lib/htmlpurifier/library/HTMLPurifier/Filter.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php old mode 100755 new mode 100644 index 970f9e0c9..bbf78a663 --- a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -38,7 +38,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter * @todo Extend to indicate non-text/css style blocks */ public function preFilter($html, $config, $context) { - $tidy = $config->get('FilterParam', 'ExtractStyleBlocksTidyImpl'); + $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl'); if ($tidy !== null) $this->_tidy = $tidy; $html = preg_replace_callback('#(.+)#isU', array($this, 'styleCallback'), $html); $style_blocks = $this->_styleMatches; @@ -62,7 +62,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter */ public function cleanCSS($css, $config, $context) { // prepare scope - $scope = $config->get('FilterParam', 'ExtractStyleBlocksScope'); + $scope = $config->get('Filter.ExtractStyleBlocks.Scope'); if ($scope !== null) { $scopes = array_map('trim', explode(',', $scope)); } else { @@ -120,7 +120,7 
@@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter $css = $this->_tidy->print->plain(); // we are going to escape any special characters <>& to ensure // that no funny business occurs (i.e. in a font-family prop). - if ($config->get('FilterParam', 'ExtractStyleBlocksEscaping')) { + if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { $css = str_replace( array('<', '>', '&'), array('\3C ', '\3E ', '\26 '), diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php old mode 100755 new mode 100644 index aca972f6c..23df221ea --- a/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php @@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter public function preFilter($html, $config, $context) { $pre_regex = '#]+>.+?'. - 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?#s'; + 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s'; $pre_replace = '\1'; return preg_replace($pre_regex, $pre_replace, $html); } public function postFilter($html, $config, $context) { - $post_regex = '#([A-Za-z0-9\-_]+)#'; + $post_regex = '#((?:v|cp)/[A-Za-z0-9\-_=]+)#'; return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); } @@ -24,10 +24,10 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter protected function postFilterCallback($matches) { $url = $this->armorUrl($matches[1]); return ''. - ''. + 'data="http://www.youtube.com/'.$url.'">'. + ''. ''. 
diff --git a/lib/htmlpurifier/library/HTMLPurifier/Generator.php b/lib/htmlpurifier/library/HTMLPurifier/Generator.php old mode 100755 new mode 100644 index a1b96b9e4..fee1a5f84 --- a/lib/htmlpurifier/library/HTMLPurifier/Generator.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Generator.php @@ -31,6 +31,22 @@ class HTMLPurifier_Generator */ private $_sortAttr; + /** + * Cache of %Output.FlashCompat + */ + private $_flashCompat; + + /** + * Cache of %Output.FixInnerHTML + */ + private $_innerHTMLFix; + + /** + * Stack for keeping track of object information when outputting IE + * compatibility code. + */ + private $_flashStack = array(); + /** * Configuration for the generator */ @@ -42,8 +58,10 @@ class HTMLPurifier_Generator */ public function __construct($config, $context) { $this->config = $config; - $this->_scriptFix = $config->get('Output', 'CommentScriptContents'); - $this->_sortAttr = $config->get('Output', 'SortAttr'); + $this->_scriptFix = $config->get('Output.CommentScriptContents'); + $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); + $this->_sortAttr = $config->get('Output.SortAttr'); + $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); $this->_xhtml = $this->_def->doctype->xml; } @@ -72,7 +90,7 @@ class HTMLPurifier_Generator } // Tidy cleanup - if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) { + if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { $tidy = new Tidy; $tidy->parseString($html, array( 'indent'=> true, @@ -86,9 +104,11 @@ class HTMLPurifier_Generator } // Normalize newlines to system defined value - $nl = $this->config->get('Output', 'Newline'); - if ($nl === null) $nl = PHP_EOL; - if ($nl !== "\n") $html = str_replace("\n", $nl, $html); + if ($this->config->get('Core.NormalizeNewlines')) { + $nl = $this->config->get('Output.Newline'); + if ($nl === null) $nl = PHP_EOL; + if ($nl !== "\n") $html = str_replace("\n", $nl, $html); 
+ } return $html; } @@ -104,12 +124,29 @@ class HTMLPurifier_Generator } elseif ($token instanceof HTMLPurifier_Token_Start) { $attr = $this->generateAttributes($token->attr, $token->name); + if ($this->_flashCompat) { + if ($token->name == "object") { + $flash = new stdclass(); + $flash->attr = $token->attr; + $flash->param = array(); + $this->_flashStack[] = $flash; + } + } return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token instanceof HTMLPurifier_Token_End) { - return 'name . '>'; + $_extra = ''; + if ($this->_flashCompat) { + if ($token->name == "object" && !empty($this->_flashStack)) { + // doesn't do anything for now + } + } + return $_extra . 'name . '>'; } elseif ($token instanceof HTMLPurifier_Token_Empty) { + if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { + $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; + } $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) //
v.
@@ -159,6 +196,37 @@ class HTMLPurifier_Generator continue; } } + // Workaround for Internet Explorer innerHTML bug. + // Essentially, Internet Explorer, when calculating + // innerHTML, omits quotes if there are no instances of + // angled brackets, quotes or spaces. However, when parsing + // HTML (for example, when you assign to innerHTML), it + // treats backticks as quotes. Thus, + // `` + // becomes + // `` + // becomes + // + // Fortunately, all we need to do is trigger an appropriate + // quoting style, which we do by adding an extra space. + // This also is consistent with the W3C spec, which states + // that user agents may ignore leading or trailing + // whitespace (in fact, most don't, at least for attributes + // like alt, but an extra space at the end is barely + // noticeable). Still, we have a configuration knob for + // this, since this transformation is not necesary if you + // don't process user input with innerHTML or you don't plan + // on supporting Internet Explorer. + if ($this->_innerHTMLFix) { + if (strpos($value, '`') !== false) { + // check if correct quoting style would not already be + // triggered + if (strcspn($value, '"\' <>') === strlen($value)) { + // protect! + $value .= ' '; + } + } + } $html .= $key.'="'.$this->escape($value).'" '; } return rtrim($html); @@ -174,7 +242,10 @@ class HTMLPurifier_Generator * permissible for non-attribute output. * @return String escaped data. 
*/ - public function escape($string, $quote = ENT_COMPAT) { + public function escape($string, $quote = null) { + // Workaround for APC bug on Mac Leopard reported by sidepodcast + // http://htmlpurifier.org/phorum/read.php?3,4823,4846 + if ($quote === null) $quote = ENT_COMPAT; return htmlspecialchars($string, $quote, 'UTF-8'); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php old mode 100755 new mode 100644 index 3368821c7..33bb38ac5 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php @@ -114,7 +114,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition * @note See HTMLPurifier_HTMLModule::addElement for detailed * parameter and return value descriptions. */ - public function addElement($element_name, $type, $contents, $attr_collections, $attributes) { + public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { $module = $this->getAnonymousModule(); // assume that if the user is calling this, the element // is safe. 
This may not be a good idea @@ -219,7 +219,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition */ protected function setupConfigStuff($config) { - $block_wrapper = $config->get('HTML', 'BlockWrapper'); + $block_wrapper = $config->get('HTML.BlockWrapper'); if (isset($this->info_content_sets['Block'][$block_wrapper])) { $this->info_block_wrapper = $block_wrapper; } else { @@ -227,7 +227,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition E_USER_ERROR); } - $parent = $config->get('HTML', 'Parent'); + $parent = $config->get('HTML.Parent'); $def = $this->manager->getElement($parent, true); if ($def) { $this->info_parent = $parent; @@ -244,11 +244,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition // setup allowed elements ----------------------------------------- - $allowed_elements = $config->get('HTML', 'AllowedElements'); - $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early + $allowed_elements = $config->get('HTML.AllowedElements'); + $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { - $allowed = $config->get('HTML', 'Allowed'); + $allowed = $config->get('HTML.Allowed'); if (is_string($allowed)) { list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); } @@ -300,7 +300,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition unset($allowed_attributes_mutable[$key]); } } - if ($delete) unset($this->info[$tag]->attr[$attr]); + if ($delete) { + if ($this->info[$tag]->attr[$attr]->required) { + trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); + } + unset($this->info[$tag]->attr[$attr]); + } } } // emit errors @@ -334,8 +339,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition // setup forbidden elements 
--------------------------------------- - $forbidden_elements = $config->get('HTML', 'ForbiddenElements'); - $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes'); + $forbidden_elements = $config->get('HTML.ForbiddenElements'); + $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); foreach ($this->info as $tag => $info) { if (isset($forbidden_elements[$tag])) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Bdo.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Bdo.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/CommonAttributes.php old mode 100755 new mode 100644 index fdf7b3239..7c15da84f --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/CommonAttributes.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -8,7 +8,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 'Core' => array( 0 => array('Style'), // 'xml:space' => false, - 'class' => 'NMTOKENS', + 'class' => 'Class', 'id' => 'ID', 'title' => 'CDATA', ), @@ -20,6 +20,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule 0 => array('Core', 'I18N') ) ); + } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Edit.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Edit.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Hypertext.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Hypertext.php old mode 100755 new mode 100644 diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Image.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Image.php old mode 100755 new mode 100644 index e6ed53aea..948d435bc --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Image.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Image.php @@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule public $name = 'Image'; public function setup($config) { - $max = $config->get('HTML', 'MaxImgLength'); + $max = $config->get('HTML.MaxImgLength'); $img = $this->addElement( 'img', 'Inline', 'Empty', 'Common', array( @@ -24,7 +24,7 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded ) ); - if ($max === null || $config->get('HTML', 'Trusted')) { + if ($max === null || $config->get('HTML.Trusted')) { $img->attr['height'] = $img->attr['width'] = 'Length'; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Legacy.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php old mode 100755 new mode 100644 index 1d15f2729..74d4522f4 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/List.php @@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule public $content_sets = array('Flow' => 'List'); public function setup($config) { - $this->addElement('ol', 'List', 'Required: li', 'Common'); - $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); + $ol->wrap = "li"; + $ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ul->wrap = "li"; $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('li', false, 'Flow', 'Common'); diff 
--git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Name.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Name.php old mode 100755 new mode 100644 index d908a0ada..05694b450 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Name.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Name.php @@ -9,7 +9,10 @@ class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule $elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map'); foreach ($elements as $name) { $element = $this->addBlankElement($name); - $element->attr['name'] = 'ID'; + $element->attr['name'] = 'CDATA'; + if (!$config->get('HTML.Attr.Name.UseCDATA')) { + $element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync(); + } } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Nofollow.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Nofollow.php new file mode 100644 index 000000000..3aa6654a5 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Nofollow.php @@ -0,0 +1,19 @@ +addBlankElement('a'); + $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Object.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Object.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Presentation.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Presentation.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Proprietary.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Proprietary.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Ruby.php 
b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Ruby.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php old mode 100755 new mode 100644 index 635e8f2d2..9f3758a32 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeEmbed.php @@ -10,7 +10,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule public function setup($config) { - $max = $config->get('HTML', 'MaxImgLength'); + $max = $config->get('HTML.MaxImgLength'); $embed = $this->addElement( 'embed', 'Inline', 'Empty', 'Common', array( @@ -20,7 +20,8 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule 'height' => 'Pixels#' . $max, 'allowscriptaccess' => 'Enum#never', 'allownetworking' => 'Enum#internal', - 'wmode' => 'Enum#window', + 'flashvars' => 'Text', + 'wmode' => 'Enum#window,transparent,opaque', 'name' => 'ID', ) ); diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeObject.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeObject.php old mode 100755 new mode 100644 index bbda7a214..00da342ef --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeObject.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeObject.php @@ -16,7 +16,7 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule // These definitions are not intrinsically safe: the attribute transforms // are a vital part of ensuring safety. - $max = $config->get('HTML', 'MaxImgLength'); + $max = $config->get('HTML.MaxImgLength'); $object = $this->addElement( 'object', 'Inline', @@ -28,7 +28,9 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule 'type' => 'Enum#application/x-shockwave-flash', 'width' => 'Pixels#' . $max, 'height' => 'Pixels#' . 
$max, - 'data' => 'URI#embedded' + 'data' => 'URI#embedded', + 'codebase' => new HTMLPurifier_AttrDef_Enum(array( + 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')), ) ); $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Scripting.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Scripting.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/StyleAttribute.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/StyleAttribute.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tables.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Target.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Target.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Text.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Text.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy.php old mode 100755 new mode 100644 index c9d470648..21783f18e --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy.php @@ -42,12 +42,12 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule $this->makeFixesForLevel($fixes); // figure out which fixes to use - $level = $config->get('HTML', 'TidyLevel'); + $level = $config->get('HTML.TidyLevel'); $fixes_lookup = $this->getFixesForLevel($level); // get custom fix declarations: these need namespace processing - $add_fixes = $config->get('HTML', 'TidyAdd'); - $remove_fixes = $config->get('HTML', 'TidyRemove'); + $add_fixes = $config->get('HTML.TidyAdd'); + $remove_fixes = 
$config->get('HTML.TidyRemove'); foreach ($fixes as $name => $fix) { // needs to be refactored a little to implement globbing diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Name.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Name.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php old mode 100755 new mode 100644 index 85fa90a94..14c15c4a0 --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php @@ -15,6 +15,7 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_T $r['thead@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tbody@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['table@height'] = new HTMLPurifier_AttrTransform_Length('height'); return $r; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Strict.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Strict.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Transitional.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Transitional.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php old mode 100755 new mode 100644 diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php old mode 100755 new mode 100644 index 78f38781d..362e3b78d --- a/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php +++ b/lib/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php @@ -199,15 +199,15 @@ class HTMLPurifier_HTMLModuleManager */ public function setup($config) { - $this->trusted = $config->get('HTML', 'Trusted'); + $this->trusted = $config->get('HTML.Trusted'); // generate $this->doctype = $this->doctypes->make($config); $modules = $this->doctype->modules; // take out the default modules that aren't allowed - $lookup = $config->get('HTML', 'AllowedModules'); - $special_cases = $config->get('HTML', 'CoreModules'); + $lookup = $config->get('HTML.AllowedModules'); + $special_cases = $config->get('HTML.CoreModules'); if (is_array($lookup)) { foreach ($modules as $k => $m) { @@ -216,19 +216,19 @@ class HTMLPurifier_HTMLModuleManager } } - // add proprietary module (this gets special treatment because - // it is completely removed from doctypes, etc.) 
- if ($config->get('HTML', 'Proprietary')) { + // custom modules + if ($config->get('HTML.Proprietary')) { $modules[] = 'Proprietary'; } - - // add SafeObject/Safeembed modules - if ($config->get('HTML', 'SafeObject')) { + if ($config->get('HTML.SafeObject')) { $modules[] = 'SafeObject'; } - if ($config->get('HTML', 'SafeEmbed')) { + if ($config->get('HTML.SafeEmbed')) { $modules[] = 'SafeEmbed'; } + if ($config->get('HTML.Nofollow')) { + $modules[] = 'Nofollow'; + } // merge in custom modules $modules = array_merge($modules, $this->userModules); diff --git a/lib/htmlpurifier/library/HTMLPurifier/IDAccumulator.php b/lib/htmlpurifier/library/HTMLPurifier/IDAccumulator.php old mode 100755 new mode 100644 index d546cd751..73215295a --- a/lib/htmlpurifier/library/HTMLPurifier/IDAccumulator.php +++ b/lib/htmlpurifier/library/HTMLPurifier/IDAccumulator.php @@ -23,7 +23,7 @@ class HTMLPurifier_IDAccumulator */ public static function build($config, $context) { $id_accumulator = new HTMLPurifier_IDAccumulator(); - $id_accumulator->load($config->get('Attr', 'IDBlacklist')); + $id_accumulator->load($config->get('Attr.IDBlacklist')); return $id_accumulator; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector.php b/lib/htmlpurifier/library/HTMLPurifier/Injector.php old mode 100755 new mode 100644 index bc093b805..5922f8130 --- a/lib/htmlpurifier/library/HTMLPurifier/Injector.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector.php @@ -137,6 +137,12 @@ abstract class HTMLPurifier_Injector if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { return false; } + // check for exclusion + for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { + $node = $this->currentNesting[$i]; + $def = $this->htmlDefinition->info[$node->name]; + if (isset($def->excludes[$name])) return false; + } return true; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php 
b/lib/htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php old mode 100755 new mode 100644 index 8cc952549..afa760892 --- a/lib/htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php @@ -34,16 +34,21 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // ---- // This is a degenerate case } else { - // State 1.2: PAR1 - // ---- + if (!$token->is_whitespace || $this->_isInline($current)) { + // State 1.2: PAR1 + // ---- - // State 1.3: PAR1\n\nPAR2 - // ------------ + // State 1.3: PAR1\n\nPAR2 + // ------------ - // State 1.4:
PAR1\n\nPAR2 (see State 2) - // ------------ - $token = array($this->_pStart()); - $this->_splitText($text, $token); + // State 1.4:
PAR1\n\nPAR2 (see State 2) + // ------------ + $token = array($this->_pStart()); + $this->_splitText($text, $token); + } else { + // State 1.5: \n
+ // -- + } } } else { // State 2:
PAR1... (similar to 1.4) diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/DisplayLinkURI.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/DisplayLinkURI.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/PurifierLinkify.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/PurifierLinkify.php old mode 100755 new mode 100644 index 3c706a33a..ad2455a91 --- a/lib/htmlpurifier/library/HTMLPurifier/Injector/PurifierLinkify.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector/PurifierLinkify.php @@ -12,7 +12,7 @@ class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector public $needed = array('a' => array('href')); public function prepare($config, $context) { - $this->docURL = $config->get('AutoFormatParam', 'PurifierLinkifyDocURL'); + $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); return parent::prepare($config, $context); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php old mode 100755 new mode 100644 index d85ca97d9..638bfca03 --- a/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -3,12 +3,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector { - private $context, $config; + private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; public function prepare($config, $context) { parent::prepare($config, $context); $this->config = $config; $this->context = $context; + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); $this->attrValidator = new HTMLPurifier_AttrValidator(); } @@ 
-17,7 +19,14 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $next = false; for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { $next = $this->inputTokens[$i]; - if ($next instanceof HTMLPurifier_Token_Text && $next->is_whitespace) continue; + if ($next instanceof HTMLPurifier_Token_Text) { + if ($next->is_whitespace) continue; + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { + $plain = str_replace("\xC2\xA0", "", $next->data); + $isWsOrNbsp = $plain === '' || ctype_space($plain); + if ($isWsOrNbsp) continue; + } + } break; } if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php new file mode 100644 index 000000000..b21313470 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php @@ -0,0 +1,60 @@ +attrValidator = new HTMLPurifier_AttrValidator(); + $this->config = $config; + $this->context = $context; + return parent::prepare($config, $context); + } + + public function handleElement(&$token) { + if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { + return; + } + + // We need to validate the attributes now since this doesn't normally + // happen until after MakeWellFormed. If all the attributes are removed + // the span needs to be removed too. 
+ $this->attrValidator->validateToken($token, $this->config, $this->context); + $token->armor['ValidateAttributes'] = true; + + if (!empty($token->attr)) { + return; + } + + $nesting = 0; + $spanContentTokens = array(); + while ($this->forwardUntilEndToken($i, $current, $nesting)) {} + + if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { + // Mark closing span tag for deletion + $current->markForDeletion = true; + // Delete open span tag + $token = false; + } + } + + public function handleEnd(&$token) { + if ($token->markForDeletion) { + $token = false; + } + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php b/lib/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php old mode 100755 new mode 100644 index 341582868..c1d8b0412 --- a/lib/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php @@ -20,6 +20,9 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector protected $allowedParam = array( 'wmode' => true, 'movie' => true, + 'flashvars' => true, + 'src' => true, + 'allowFullScreen' => true, // if omitted, assume to be 'false' ); public function prepare($config, $context) { @@ -47,7 +50,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector // We need this fix because YouTube doesn't supply a data // attribute, which we need if a type is specified. This is // *very* Flash specific. 
- if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') { + if (!isset($this->objectStack[$i]->attr['data']) && + ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { $this->objectStack[$i]->attr['data'] = $token->attr['value']; } // Check if the parameter is the correct value but has not diff --git a/lib/htmlpurifier/library/HTMLPurifier/Language.php b/lib/htmlpurifier/library/HTMLPurifier/Language.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Language/classes/en-x-test.php b/lib/htmlpurifier/library/HTMLPurifier/Language/classes/en-x-test.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en-x-test.php b/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en-x-test.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en-x-testmini.php b/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en-x-testmini.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en.php b/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en.php old mode 100755 new mode 100644 index aab2e52eb..8d7b5736b --- a/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Language/messages/en.php @@ -23,6 +23,7 @@ $messages = array( 'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing attribute key' => 'Attribute declaration has no key', 'Lexer: Missing end quote' => 'Attribute declaration has no end quote', +'Lexer: Extracted body' => 'Removed document metadata tags', 'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/LanguageFactory.php b/lib/htmlpurifier/library/HTMLPurifier/LanguageFactory.php old mode 100755 new mode 100644 index baa4422eb..134ef8c74 --- a/lib/htmlpurifier/library/HTMLPurifier/LanguageFactory.php +++ b/lib/htmlpurifier/library/HTMLPurifier/LanguageFactory.php @@ -85,7 +85,7 @@ class HTMLPurifier_LanguageFactory // validate language code if ($code === false) { $code = $this->validator->validate( - $config->get('Core', 'Language'), $config, $context + $config->get('Core.Language'), $config, $context ); } else { $code = $this->validator->validate($code, $config, $context); diff --git a/lib/htmlpurifier/library/HTMLPurifier/Length.php b/lib/htmlpurifier/library/HTMLPurifier/Length.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer.php old mode 100755 new mode 100644 index 945886998..9bdbbbb25 --- a/lib/htmlpurifier/library/HTMLPurifier/Lexer.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer.php @@ -73,12 +73,12 @@ class HTMLPurifier_Lexer HTMLPurifier_Lexer::create() is deprecated, please instead use %Core.LexerImpl", E_USER_WARNING); } else { - $lexer = $config->get('Core', 'LexerImpl'); + $lexer = $config->get('Core.LexerImpl'); } $needs_tracking = - $config->get('Core', 'MaintainLineNumbers') || - $config->get('Core', 'CollectErrors'); + $config->get('Core.MaintainLineNumbers') || + $config->get('Core.CollectErrors'); $inst = null; if (is_object($lexer)) { @@ -230,6 +230,17 @@ class HTMLPurifier_Lexer ); } + /** + * Special Internet Explorer conditional comments should be removed. + */ + protected static function removeIEConditional($string) { + return preg_replace( + '##si', // probably should generalize for all strings + '', + $string + ); + } + /** * Callback function for escapeCDATA() that does the work. 
* @@ -252,10 +263,12 @@ class HTMLPurifier_Lexer public function normalize($html, $config, $context) { // normalize newlines to \n - $html = str_replace("\r\n", "\n", $html); - $html = str_replace("\r", "\n", $html); + if ($config->get('Core.NormalizeNewlines')) { + $html = str_replace("\r\n", "\n", $html); + $html = str_replace("\r", "\n", $html); + } - if ($config->get('HTML', 'Trusted')) { + if ($config->get('HTML.Trusted')) { // escape convoluted CDATA $html = $this->escapeCommentedCDATA($html); } @@ -263,9 +276,19 @@ class HTMLPurifier_Lexer // escape CDATA $html = $this->escapeCDATA($html); + $html = $this->removeIEConditional($html); + // extract body from document if applicable - if ($config->get('Core', 'ConvertDocumentToFragment')) { - $html = $this->extractBody($html); + if ($config->get('Core.ConvertDocumentToFragment')) { + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + $new_html = $this->extractBody($html); + if ($e && $new_html != $html) { + $e->send(E_WARNING, 'Lexer: Extracted body'); + } + $html = $new_html; } // expand entities that aren't the big five @@ -276,6 +299,11 @@ class HTMLPurifier_Lexer // represent non-SGML characters (horror, horror!) 
$html = HTMLPurifier_Encoder::cleanUTF8($html); + // if processing instructions are to removed, remove them now + if ($config->get('Core.RemoveProcessingInstructions')) { + $html = preg_replace('#<\?.+?\?>#s', '', $html); + } + return $html; } @@ -285,7 +313,7 @@ class HTMLPurifier_Lexer */ public function extractBody($html) { $matches = array(); - $result = preg_match('!]*>(.+?)!is', $html, $matches); + $result = preg_match('!]*>(.*)!is', $html, $matches); if ($result) { return $matches[1]; } else { diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/DOMLex.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/DOMLex.php old mode 100755 new mode 100644 index 0db3974bf..82f377450 --- a/lib/htmlpurifier/library/HTMLPurifier/Lexer/DOMLex.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/DOMLex.php @@ -41,7 +41,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer // attempt to armor stray angled brackets that cannot possibly // form tags and thus are probably being used as emoticons - if ($config->get('Core', 'AggressivelyFixLt')) { + if ($config->get('Core.AggressivelyFixLt')) { $char = '[^a-z!\/]'; $comment = "/|\z)/is"; $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); @@ -72,23 +72,57 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer } /** - * Recursive function that tokenizes a node, putting it into an accumulator. - * + * Iterative function that tokenizes a node, putting it into an accumulator. + * To iterate is human, to recurse divine - L. Peter Deutsch * @param $node DOMNode to be tokenized. * @param $tokens Array-list of already tokenized tokens. - * @param $collect Says whether or start and close are collected, set to - * false at first recursion because it's the implicit DIV - * tag you're dealing with. * @returns Tokens of node appended to previously passed tokens. 
*/ - protected function tokenizeDOM($node, &$tokens, $collect = false) { + protected function tokenizeDOM($node, &$tokens) { + $level = 0; + $nodes = array($level => array($node)); + $closingNodes = array(); + do { + while (!empty($nodes[$level])) { + $node = array_shift($nodes[$level]); // FIFO + $collect = $level > 0 ? true : false; + $needEndingTag = $this->createStartNode($node, $tokens, $collect); + if ($needEndingTag) { + $closingNodes[$level][] = $node; + } + if ($node->childNodes && $node->childNodes->length) { + $level++; + $nodes[$level] = array(); + foreach ($node->childNodes as $childNode) { + array_push($nodes[$level], $childNode); + } + } + } + $level--; + if ($level && isset($closingNodes[$level])) { + while($node = array_pop($closingNodes[$level])) { + $this->createEndNode($node, $tokens); + } + } + } while ($level > 0); + } + + /** + * @param $node DOMNode to be tokenized. + * @param $tokens Array-list of already tokenized tokens. + * @param $collect Says whether or start and close are collected, set to + * false at first recursion because it's the implicit DIV + * tag you're dealing with. + * @returns bool if the token needs an endtoken + */ + protected function createStartNode($node, &$tokens, $collect) { // intercept non element nodes. 
WE MUST catch all of them, // but we're not getting the character reference nodes because // those should have been preprocessed if ($node->nodeType === XML_TEXT_NODE) { $tokens[] = $this->factory->createText($node->data); - return; + return false; } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { // undo libxml's special treatment of )#si', array($this, 'scriptCallback'), $html); } @@ -45,12 +45,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $array = array(); // result array // This is also treated to mean maintain *column* numbers too - $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers'); + $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); if ($maintain_line_numbers === null) { // automatically determine line numbering by checking // if error collection is on - $maintain_line_numbers = $config->get('Core', 'CollectErrors'); + $maintain_line_numbers = $config->get('Core.CollectErrors'); } if ($maintain_line_numbers) { @@ -67,10 +67,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $nl = "\n"; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. 
Set to 0 to turn off - $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval'); + $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); $e = false; - if ($config->get('Core', 'CollectErrors')) { + if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } @@ -345,7 +345,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($string == '') return array(); // no attributes $e = false; - if ($config->get('Core', 'CollectErrors')) { + if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } @@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } } if ($value === false) $value = ''; - return array($key => $value); + return array($key => $this->parseData($value)); } // setup loop environment diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php old mode 100755 new mode 100644 index 57cffa82a..1d358c7b6 --- a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PEARSax3.php @@ -26,13 +26,20 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer * Internal accumulator array for SAX parsers. 
*/ protected $tokens = array(); + protected $last_token_was_empty; + + private $parent_handler; + private $stack = array(); public function tokenizeHTML($string, $config, $context) { $this->tokens = array(); + $this->last_token_was_empty = false; $string = $this->normalize($string, $config, $context); + $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler')); + $parser = new XML_HTMLSax3(); $parser->set_object($this); $parser->set_element_handler('openHandler','closeHandler'); @@ -44,6 +51,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer $parser->parse($string); + restore_error_handler(); + return $this->tokens; } @@ -58,9 +67,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer } if ($closed) { $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); + $this->last_token_was_empty = true; } else { $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); } + $this->stack[] = $name; return true; } @@ -71,10 +82,12 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer // HTMLSax3 seems to always send empty tags an extra close tag // check and ignore if you see it: // [TESTME] to make sure it doesn't overreach - if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) { + if ($this->last_token_was_empty) { + $this->last_token_was_empty = false; return true; } $this->tokens[] = new HTMLPurifier_Token_End($name); + if (!empty($this->stack)) array_pop($this->stack); return true; } @@ -82,6 +95,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer * Data event handler, interface is defined by PEAR package. 
*/ public function dataHandler(&$parser, $data) { + $this->last_token_was_empty = false; $this->tokens[] = new HTMLPurifier_Token_Text($data); return true; } @@ -91,7 +105,18 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer */ public function escapeHandler(&$parser, $data) { if (strpos($data, '--') === 0) { - $this->tokens[] = new HTMLPurifier_Token_Comment($data); + // remove trailing and leading double-dashes + $data = substr($data, 2); + if (strlen($data) >= 2 && substr($data, -2) == "--") { + $data = substr($data, 0, -2); + } + if (isset($this->stack[sizeof($this->stack) - 1]) && + $this->stack[sizeof($this->stack) - 1] == "style") { + $this->tokens[] = new HTMLPurifier_Token_Text($data); + } else { + $this->tokens[] = new HTMLPurifier_Token_Comment($data); + } + $this->last_token_was_empty = false; } // CDATA is handled elsewhere, but if it was handled here: //if (strpos($data, '[CDATA[') === 0) { @@ -101,6 +126,14 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer return true; } + /** + * An error handler that mutes strict errors + */ + public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) { + if ($errno == E_STRICT) return; + return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext); + } + } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php old mode 100755 new mode 100644 index fa1bf973e..faf00b829 --- a/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php @@ -125,8 +125,6 @@ class HTML5 { const EOF = 5; public function __construct($data) { - $data = str_replace("\r\n", "\n", $data); - $data = str_replace("\r", null, $data); $this->data = $data; $this->char = -1; diff --git a/lib/htmlpurifier/library/HTMLPurifier/PercentEncoder.php b/lib/htmlpurifier/library/HTMLPurifier/PercentEncoder.php old 
mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer.php b/lib/htmlpurifier/library/HTMLPurifier/Printer.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer/CSSDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/Printer/CSSDefinition.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.css b/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.css old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.js b/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.js old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php b/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php old mode 100755 new mode 100644 index 744dec659..02aa65689 --- a/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php @@ -91,7 +91,7 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer $all = array(); foreach ($allowed as $key) { list($ns, $directive) = $key; - $all[$ns][$directive] = $config->get($ns, $directive); + $all[$ns][$directive] = $config->get($ns .'.'. 
$directive); } $ret = ''; @@ -159,7 +159,7 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer $ret .= $this->end('th'); $ret .= $this->start('td'); - $def = $this->config->def->info[$ns][$directive]; + $def = $this->config->def->info["$ns.$directive"]; if (is_int($def)) { $allow_null = $def < 0; $type = abs($def); @@ -248,7 +248,7 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { $this->prepareGenerator($gen_config); // this should probably be split up a little $ret = ''; - $def = $config->def->info[$ns][$directive]; + $def = $config->def->info["$ns.$directive"]; if (is_int($def)) { $type = abs($def); } else { diff --git a/lib/htmlpurifier/library/HTMLPurifier/Printer/HTMLDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/Printer/HTMLDefinition.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/PropertyList.php b/lib/htmlpurifier/library/HTMLPurifier/PropertyList.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/PropertyListIterator.php b/lib/htmlpurifier/library/HTMLPurifier/PropertyListIterator.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/Core.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/Core.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php old mode 100755 new mode 
100644 index 0ec811f8a..c7aa1bb86 --- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -2,6 +2,14 @@ /** * Takes tokens makes them well-formed (balance end tags, etc.) + * + * Specification of the armor attributes this strategy uses: + * + * - MakeWellFormed_TagClosedError: This armor field is used to + * suppress tag closed errors for certain tokens [TagClosedSuppress], + * in particular, if a tag was generated automatically by HTML + * Purifier, we may rely on our infrastructure to close it for us + * and shouldn't report an error to the user [TagClosedAuto]. */ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy { @@ -42,7 +50,13 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // local variables $generator = new HTMLPurifier_Generator($config, $context); - $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + // used for autoclose early abortion + $global_parent_allowed_elements = array(); + if (isset($definition->info[$definition->info_parent])) { + // may be unset under testing circumstances + $global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config); + } $e = $context->get('ErrorCollector', true); $t = false; // token index $i = false; // injector index @@ -72,6 +86,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $custom_injectors = $injectors['Custom']; unset($injectors['Custom']); // special case foreach ($injectors as $injector => $b) { + // XXX: Fix with a legitimate lookup table of enabled filters + if (strpos($injector, '.') !== false) continue; $injector = "HTMLPurifier_Injector_$injector"; if (!$b) continue; $this->injectors[] = new $injector; @@ -81,6 +97,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy 
$this->injectors[] = $injector; } foreach ($custom_injectors as $injector) { + if (!$injector) continue; if (is_string($injector)) { $injector = "HTMLPurifier_Injector_$injector"; $injector = new $injector; @@ -99,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // -- end INJECTOR -- - // a note on punting: + // a note on reprocessing: // In order to reduce code duplication, whenever some code needs // to make HTML changes in order to make things "correct", the // new HTML gets sent through the purifier, regardless of its @@ -146,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $top_nesting = array_pop($this->stack); $this->stack[] = $top_nesting; - // send error + // send error [TagClosedSuppress] if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); } @@ -162,6 +179,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $token = $tokens[$t]; //echo '
'; printTokens($tokens, $t); printTokens($this->stack); + //flush(); // quick-check: if it's not a tag, no need to process if (empty($token->is_tag)) { @@ -189,12 +207,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $ok = false; if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { // claims to be a start tag but is empty - $token = new HTMLPurifier_Token_Empty($token->name, $token->attr); + $token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor); $ok = true; } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { // claims to be empty but really is a start tag $this->swap(new HTMLPurifier_Token_End($token->name)); - $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr)); + $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor)); // punt (since we had to modify the input stream in a non-trivial way) $reprocess = true; continue; @@ -207,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // ...unless they also have to close their parent if (!empty($this->stack)) { + // Performance note: you might think that it's rather + // inefficient, recalculating the autoclose information + // for every tag that a token closes (since when we + // do an autoclose, we push a new token into the + // stream and then /process/ that, before + // re-processing this token.) But this is + // necessary, because an injector can make an + // arbitrary transformations to the autoclosing + // tokens we introduce, so things may have changed + // in the meantime. 
Also, doing the inefficient thing is + // "easy" to reason about (for certain perverse definitions + // of "easy") + $parent = array_pop($this->stack); $this->stack[] = $parent; @@ -217,30 +248,73 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $autoclose = false; } + if ($autoclose && $definition->info[$token->name]->wrap) { + // Check if an element can be wrapped by another + // element to make it valid in a context (for + // example,
      needs a
    • in between) + $wrapname = $definition->info[$token->name]->wrap; + $wrapdef = $definition->info[$wrapname]; + $elements = $wrapdef->child->getAllowedElements($config); + $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config); + if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) { + $newtoken = new HTMLPurifier_Token_Start($wrapname); + $this->insertBefore($newtoken); + $reprocess = true; + continue; + } + } + $carryover = false; if ($autoclose && $definition->info[$parent->name]->formatting) { $carryover = true; } if ($autoclose) { - // errors need to be updated - $new_token = new HTMLPurifier_Token_End($parent->name); - $new_token->start = $parent; - if ($carryover) { - $element = clone $parent; - $element->armor['MakeWellFormed_TagClosedError'] = true; - $element->carryover = true; - $this->processToken(array($new_token, $token, $element)); - } else { - $this->insertBefore($new_token); - } - if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { - if (!$carryover) { - $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); - } else { - $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + // check if this autoclose is doomed to fail + // (this rechecks $parent, which his harmless) + $autoclose_ok = isset($global_parent_allowed_elements[$token->name]); + if (!$autoclose_ok) { + foreach ($this->stack as $ancestor) { + $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config); + if (isset($elements[$token->name])) { + $autoclose_ok = true; + break; + } + if ($definition->info[$token->name]->wrap) { + $wrapname = $definition->info[$token->name]->wrap; + $wrapdef = $definition->info[$wrapname]; + $wrap_elements = $wrapdef->child->getAllowedElements($config); + if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) { + $autoclose_ok = true; + break; + } + } } } + if ($autoclose_ok) { + // errors need to be updated + 
$new_token = new HTMLPurifier_Token_End($parent->name); + $new_token->start = $parent; + if ($carryover) { + $element = clone $parent; + // [TagClosedAuto] + $element->armor['MakeWellFormed_TagClosedError'] = true; + $element->carryover = true; + $this->processToken(array($new_token, $token, $element)); + } else { + $this->insertBefore($new_token); + } + // [TagClosedSuppress] + if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { + if (!$carryover) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); + } else { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + } + } + } else { + $this->remove(); + } $reprocess = true; continue; } @@ -346,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($e) { for ($j = $c - 1; $j > 0; $j--) { // notice we exclude $j == 0, i.e. the current ending tag, from - // the errors... + // the errors... [TagClosedSuppress] if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); } @@ -361,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $new_token->start = $skipped_tags[$j]; array_unshift($replace, $new_token); if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { + // [TagClosedAuto] $element = clone $skipped_tags[$j]; $element->carryover = true; $element->armor['MakeWellFormed_TagClosedError'] = true; @@ -429,7 +504,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } /** - * Inserts a token before the current token. Cursor now points to this token + * Inserts a token before the current token. Cursor now points to + * this token. You must reprocess after this. 
*/ private function insertBefore($token) { array_splice($this->tokens, $this->t, 0, array($token)); @@ -437,14 +513,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy /** * Removes current token. Cursor now points to new token occupying previously - * occupied space. + * occupied space. You must reprocess after this. */ private function remove() { array_splice($this->tokens, $this->t, 1); } /** - * Swap current token with new token. Cursor points to new token (no change). + * Swap current token with new token. Cursor points to new token (no + * change). You must reprocess after this. */ private function swap($token) { $this->tokens[$this->t] = $token; diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php old mode 100755 new mode 100644 index d30e456db..cf3a33e40 --- a/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -16,14 +16,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $generator = new HTMLPurifier_Generator($config, $context); $result = array(); - $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); - $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg'); + $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + $remove_invalid_img = $config->get('Core.RemoveInvalidImg'); // currently only used to determine if comments should be kept - $trusted = $config->get('HTML', 'Trusted'); + $trusted = $config->get('HTML.Trusted'); - $remove_script_contents = $config->get('Core', 'RemoveScriptContents'); - $hidden_elements = $config->get('Core', 'HiddenElements'); + $remove_script_contents = $config->get('Core.RemoveScriptContents'); + $hidden_elements = $config->get('Core.HiddenElements'); // remove script contents compatibility if ($remove_script_contents === 
true) { @@ -44,7 +44,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy $context->register('CurrentToken', $token); $e = false; - if ($config->get('Core', 'CollectErrors')) { + if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Strategy/ValidateAttributes.php b/lib/htmlpurifier/library/HTMLPurifier/Strategy/ValidateAttributes.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/StringHash.php b/lib/htmlpurifier/library/HTMLPurifier/StringHash.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/StringHashParser.php b/lib/htmlpurifier/library/HTMLPurifier/StringHashParser.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/TagTransform.php b/lib/htmlpurifier/library/HTMLPurifier/TagTransform.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Font.php b/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Font.php old mode 100755 new mode 100644 index ed2463786..9db2db795 --- a/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Font.php +++ b/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Font.php @@ -63,13 +63,15 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform // handle size transform if (isset($attr['size'])) { // normalize large numbers - if ($attr['size']{0} == '+' || $attr['size']{0} == '-') { - $size = (int) $attr['size']; - if ($size < -2) $attr['size'] = '-2'; - if ($size > 4) $attr['size'] = '+4'; - } else { - $size = (int) $attr['size']; - if ($size > 7) $attr['size'] = '7'; + if ($attr['size'] !== '') { + if ($attr['size']{0} == '+' || $attr['size']{0} == '-') { + $size = (int) $attr['size']; + if ($size < -2) $attr['size'] = '-2'; + if ($size > 4) $attr['size'] = '+4'; + } else { + $size = (int) $attr['size']; + if ($size > 7) $attr['size'] = '7'; + } } if 
(isset($this->_size_lookup[$attr['size']])) { $prepend_style .= 'font-size:' . diff --git a/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Simple.php b/lib/htmlpurifier/library/HTMLPurifier/TagTransform/Simple.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token.php b/lib/htmlpurifier/library/HTMLPurifier/Token.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/End.php b/lib/htmlpurifier/library/HTMLPurifier/Token/End.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Start.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Start.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Tag.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Tag.php old mode 100755 new mode 100644 index 798be028e..f4d8f640e --- a/lib/htmlpurifier/library/HTMLPurifier/Token/Tag.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Token/Tag.php @@ -33,7 +33,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token * @param $name String name. * @param $attr Associative array of attributes. */ - public function __construct($name, $attr = array(), $line = null, $col = null) { + public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) { $this->name = ctype_lower($name) ? 
$name : strtolower($name); foreach ($attr as $key => $value) { // normalization only necessary when key is not lowercase @@ -50,6 +50,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token $this->attr = $attr; $this->line = $line; $this->col = $col; + $this->armor = $armor; } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/Token/Text.php b/lib/htmlpurifier/library/HTMLPurifier/Token/Text.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/TokenFactory.php b/lib/htmlpurifier/library/HTMLPurifier/TokenFactory.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URI.php b/lib/htmlpurifier/library/HTMLPurifier/URI.php old mode 100755 new mode 100644 index 8b50d0d18..efdfb2c68 --- a/lib/htmlpurifier/library/HTMLPurifier/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URI.php @@ -67,14 +67,6 @@ class HTMLPurifier_URI $chars_gen_delims = ':/?#[]@'; $chars_pchar = $chars_sub_delims . ':@'; - // validate scheme (MUST BE FIRST!) - if (!is_null($this->scheme) && is_null($this->host)) { - $def = $config->getDefinition('URI'); - if ($def->defaultScheme === $this->scheme) { - $this->scheme = null; - } - } - // validate host if (!is_null($this->host)) { $host_def = new HTMLPurifier_AttrDef_URI_Host(); @@ -82,6 +74,21 @@ class HTMLPurifier_URI if ($this->host === false) $this->host = null; } + // validate scheme + // NOTE: It's not appropriate to check whether or not this + // scheme is in our registry, since a URIFilter may convert a + // URI that we don't allow into one we do. So instead, we just + // check if the scheme can be dropped because there is no host + // and it is our default scheme. 
+ if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') { + // support for relative paths is pretty abysmal when the + // scheme is present, so axe it when possible + $def = $config->getDefinition('URI'); + if ($def->defaultScheme === $this->scheme) { + $this->scheme = null; + } + } + // validate username if (!is_null($this->userinfo)) { $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':'); @@ -96,32 +103,48 @@ class HTMLPurifier_URI // validate path $path_parts = array(); $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/'); - if (!is_null($this->host)) { + if (!is_null($this->host)) { // this catches $this->host === '' // path-abempty (hier and relative) + // http://www.example.com/my/path + // //www.example.com/my/path (looks odd, but works, and + // recognized by most browsers) + // (this set is valid or invalid on a scheme by scheme + // basis, so we'll deal with it later) + // file:///my/path + // ///my/path $this->path = $segments_encoder->encode($this->path); - } elseif ($this->path !== '' && $this->path[0] === '/') { - // path-absolute (hier and relative) - if (strlen($this->path) >= 2 && $this->path[1] === '/') { - // This shouldn't ever happen! 
- $this->path = ''; - } else { + } elseif ($this->path !== '') { + if ($this->path[0] === '/') { + // path-absolute (hier and relative) + // http:/my/path + // /my/path + if (strlen($this->path) >= 2 && $this->path[1] === '/') { + // This could happen if both the host gets stripped + // out + // http://my/path + // //my/path + $this->path = ''; + } else { + $this->path = $segments_encoder->encode($this->path); + } + } elseif (!is_null($this->scheme)) { + // path-rootless (hier) + // http:my/path + // Short circuit evaluation means we don't need to check nz $this->path = $segments_encoder->encode($this->path); - } - } elseif (!is_null($this->scheme) && $this->path !== '') { - // path-rootless (hier) - // Short circuit evaluation means we don't need to check nz - $this->path = $segments_encoder->encode($this->path); - } elseif (is_null($this->scheme) && $this->path !== '') { - // path-noscheme (relative) - // (once again, not checking nz) - $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); - $c = strpos($this->path, '/'); - if ($c !== false) { - $this->path = - $segment_nc_encoder->encode(substr($this->path, 0, $c)) . - $segments_encoder->encode(substr($this->path, $c)); } else { - $this->path = $segment_nc_encoder->encode($this->path); + // path-noscheme (relative) + // my/path + // (once again, not checking nz) + $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); + $c = strpos($this->path, '/'); + if ($c !== false) { + $this->path = + $segment_nc_encoder->encode(substr($this->path, 0, $c)) . + $segments_encoder->encode(substr($this->path, $c)); + } else { + $this->path = $segment_nc_encoder->encode($this->path); + } } } else { // path-empty (hier and relative) @@ -150,6 +173,9 @@ class HTMLPurifier_URI public function toString() { // reconstruct authority $authority = null; + // there is a rendering difference between a null authority + // (http:foo-bar) and an empty string authority + // (http:///foo-bar). 
if (!is_null($this->host)) { $authority = ''; if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; @@ -157,7 +183,12 @@ class HTMLPurifier_URI if(!is_null($this->port)) $authority .= ':' . $this->port; } - // reconstruct the result + // Reconstruct the result + // One might wonder about parsing quirks from browsers after + // this reconstruction. Unfortunately, parsing behavior depends + // on what *scheme* was employed (file:///foo is handled *very* + // differently than http:///foo), so unfortunately we have to + // defer to the schemes to do the right thing. $result = ''; if (!is_null($this->scheme)) $result .= $this->scheme . ':'; if (!is_null($authority)) $result .= '//' . $authority; diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php old mode 100755 new mode 100644 index 183fcfc7e..ea2b8fe24 --- a/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIDefinition.php @@ -52,7 +52,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition protected function setupFilters($config) { foreach ($this->registeredFilters as $name => $filter) { - $conf = $config->get('URI', $name); + $conf = $config->get('URI.' . 
$name); if ($conf !== false && $conf !== null) { $this->addFilter($filter, $config); } @@ -61,15 +61,15 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition } protected function setupMemberVariables($config) { - $this->host = $config->get('URI', 'Host'); - $base_uri = $config->get('URI', 'Base'); + $this->host = $config->get('URI.Host'); + $base_uri = $config->get('URI.Base'); if (!is_null($base_uri)) { $parser = new HTMLPurifier_URIParser(); $this->base = $parser->parse($base_uri); $this->defaultScheme = $this->base->scheme; if (is_null($this->host)) $this->host = $this->base->host; } - if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI', 'DefaultScheme'); + if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme'); } public function filter(&$uri, $config, $context) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php new file mode 100644 index 000000000..67538c7bb --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php @@ -0,0 +1,11 @@ +get('EmbeddedURI', true); + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php old mode 100755 new mode 100644 index bac56e8b8..045aa0992 --- 
a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php @@ -5,7 +5,7 @@ class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter public $name = 'HostBlacklist'; protected $blacklist = array(); public function prepare($config) { - $this->blacklist = $config->get('URI', 'HostBlacklist'); + $this->blacklist = $config->get('URI.HostBlacklist'); return true; } public function filter(&$uri, $config, $context) { diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/MakeAbsolute.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php old mode 100755 new mode 100644 index 29ed0ed1f..efa10a645 --- a/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php @@ -9,10 +9,10 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter protected $replace = array(); public function prepare($config) { - $this->target = $config->get('URI', $this->name); + $this->target = $config->get('URI.' . 
$this->name); $this->parser = new HTMLPurifier_URIParser(); - $this->doEmbed = $config->get('URI', 'MungeResources'); - $this->secretKey = $config->get('URI', 'MungeSecretKey'); + $this->doEmbed = $config->get('URI.MungeResources'); + $this->secretKey = $config->get('URI.MungeSecretKey'); return true; } public function filter(&$uri, $config, $context) { @@ -23,6 +23,10 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter if (is_null($uri->host) || empty($scheme_obj->browsable)) { return true; } + // don't redirect if target host is our host + if ($uri->host === $config->getDefinition('URI')->host) { + return true; + } $this->makeReplace($uri, $config, $context); $this->replace = array_map('rawurlencode', $this->replace); diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIParser.php b/lib/htmlpurifier/library/HTMLPurifier/URIParser.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php old mode 100755 new mode 100644 index 039710fd1..25eb8410b --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme.php @@ -3,11 +3,13 @@ /** * Validator for the components of a URI for a specific scheme */ -class HTMLPurifier_URIScheme +abstract class HTMLPurifier_URIScheme { /** - * Scheme's default port (integer) + * Scheme's default port (integer). If an explicit port number is + * specified that coincides with the default port, it will be + * elided. */ public $default_port = null; @@ -24,17 +26,62 @@ class HTMLPurifier_URIScheme public $hierarchical = false; /** - * Validates the components of a URI - * @note This implementation should be called by children if they define - * a default port, as it does port processing. - * @param $uri Instance of HTMLPurifier_URI + * Whether or not the URI may omit a hostname when the scheme is + * explicitly specified, ala file:///path/to/file. 
As of writing, + * 'file' is the only scheme that browsers support his properly. + */ + public $may_omit_host = false; + + /** + * Validates the components of a URI for a specific scheme. + * @param $uri Reference to a HTMLPurifier_URI object + * @param $config HTMLPurifier_Config object + * @param $context HTMLPurifier_Context object + * @return Bool success or failure + */ + public abstract function doValidate(&$uri, $config, $context); + + /** + * Public interface for validating components of a URI. Performs a + * bunch of default actions. Don't overload this method. + * @param $uri Reference to a HTMLPurifier_URI object * @param $config HTMLPurifier_Config object * @param $context HTMLPurifier_Context object * @return Bool success or failure */ public function validate(&$uri, $config, $context) { if ($this->default_port == $uri->port) $uri->port = null; - return true; + // kludge: browsers do funny things when the scheme but not the + // authority is set + if (!$this->may_omit_host && + // if the scheme is present, a missing host is always in error + (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) || + // if the scheme is not present, a *blank* host is in error, + // since this translates into '///path' which most browsers + // interpret as being 'http://path'. + (is_null($uri->scheme) && $uri->host === '') + ) { + do { + if (is_null($uri->scheme)) { + if (substr($uri->path, 0, 2) != '//') { + $uri->host = null; + break; + } + // URI is '////path', so we cannot nullify the + // host to preserve semantics. Try expanding the + // hostname instead (fall through) + } + // first see if we can manually insert a hostname + $host = $config->get('URI.Host'); + if (!is_null($host)) { + $uri->host = $host; + } else { + // we can't do anything sensible, reject the URL. 
+ return false; + } + } while (false); + } + return $this->doValidate($uri, $config, $context); } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php new file mode 100644 index 000000000..a5c43989e --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/data.php @@ -0,0 +1,96 @@ + true, + 'image/gif' => true, + 'image/png' => true, + ); + // this is actually irrelevant since we only write out the path + // component + public $may_omit_host = true; + + public function doValidate(&$uri, $config, $context) { + $result = explode(',', $uri->path, 2); + $is_base64 = false; + $charset = null; + $content_type = null; + if (count($result) == 2) { + list($metadata, $data) = $result; + // do some legwork on the metadata + $metas = explode(';', $metadata); + while(!empty($metas)) { + $cur = array_shift($metas); + if ($cur == 'base64') { + $is_base64 = true; + break; + } + if (substr($cur, 0, 8) == 'charset=') { + // doesn't match if there are arbitrary spaces, but + // whatever dude + if ($charset !== null) continue; // garbage + $charset = substr($cur, 8); // not used + } else { + if ($content_type !== null) continue; // garbage + $content_type = $cur; + } + } + } else { + $data = $result[0]; + } + if ($content_type !== null && empty($this->allowed_types[$content_type])) { + return false; + } + if ($charset !== null) { + // error; we don't allow plaintext stuff + $charset = null; + } + $data = rawurldecode($data); + if ($is_base64) { + $raw_data = base64_decode($data); + } else { + $raw_data = $data; + } + // XXX probably want to refactor this into a general mechanism + // for filtering arbitrary content types + $file = tempnam("/tmp", ""); + file_put_contents($file, $raw_data); + if (function_exists('exif_imagetype')) { + $image_code = exif_imagetype($file); + } elseif (function_exists('getimagesize')) { + set_error_handler(array($this, 'muteErrorHandler')); + $info = 
getimagesize($file); + restore_error_handler(); + if ($info == false) return false; + $image_code = $info[2]; + } else { + trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); + } + $real_content_type = image_type_to_mime_type($image_code); + if ($real_content_type != $content_type) { + // we're nice guys; if the content type is something else we + // support, change it over + if (empty($this->allowed_types[$real_content_type])) return false; + $content_type = $real_content_type; + } + // ok, it's kosher, rewrite what we need + $uri->userinfo = null; + $uri->host = null; + $uri->port = null; + $uri->fragment = null; + $uri->query = null; + $uri->path = "$content_type;base64," . base64_encode($raw_data); + return true; + } + + public function muteErrorHandler($errno, $errstr) {} + +} + diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php new file mode 100644 index 000000000..d74a3f198 --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/file.php @@ -0,0 +1,32 @@ +userinfo = null; + // file:// makes no provisions for accessing the resource + $uri->port = null; + // While it seems to work on Firefox, the querystring has + // no possible effect and is thus stripped. 
+ $uri->query = null; + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php old mode 100755 new mode 100644 index 5849bf7ff..0fb2abf64 --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/ftp.php @@ -9,8 +9,7 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { public $browsable = true; // usually public $hierarchical = true; - public function validate(&$uri, $config, $context) { - parent::validate($uri, $config, $context); + public function doValidate(&$uri, $config, $context) { $uri->query = null; // typecode check diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php old mode 100755 new mode 100644 index b097a31d6..959b8daff --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/http.php @@ -9,8 +9,7 @@ class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme { public $browsable = true; public $hierarchical = true; - public function validate(&$uri, $config, $context) { - parent::validate($uri, $config, $context); + public function doValidate(&$uri, $config, $context) { $uri->userinfo = null; return true; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/https.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php old mode 100755 new mode 100644 index c1e2cd5aa..9db4cb23f --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/mailto.php @@ -12,9 +12,9 @@ class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme { public $browsable = false; + public $may_omit_host = 
true; - public function validate(&$uri, $config, $context) { - parent::validate($uri, $config, $context); + public function doValidate(&$uri, $config, $context) { $uri->userinfo = null; $uri->host = null; $uri->port = null; diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php old mode 100755 new mode 100644 index f5f54f4f5..84a6748d8 --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/news.php @@ -6,9 +6,9 @@ class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme { public $browsable = false; + public $may_omit_host = true; - public function validate(&$uri, $config, $context) { - parent::validate($uri, $config, $context); + public function doValidate(&$uri, $config, $context) { $uri->userinfo = null; $uri->host = null; $uri->port = null; diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php old mode 100755 new mode 100644 index 5bf93ea78..4ccea0dfc --- a/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php @@ -8,8 +8,7 @@ class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme { public $default_port = 119; public $browsable = false; - public function validate(&$uri, $config, $context) { - parent::validate($uri, $config, $context); + public function doValidate(&$uri, $config, $context) { $uri->userinfo = null; $uri->query = null; return true; diff --git a/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php b/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php old mode 100755 new mode 100644 index d24732c18..576bf7b6d --- a/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php +++ b/lib/htmlpurifier/library/HTMLPurifier/URISchemeRegistry.php @@ -36,21 +36,20 @@ class HTMLPurifier_URISchemeRegistry */ public function getScheme($scheme, $config, 
$context) { if (!$config) $config = HTMLPurifier_Config::createDefault(); - $null = null; // for the sake of passing by reference // important, otherwise attacker could include arbitrary file - $allowed_schemes = $config->get('URI', 'AllowedSchemes'); - if (!$config->get('URI', 'OverrideAllowedSchemes') && + $allowed_schemes = $config->get('URI.AllowedSchemes'); + if (!$config->get('URI.OverrideAllowedSchemes') && !isset($allowed_schemes[$scheme]) ) { - return $null; + return; } if (isset($this->schemes[$scheme])) return $this->schemes[$scheme]; - if (!isset($allowed_schemes[$scheme])) return $null; + if (!isset($allowed_schemes[$scheme])) return; $class = 'HTMLPurifier_URIScheme_' . $scheme; - if (!class_exists($class)) return $null; + if (!class_exists($class)) return; $this->schemes[$scheme] = new $class(); return $this->schemes[$scheme]; } diff --git a/lib/htmlpurifier/library/HTMLPurifier/UnitConverter.php b/lib/htmlpurifier/library/HTMLPurifier/UnitConverter.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php old mode 100755 new mode 100644 index c954250e9..21b87675a --- a/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php +++ b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Flexible.php @@ -62,7 +62,7 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser foreach ($var as $keypair) { $c = explode(':', $keypair, 2); if (!isset($c[1])) continue; - $nvar[$c[0]] = $c[1]; + $nvar[trim($c[0])] = trim($c[1]); } $var = $nvar; } @@ -79,8 +79,15 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser return $new; } else break; } + if ($type === self::ALIST) { + trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING); + return 
array_values($var); + } if ($type === self::LOOKUP) { foreach ($var as $key => $value) { + if ($value !== true) { + trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING); + } $var[$key] = true; } } diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParser/Native.php b/lib/htmlpurifier/library/HTMLPurifier/VarParser/Native.php old mode 100755 new mode 100644 diff --git a/lib/htmlpurifier/library/HTMLPurifier/VarParserException.php b/lib/htmlpurifier/library/HTMLPurifier/VarParserException.php old mode 100755 new mode 100644