- don't fail on non-ASCII characters when validating URLs

- fix IDN hostnames not being converted properly
This commit is contained in:
Andrew Dolgov 2020-09-22 14:37:45 +03:00
parent e3780050e7
commit b5710baf34
1 changed file with 15 additions and 6 deletions

View File

@ -48,23 +48,32 @@ class UrlHelper {
// extended filtering involves validation for safe ports and loopback // extended filtering involves validation for safe ports and loopback
static function validate($url, $extended_filtering = false) { static function validate($url, $extended_filtering = false) {
$url = clean($url); $url = clean(rawurldecode($url));
# fix protocol-relative URLs # fix protocol-relative URLs
if (strpos($url, "//") === 0) if (strpos($url, "//") === 0)
$url = "https:" . $url; $url = "https:" . $url;
if (filter_var($url, FILTER_VALIDATE_URL) === false)
return false;
$tokens = parse_url($url); $tokens = parse_url($url);
// this isn't really necessary because filter_var(... FILTER_VALIDATE_URL) requires host and scheme
// as per https://php.watch/versions/7.3/filter-var-flag-deprecation but it might save time
if (!$tokens['host']) if (!$tokens['host'])
return false; return false;
if (!in_array(strtolower($tokens['scheme']), ['http', 'https'])) if (!in_array(strtolower($tokens['scheme']), ['http', 'https']))
return false; return false;
if ($tokens['path']) {
// urlencode path, but respect "/" path delimiters
$tokens['path'] = implode("/", array_map("rawurlencode", explode("/", $tokens['path'])));
}
$url = self::build_url($tokens);
if (filter_var($url, FILTER_VALIDATE_URL) === false)
return false;
if ($extended_filtering) { if ($extended_filtering) {
if (!in_array($tokens['port'], [80, 443, ''])) if (!in_array($tokens['port'], [80, 443, '']))
return false; return false;
@ -76,8 +85,8 @@ class UrlHelper {
//convert IDNA hostname to punycode if possible //convert IDNA hostname to punycode if possible
if (function_exists("idn_to_ascii")) { if (function_exists("idn_to_ascii")) {
if (mb_detect_encoding($tokens['host']) != 'ASCII') { if (mb_detect_encoding($tokens['host']) != 'ASCII') {
$parts['host'] = idn_to_ascii($tokens['host']); $tokens['host'] = idn_to_ascii($tokens['host']);
$url = UrlHelper::build_url($tokens); $url = self::build_url($tokens);
} }
} }