UrlHelper::rewrite_relative():

- support invoking specifying owner URL element/attribute
 - restrict mailto/magnet/tel schemes for A href
 - allow some data: base64 image types for IMG src

Sanitizer::sanitize():

 - when checking href and src attributes, pass element tagname and attribute to rewrite_relative()
This commit is contained in:
Andrew Dolgov 2021-06-18 11:20:57 +03:00
parent 34807bacd4
commit e9c062a189
2 changed files with 20 additions and 7 deletions

View File

@ -74,7 +74,7 @@ class Sanitizer {
if ($entry->hasAttribute('href')) {
$entry->setAttribute('href',
rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('href'), $entry->tagName, "href"));
$entry->setAttribute('rel', 'noopener noreferrer');
$entry->setAttribute("target", "_blank");
@ -82,7 +82,7 @@ class Sanitizer {
if ($entry->hasAttribute('src')) {
$entry->setAttribute('src',
rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')));
UrlHelper::rewrite_relative($rewrite_base_url, $entry->getAttribute('src'), $entry->tagName, "src"));
}
if ($entry->nodeName == 'img') {
@ -94,7 +94,7 @@ class Sanitizer {
$matches = RSSUtils::decode_srcset($entry->getAttribute('srcset'));
for ($i = 0; $i < count($matches); $i++) {
$matches[$i]["url"] = rewrite_relative_url($rewrite_base_url, $matches[$i]["url"]);
$matches[$i]["url"] = UrlHelper::rewrite_relative($rewrite_base_url, $matches[$i]["url"]);
}
$entry->setAttribute("srcset", RSSUtils::encode_srcset($matches));

View File

@ -1,6 +1,6 @@
<?php
class UrlHelper {
const ALLOWED_RELATIVE_SCHEMES = [
const EXTRA_HREF_SCHEMES = [
"magnet",
"mailto",
"tel"
@ -27,22 +27,35 @@ class UrlHelper {
/**
* Converts a (possibly) relative URL to a absolute one, using provided base URL.
* Provides some exceptions for additional schemes like data: if called with owning element/attribute.
*
* @param string $base_url Base URL (i.e. from where the document is)
* @param string $rel_url Possibly relative URL in the document
* @param string $owner_element Owner node tag name (i.e. A) (optional)
* @param string $owner_attribute Owner attribute (i.e. href) (optional)
*
* @return string Absolute URL
*/
public static function rewrite_relative($base_url, $rel_url) {
public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") {
$rel_parts = parse_url($rel_url);
if (!empty($rel_parts['host']) && !empty($rel_parts['scheme'])) {
return self::validate($rel_url);
// protocol-relative URL (rare but they exist)
} else if (strpos($rel_url, "//") === 0) {
# protocol-relative URL (rare but they exist)
return self::validate("https:" . $rel_url);
} else if (array_search($rel_parts["scheme"] ?? "", self::ALLOWED_RELATIVE_SCHEMES, true) !== false) {
// allow some extra schemes for A href
} else if (in_array($rel_parts["scheme"] ?? "", self::EXTRA_HREF_SCHEMES) &&
$owner_element == "a" &&
$owner_attribute == "href") {
return $rel_url;
// allow limited subset of inline base64-encoded images for IMG elements
} else if ($rel_parts["scheme"] == "data" &&
preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&
$owner_element == "img" &&
$owner_attribute == "src") {
return $rel_url;
} else {
$base_parts = parse_url($base_url);