From 74a247fc5c4e458f1aed118ed346f7ec53de5c3d Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 17 Feb 2022 22:38:38 +0300 Subject: [PATCH] rewrite_relative: whitelist specific schemes for URLs with 'known' content-types i.e. specified for enclosures --- classes/rssutils.php | 2 +- classes/urlhelper.php | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/classes/rssutils.php b/classes/rssutils.php index d7284a7bc..9995b0e43 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -736,7 +736,7 @@ class RSSUtils { // TODO: Just use FeedEnclosure (and modify it to cover whatever justified this)? $e_item = array( - UrlHelper::rewrite_relative($site_url, $e->link), + UrlHelper::rewrite_relative($site_url, $e->link, "", "", $e->type), $e->type, $e->length, $e->title, $e->width, $e->height); // Yet another episode of "mysql utf8_general_ci is gimped" diff --git a/classes/urlhelper.php b/classes/urlhelper.php index 9ac7781ef..22fe067d5 100644 --- a/classes/urlhelper.php +++ b/classes/urlhelper.php @@ -6,6 +6,10 @@ class UrlHelper { "tel" ]; + const EXTRA_SCHEMES_BY_CONTENT_TYPE = [ + "application/x-bittorrent" => [ "magnet" ], + ]; + // TODO: class properties can be switched to PHP typing if/when the minimum PHP_VERSION is raised to 7.4.0+ /** @var string */ static $fetch_last_error; @@ -52,10 +56,16 @@ class UrlHelper { * @param string $rel_url Possibly relative URL in the document * @param string $owner_element Owner element tag name (i.e. "a") (optional) * @param string $owner_attribute Owner attribute (i.e. "href") (optional) + * @param string $content_type URL content type as specified by enclosures, etc. * * @return false|string Absolute URL or false on failure (either during URL parsing or validation) */ - public static function rewrite_relative($base_url, $rel_url, string $owner_element = "", string $owner_attribute = "") { + public static function rewrite_relative($base_url, + $rel_url, + string $owner_element = "", + string $owner_attribute = "", + string $content_type = "") { + $rel_parts = parse_url($rel_url); /** @@ -80,6 +90,11 @@ class UrlHelper { $owner_element == "a" && $owner_attribute == "href") { return $rel_url; + // allow some extra schemes for links with feed-specified content type i.e. enclosures + } else if ($content_type && + is_array(self::EXTRA_SCHEMES_BY_CONTENT_TYPE[$content_type]) && + in_array($rel_parts["scheme"], self::EXTRA_SCHEMES_BY_CONTENT_TYPE[$content_type])) { + return $rel_url; // allow limited subset of inline base64-encoded images for IMG elements } else if (($rel_parts["scheme"] ?? "") == "data" && preg_match('%^image/(webp|gif|jpg|png|svg);base64,%', $rel_parts["path"]) &&