From 6322fc6870f5df3878670a690f13a26a4ce76d20 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 7 Jul 2015 08:44:31 +0300 Subject: [PATCH] af_redditimgur: fix youtube regexp, try to inline images again if parsed using readability --- plugins/af_redditimgur/init.php | 411 ++++++++++++++++---------------- 1 file changed, 201 insertions(+), 210 deletions(-) diff --git a/plugins/af_redditimgur/init.php b/plugins/af_redditimgur/init.php index d42f4c601..265999e2b 100644 --- a/plugins/af_redditimgur/init.php +++ b/plugins/af_redditimgur/init.php @@ -69,239 +69,230 @@ class Af_RedditImgur extends Plugin { echo __("Configuration saved"); } + private function inline_stuff($article, &$doc, $xpath) { + + $entries = $xpath->query('(//a[@href]|//img[@src])'); + + $found = false; + + foreach ($entries as $entry) { + if ($entry->hasAttribute("href")) { + + $matches = array(); + + if (preg_match("/https?:\/\/gfycat.com\/([a-z]+)$/i", $entry->getAttribute("href"), $matches)) { + + $tmp = fetch_file_contents($entry->getAttribute("href")); + + if ($tmp) { + $tmpdoc = new DOMDocument(); + @$tmpdoc->loadHTML($tmp); + + if ($tmpdoc) { + $tmpxpath = new DOMXPath($tmpdoc); + $source_meta = $tmpxpath->query("//meta[@property='og:video']")->item(0); + + if ($source_meta) { + $source_stream = $source_meta->getAttribute("content"); + + if ($source_stream) { + $this->handle_as_video($doc, $entry, $source_stream); + $found = 1; + } + } + } + } + + } + + if (preg_match("/\.(gifv)$/i", $entry->getAttribute("href"))) { + + $source_stream = str_replace(".gifv", ".mp4", $entry->getAttribute("href")); + $this->handle_as_video($doc, $entry, $source_stream); + + $found = true; + } + + $matches = array(); + if (preg_match("/\/\/www\.youtube\.com\/v\/([\w-]+)/", $entry->getAttribute("href"), $matches) || + preg_match("/\/\/www\.youtube\.com\/watch\?v=([\w-]+)/", $entry->getAttribute("href"), $matches) || + preg_match("/\/\/youtu.be\/([\w-]+)/", $entry->getAttribute("href"), $matches)) { + + $vid_id = $matches[1]; + + $iframe = $doc->createElement("iframe"); + $iframe->setAttribute("class", "youtube-player"); + $iframe->setAttribute("type", "text/html"); + $iframe->setAttribute("width", "640"); + $iframe->setAttribute("height", "385"); + $iframe->setAttribute("src", "https://www.youtube.com/embed/$vid_id"); + $iframe->setAttribute("allowfullscreen", "1"); + $iframe->setAttribute("frameborder", "0"); + + $br = $doc->createElement('br'); + $entry->parentNode->insertBefore($iframe, $entry); + $entry->parentNode->insertBefore($br, $entry); + + $found = true; + } + + if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href"))) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $entry->getAttribute("href")); + + $br = $doc->createElement('br'); + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); + + $found = true; + } + + // links to imgur pages + $matches = array(); + if (preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) { + + $token = $matches[2]; + + $album_content = fetch_file_contents($entry->getAttribute("href"), + false, false, false, false, 10); + + if ($album_content && $token) { + $adoc = new DOMDocument(); + @$adoc->loadHTML($album_content); + + if ($adoc) { + $axpath = new DOMXPath($adoc); + $aentries = $axpath->query('(//img[@src])'); + + foreach ($aentries as $aentry) { + if (preg_match("/\/\/i.imgur.com\/$token\./", $aentry->getAttribute("src"))) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $aentry->getAttribute("src")); + + $br = $doc->createElement('br'); + + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); + + $found = true; + + break; + } + } + } + } + } + + // linked albums, ffs + if (preg_match("/^https?:\/\/imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) { + + $album_content = fetch_file_contents($entry->getAttribute("href"), + false, false, false, false, 10); + + if ($album_content) { + $adoc = new DOMDocument(); + @$adoc->loadHTML($album_content); + + if ($adoc) { + $axpath = new DOMXPath($adoc); + $aentries = $axpath->query("//meta[@property='og:image']"); + $urls = array(); + + foreach ($aentries as $aentry) { + + if (!in_array($aentry->getAttribute("content"), $urls)) { + $img = $doc->createElement('img'); + $img->setAttribute("src", $aentry->getAttribute("content")); + $entry->parentNode->insertBefore($doc->createElement('br'), $entry); + + $br = $doc->createElement('br'); + + $entry->parentNode->insertBefore($img, $entry); + $entry->parentNode->insertBefore($br, $entry); + + array_push($urls, $aentry->getAttribute("content")); + + $found = true; + } + } + } + } + } + } + + // remove tiny thumbnails + if ($entry->hasAttribute("src")) { + if ($entry->parentNode && $entry->parentNode->parentNode) { + $entry->parentNode->parentNode->removeChild($entry->parentNode); + } + } + } + + return $found; + } + function hook_article_filter($article) { if (strpos($article["link"], "reddit.com/r/") !== FALSE) { - $doc = new DOMDocument(); - @$doc->loadHTML($article["content"]); + $doc = new DOMDocument(); + @$doc->loadHTML($article["content"]); + $xpath = new DOMXPath($doc); - if ($doc) { - $xpath = new DOMXPath($doc); - $entries = $xpath->query('(//a[@href]|//img[@src])'); + $found = $this->inline_stuff($article, $doc, $xpath); - $found = false; + if (!$found && $this->host->get($this, "enable_readability") && mb_strlen(strip_tags($article["content"])) <= 150) { + if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); - foreach ($entries as $entry) { - if ($entry->hasAttribute("href")) { + $content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0); - $matches = array(); + if ($content_link && strpos($content_link->getAttribute("href"), "reddit.com") === FALSE) { - if (preg_match("/https?:\/\/gfycat.com\/([a-z]+)$/i", $entry->getAttribute("href"), $matches)) { + $tmp = fetch_file_contents($content_link->getAttribute("href")); - $tmp = fetch_file_contents($entry->getAttribute("href")); + if ($tmp) { + $r = new Readability($tmp, $content_link->getAttribute("href")); - if ($tmp) { - $tmpdoc = new DOMDocument(); - @$tmpdoc->loadHTML($tmp); + if ($r->init()) { + //$article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - if ($tmpdoc) { - $tmpxpath = new DOMXPath($tmpdoc); - $source_meta = $tmpxpath->query("//meta[@property='og:video']")->item(0); + $tmpxpath = new DOMXPath($r->dom); - if ($source_meta) { - $source_stream = $source_meta->getAttribute("content"); + $entries = $tmpxpath->query('(//a[@href]|//img[@src])'); + + foreach ($entries as $entry) { + if ($entry->hasAttribute("href")) { + $entry->setAttribute("href", + rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("href"))); + + } + + if ($entry->hasAttribute("src")) { + $entry->setAttribute("src", + rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("src"))); - if ($source_stream) { - $this->handle_as_video($doc, $entry, $source_stream); - $found = 1; - } - } - } } } - if (preg_match("/\.(gifv)$/i", $entry->getAttribute("href"))) { + $article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - /*$video = $doc->createElement('video'); - $video->setAttribute("autoplay", "1"); - $video->setAttribute("loop", "1"); + $doc = new DOMDocument(); + @$doc->loadHTML($article["content"]); + $xpath = new DOMXPath($doc); - $source = $doc->createElement('source'); - $source->setAttribute("src", str_replace(".gifv", ".mp4", $entry->getAttribute("href"))); - $source->setAttribute("type", "video/mp4"); - - $video->appendChild($source); - - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($video, $entry); - $entry->parentNode->insertBefore($br, $entry); - - $img = $doc->createElement('img'); - $img->setAttribute("src", - "data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs%3D"); - - $entry->parentNode->insertBefore($img, $entry);*/ - - $source_stream = str_replace(".gifv", ".mp4", $entry->getAttribute("href")); - $this->handle_as_video($doc, $entry, $source_stream); - - $found = true; - } - - $matches = array(); - if (preg_match("/\/\/www\.youtube\.com\/v\/([\w-]+)/", $entry->getAttribute("href"), $matches) || - preg_match("/\/\/www\.youtube\.com\/watch?v=([\w-]+)/", $entry->getAttribute("href"), $matches) || - preg_match("/\/\/youtu.be\/([\w-]+)/", $entry->getAttribute("href"), $matches)) { - - $vid_id = $matches[1]; - - $iframe = $doc->createElement("iframe"); - $iframe->setAttribute("class", "youtube-player"); - $iframe->setAttribute("type", "text/html"); - $iframe->setAttribute("width", "640"); - $iframe->setAttribute("height", "385"); - $iframe->setAttribute("src", "https://www.youtube.com/embed/$vid_id"); - $iframe->setAttribute("allowfullscreen", "1"); - $iframe->setAttribute("frameborder", "0"); - - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($iframe, $entry); - $entry->parentNode->insertBefore($br, $entry); - - $found = true; - } - - if (preg_match("/\.(jpg|jpeg|gif|png)(\?[0-9][0-9]*)?$/i", $entry->getAttribute("href"))) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $entry->getAttribute("href")); - - $br = $doc->createElement('br'); - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); - - $found = true; - } - - // links to imgur pages - $matches = array(); - if (preg_match("/^https?:\/\/(m\.)?imgur.com\/([^\.\/]+$)/", $entry->getAttribute("href"), $matches)) { - - $token = $matches[2]; - - $album_content = fetch_file_contents($entry->getAttribute("href"), - false, false, false, false, 10); - - if ($album_content && $token) { - $adoc = new DOMDocument(); - @$adoc->loadHTML($album_content); - - if ($adoc) { - $axpath = new DOMXPath($adoc); - $aentries = $axpath->query('(//img[@src])'); - - foreach ($aentries as $aentry) { - if (preg_match("/\/\/i.imgur.com\/$token\./", $aentry->getAttribute("src"))) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $aentry->getAttribute("src")); - - $br = $doc->createElement('br'); - - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); - - $found = true; - - break; - } - } - } - } - } - - // linked albums, ffs - if (preg_match("/^https?:\/\/imgur.com\/(a|album|gallery)\/[^\.]+$/", $entry->getAttribute("href"), $matches)) { - - $album_content = fetch_file_contents($entry->getAttribute("href"), - false, false, false, false, 10); - - if ($album_content) { - $adoc = new DOMDocument(); - @$adoc->loadHTML($album_content); - - if ($adoc) { - $axpath = new DOMXPath($adoc); - $aentries = $axpath->query("//meta[@property='og:image']"); - $urls = array(); - - foreach ($aentries as $aentry) { - - if (!in_array($aentry->getAttribute("content"), $urls)) { - $img = $doc->createElement('img'); - $img->setAttribute("src", $aentry->getAttribute("content")); - $entry->parentNode->insertBefore($doc->createElement('br'), $entry); - - $br = $doc->createElement('br'); - - $entry->parentNode->insertBefore($img, $entry); - $entry->parentNode->insertBefore($br, $entry); - - array_push($urls, $aentry->getAttribute("content")); - - $found = true; - } - } - } - } - } - } - - // remove tiny thumbnails - if ($entry->hasAttribute("src")) { - if ($entry->parentNode && $entry->parentNode->parentNode) { - $entry->parentNode->parentNode->removeChild($entry->parentNode); - } + $found = $this->inline_stuff($article, $doc, $xpath); } } - if (!$found && $this->host->get($this, "enable_readability") && mb_strlen(strip_tags($article["content"])) <= 150) { - if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); - - $content_link = $xpath->query("(//a[contains(., '[link]')])")->item(0); - - if ($content_link && strpos($content_link->getAttribute("href"), "reddit.com") === FALSE) { - - $tmp = fetch_file_contents($content_link->getAttribute("href")); - - if ($tmp) { - $r = new Readability($tmp, $content_link->getAttribute("href")); - - if ($r->init()) { - //$article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - - $tmpxpath = new DOMXPath($r->dom); - - $entries = $tmpxpath->query('(//a[@href]|//img[@src])'); - - foreach ($entries as $entry) { - if ($entry->hasAttribute("href")) { - $entry->setAttribute("href", - rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("href"))); - - } - - if ($entry->hasAttribute("src")) { - $entry->setAttribute("src", - rewrite_relative_url($content_link->getAttribute("href"), $entry->getAttribute("src"))); - - } - - } - - $article["content"] = $r->articleContent->innerHTML . "
" . $article["content"]; - } - } - - } - - } - - $node = $doc->getElementsByTagName('body')->item(0); - - if ($node && $found) { - $article["content"] = $doc->saveXML($node); - } } + + } + + $node = $doc->getElementsByTagName('body')->item(0); + + if ($node && $found) { + $article["content"] = $doc->saveXML($node); + } } return $article;