af_readability: Use new global fetch_effective_url for canonical URL.

This commit is contained in:
JustAMacUser 2018-02-11 21:02:17 +00:00
parent 1aeb282be1
commit e26cb06179
1 changed file with 6 additions and 27 deletions

View File

@@ -137,29 +137,11 @@ class Af_Readability extends Plugin {
} }
public function extract_content($url) { public function extract_content($url) {
global $fetch_effective_url;
if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php"); if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php");
if (!defined('NO_CURL') && function_exists('curl_init') && !ini_get("open_basedir")) { $tmp = fetch_file_contents(array("url" => $url, "type" => "text/html"));
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_NOBODY, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
@curl_exec($ch);
$content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
if (strpos($content_type, "text/html") === FALSE)
return false;
$effective_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
}
$tmp = fetch_file_contents($url);
if ($tmp && mb_strlen($tmp) < 1024 * 500) { if ($tmp && mb_strlen($tmp) < 1024 * 500) {
$tmpdoc = new DOMDocument("1.0", "UTF-8"); $tmpdoc = new DOMDocument("1.0", "UTF-8");
@@ -167,9 +149,6 @@ class Af_Readability extends Plugin {
if (!$tmpdoc->loadHTML('<?xml encoding="utf-8" ?>\n' . $tmp)) if (!$tmpdoc->loadHTML('<?xml encoding="utf-8" ?>\n' . $tmp))
return false; return false;
if (!isset($effective_url))
$effective_url = $url;
if (strtolower($tmpdoc->encoding) != 'utf-8') { if (strtolower($tmpdoc->encoding) != 'utf-8') {
$tmpxpath = new DOMXPath($tmpdoc); $tmpxpath = new DOMXPath($tmpdoc);
@@ -180,7 +159,7 @@ class Af_Readability extends Plugin {
$tmp = $tmpdoc->saveHTML(); $tmp = $tmpdoc->saveHTML();
} }
$r = new Readability($tmp, $url); $r = new Readability($tmp, $fetch_effective_url);
if ($r->init()) { if ($r->init()) {
$tmpxpath = new DOMXPath($r->dom); $tmpxpath = new DOMXPath($r->dom);
@@ -190,13 +169,13 @@ class Af_Readability extends Plugin {
foreach ($entries as $entry) { foreach ($entries as $entry) {
if ($entry->hasAttribute("href")) { if ($entry->hasAttribute("href")) {
$entry->setAttribute("href", $entry->setAttribute("href",
rewrite_relative_url($effective_url, $entry->getAttribute("href"))); rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
} }
if ($entry->hasAttribute("src")) { if ($entry->hasAttribute("src")) {
$entry->setAttribute("src", $entry->setAttribute("src",
rewrite_relative_url($effective_url, $entry->getAttribute("src"))); rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
} }