af_readability: add missing file
This commit is contained in:
parent
865c54abcb
commit
3e4701116d
|
@ -88,7 +88,7 @@ class Backend extends Handler {
|
|||
}
|
||||
|
||||
function help() {
|
||||
$topic = basename(clean($_REQUEST["topic"])); // only one for now
|
||||
$topic = clean_filename($_REQUEST["topic"]); // only one for now
|
||||
|
||||
if ($topic == "main") {
|
||||
$info = get_hotkeys_info();
|
||||
|
|
|
@ -1203,30 +1203,30 @@ class Handler_Public extends Handler {
|
|||
public function pluginhandler() {
|
||||
$host = new PluginHost();
|
||||
|
||||
$plugin = basename(clean($_REQUEST["plugin"]));
|
||||
$plugin_name = clean_filename($_REQUEST["plugin"]);
|
||||
$method = clean($_REQUEST["pmethod"]);
|
||||
|
||||
$host->load($plugin, PluginHost::KIND_USER, 0);
|
||||
$host->load($plugin_name, PluginHost::KIND_USER, 0);
|
||||
$host->load_data();
|
||||
|
||||
$pclass = $host->get_plugin($plugin);
|
||||
$plugin = $host->get_plugin($plugin_name);
|
||||
|
||||
if ($pclass) {
|
||||
if (method_exists($pclass, $method)) {
|
||||
if ($pclass->is_public_method($method)) {
|
||||
$pclass->$method();
|
||||
if ($plugin) {
|
||||
if (method_exists($plugin, $method)) {
|
||||
if ($plugin->is_public_method($method)) {
|
||||
$plugin->$method();
|
||||
} else {
|
||||
user_error("pluginhandler: Requested private method '$method' of plugin '$plugin'.");
|
||||
user_error("PluginHandler[PUBLIC]: Requested private method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
|
||||
header("Content-Type: text/json");
|
||||
print error_json(6);
|
||||
}
|
||||
} else {
|
||||
user_error("pluginhandler: Requested unknown method '$method' of plugin '$plugin'.");
|
||||
user_error("PluginHandler[PUBLIC]: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
|
||||
header("Content-Type: text/json");
|
||||
print error_json(13);
|
||||
}
|
||||
} else {
|
||||
user_error("pluginhandler: Requested method '$method' of unknown plugin '$plugin'.");
|
||||
user_error("PluginHandler[PUBLIC]: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING);
|
||||
header("Content-Type: text/json");
|
||||
print error_json(14);
|
||||
}
|
||||
|
|
|
@ -5,15 +5,18 @@ class PluginHandler extends Handler_Protected {
|
|||
}
|
||||
|
||||
/**
 * Dispatches a request to a method of the plugin named in the "plugin" request parameter.
 *
 * The plugin name is read from $_REQUEST["plugin"], passed through clean() and resolved
 * exactly once via PluginHost::getInstance()->get_plugin(). When the plugin exists and
 * has a method called $method, that method is invoked without arguments. Otherwise an
 * E_USER_WARNING is raised and an error_json() payload is printed: code 14 when the
 * plugin cannot be found, code 13 when the plugin lacks the requested method.
 *
 * @param string $method name of the plugin method to invoke
 */
function catchall($method) {
	$plugin_name = clean($_REQUEST["plugin"]);
	$plugin = PluginHost::getInstance()->get_plugin($plugin_name);

	// Unknown plugin: report and stop before touching $method.
	if (!$plugin) {
		user_error("PluginHandler: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING);
		print error_json(14);
		return;
	}

	// Known plugin, but it does not define the requested method.
	if (!method_exists($plugin, $method)) {
		user_error("PluginHandler: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING);
		print error_json(13);
		return;
	}

	$plugin->$method();
}
|
||||
|
|
|
@ -186,7 +186,7 @@ class PluginHost {
|
|||
|
||||
foreach ($plugins as $class) {
|
||||
$class = trim($class);
|
||||
$class_file = strtolower(basename($class));
|
||||
$class_file = strtolower(clean_filename($class));
|
||||
|
||||
if (!is_dir(__DIR__."/../plugins/$class_file") &&
|
||||
!is_dir(__DIR__."/../plugins.local/$class_file")) continue;
|
||||
|
|
|
@ -572,7 +572,7 @@ class RPC extends Handler_Protected {
|
|||
|
||||
function log() {
|
||||
$msg = clean($_REQUEST['msg']);
|
||||
$file = basename(clean($_REQUEST['file']));
|
||||
$file = clean_filename($_REQUEST['file']);
|
||||
$line = (int) clean($_REQUEST['line']);
|
||||
$context = clean($_REQUEST['context']);
|
||||
|
||||
|
|
|
@ -593,7 +593,7 @@
|
|||
}
|
||||
|
||||
/**
 * Reduces an untrusted string to a bare file name.
 *
 * The value is first passed through clean() (defined elsewhere in this file), then every
 * ".." sequence, forward slash and backslash is removed, and finally basename() is applied
 * to the result.
 *
 * @param string $filename raw, possibly user-supplied, name
 * @return string the name with "..", "/" and "\" removed
 */
function clean_filename($filename) {
	// Only one return: the earlier variant that skipped clean() made this one unreachable.
	return basename(preg_replace("/\.\.|[\/\\\]/", "", clean($filename)));
}
|
||||
|
||||
function make_password($length = 12) {
|
||||
|
|
|
@ -166,32 +166,6 @@ class Configuration
|
|||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Legacy accessor: emits a (silenced) E_USER_DEPRECATED notice and returns the
 * value of the charThreshold property.
 *
 * @deprecated Use getCharThreshold. Will be removed in version 3.0
 *
 * @return int
 */
public function getWordThreshold()
{
    $notice = 'getWordThreshold was replaced with getCharThreshold and will be removed in version 3.0';
    @trigger_error($notice, E_USER_DEPRECATED);

    return $this->charThreshold;
}
|
||||
|
||||
/**
 * Legacy mutator: emits a (silenced) E_USER_DEPRECATED notice, then stores the
 * given value in the charThreshold property.
 *
 * @param int $charThreshold
 *
 * @return $this the same instance, so calls can be chained
 */
public function setWordThreshold($charThreshold)
{
    $notice = 'setWordThreshold was replaced with setCharThreshold and will be removed in version 3.0';
    @trigger_error($notice, E_USER_DEPRECATED);

    $this->charThreshold = $charThreshold;

    return $this;
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
|
|
82
plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
vendored
Normal file
82
plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
<?php
|
||||
|
||||
namespace andreskrey\Readability\Nodes\DOM;

/**
 * Class DOMNodeList.
 *
 * This is a fake DOMNodeList class that allows adding items to the list. The original class is static and the nodes
 * are defined automagically when instantiating it. This fake version mirrors the parts of the native API used here
 * (->length, item(), count(), iteration) and adds the function add() that allows inserting new DOMNodes into the list.
 *
 * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
 * from the user and cannot be extended, changed, or tweaked.
 */
class DOMNodeList implements \Countable, \IteratorAggregate
{
    /**
     * Nodes in insertion order, indexed from 0.
     *
     * @var array
     */
    protected $items = [];

    /**
     * Number of nodes added so far; incremented by add().
     *
     * @var int
     */
    protected $length = 0;

    /**
     * To allow access to length in the same way that DOMNodeList allows.
     *
     * Any other property name triggers an "Undefined property" notice and yields null.
     *
     * @param string $name
     *
     * @return int|null
     */
    public function __get($name)
    {
        if ($name === 'length') {
            return $this->length;
        }

        trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));

        return null;
    }

    /**
     * Appends a node to the end of the list.
     *
     * @param DOMNode|DOMElement|DOMComment $node
     *
     * @return DOMNodeList the same instance, so calls can be chained
     */
    public function add($node)
    {
        $this->items[] = $node;
        $this->length++;

        return $this;
    }

    /**
     * Returns the node stored at the given index.
     *
     * @param int $offset
     *
     * @return DOMNode|DOMElement|DOMComment|null the node, or null when $offset is not a valid index
     */
    public function item(int $offset)
    {
        // Out-of-range lookups return null (as the native DOMNodeList::item does) instead of
        // raising an undefined-offset notice/warning.
        return $this->items[$offset] ?? null;
    }

    /**
     * @return int
     */
    public function count(): int
    {
        return $this->length;
    }

    /**
     * To make it compatible with iterator_to_array() function.
     *
     * @return \ArrayIterator iterator over the stored nodes
     */
    public function getIterator(): \ArrayIterator
    {
        return new \ArrayIterator($this->items);
    }
}
|
|
@ -181,11 +181,11 @@ trait NodeTrait
|
|||
/**
|
||||
* Override for native hasAttribute.
|
||||
*
|
||||
* @see getAttribute
|
||||
*
|
||||
* @param $attributeName
|
||||
*
|
||||
* @return bool
|
||||
*
|
||||
* @see getAttribute
|
||||
*/
|
||||
public function hasAttribute($attributeName)
|
||||
{
|
||||
|
@ -317,10 +317,14 @@ trait NodeTrait
|
|||
*
|
||||
* @param bool $filterEmptyDOMText Filter empty DOMText nodes?
|
||||
*
|
||||
* @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getChildren($filterEmptyDOMText = false)
|
||||
{
|
||||
@trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED);
|
||||
|
||||
$ret = iterator_to_array($this->childNodes);
|
||||
if ($filterEmptyDOMText) {
|
||||
// Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
|
||||
|
@ -418,12 +422,12 @@ trait NodeTrait
|
|||
public function hasSingleTagInsideElement($tag)
|
||||
{
|
||||
// There should be exactly 1 element child with given tag
|
||||
if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
|
||||
if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// And there should be no text nodes with real content
|
||||
return array_reduce($children, function ($carry, $child) {
|
||||
return array_reduce(iterator_to_array($children), function ($carry, $child) {
|
||||
if (!$carry === false) {
|
||||
return false;
|
||||
}
|
||||
|
@ -443,7 +447,7 @@ trait NodeTrait
|
|||
{
|
||||
$result = false;
|
||||
if ($this->hasChildNodes()) {
|
||||
foreach ($this->getChildren() as $child) {
|
||||
foreach ($this->childNodes as $child) {
|
||||
if (in_array($child->nodeName, $this->divToPElements)) {
|
||||
$result = true;
|
||||
} else {
|
||||
|
@ -500,18 +504,22 @@ trait NodeTrait
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Heuristic visibility check based solely on this node's own attributes.
 *
 * In the original JS project they check if the node has the style display=none, which unfortunately
 * in our case we have no way of knowing that. So the node is considered hidden when its "style"
 * attribute contains "display:none" (optionally with one space after the colon) or when it has
 * the "hidden" attribute.
 *
 * Might be a good idea to check for classes or other attributes like 'aria-hidden'
 *
 * @return bool false when one of the two hidden markers is present, true otherwise
 */
public function isProbablyVisible()
{
    // Inline style explicitly hides the node.
    if (preg_match('/display:( )?none/', $this->getAttribute('style'))) {
        return false;
    }

    // Otherwise visibility depends only on the "hidden" attribute.
    return !$this->hasAttribute('hidden');
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isWhitespace()
|
||||
{
|
||||
return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) ||
|
||||
|
@ -557,4 +565,23 @@ trait NodeTrait
|
|||
$count -= ($count - $nodes->length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this
 * function to get the first child that is a DOMElement node.
 *
 * @return \DOMElement|null the first DOMElement among childNodes, or null when there is none
 */
public function getFirstElementChild()
{
    $children = $this->childNodes;

    // Nothing iterable to scan.
    if (!($children instanceof \Traversable)) {
        return null;
    }

    foreach ($children as $candidate) {
        if ($candidate instanceof \DOMElement) {
            return $candidate;
        }
    }

    return null;
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
|
|||
use andreskrey\Readability\Nodes\DOM\DOMDocument;
|
||||
use andreskrey\Readability\Nodes\DOM\DOMElement;
|
||||
use andreskrey\Readability\Nodes\DOM\DOMNode;
|
||||
use andreskrey\Readability\Nodes\DOM\DOMNodeList;
|
||||
|
||||
/**
|
||||
* Class NodeUtility.
|
||||
|
@ -157,4 +158,23 @@ class NodeUtility
|
|||
|
||||
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
|
||||
}
|
||||
|
||||
/**
 * Copies a native node list into the library's own DOMNodeList, leaving out text nodes
 * whose value is empty once trimmed. Nodes of every other type are always kept.
 *
 * @param \DOMNodeList $list
 *
 * @return DOMNodeList a new list holding the surviving nodes in their original order
 */
public static function filterTextNodes(\DOMNodeList $list)
{
    $filtered = new DOMNodeList();

    foreach ($list as $candidate) {
        // Skip text nodes that contain nothing but whitespace.
        if ($candidate->nodeType === XML_TEXT_NODE && !mb_strlen(trim($candidate->nodeValue))) {
            continue;
        }

        $filtered->add($candidate);
    }

    return $filtered;
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,13 @@ class Readability
|
|||
*/
|
||||
protected $author = null;
|
||||
|
||||
/**
|
||||
* Website name.
|
||||
*
|
||||
* @var string|null
|
||||
*/
|
||||
protected $siteName = null;
|
||||
|
||||
/**
|
||||
* Direction of the text.
|
||||
*
|
||||
|
@ -287,10 +294,10 @@ class Readability
|
|||
|
||||
$values = [];
|
||||
// property is a space-separated list of values
|
||||
$propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)\s*/i';
|
||||
$propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i';
|
||||
|
||||
// name is a single value
|
||||
$namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)\s*$/i';
|
||||
$namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i';
|
||||
|
||||
// Find description tags.
|
||||
foreach ($this->dom->getElementsByTagName('meta') as $meta) {
|
||||
|
@ -332,7 +339,6 @@ class Readability
|
|||
* This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s.
|
||||
* Will probably replace it with ??s after dropping support of PHP5.6
|
||||
*/
|
||||
|
||||
$key = current(array_intersect([
|
||||
'dc:title',
|
||||
'dcterm:title',
|
||||
|
@ -373,11 +379,18 @@ class Readability
|
|||
|
||||
// get main image
|
||||
$key = current(array_intersect([
|
||||
'image',
|
||||
'og:image',
|
||||
'twitter:image'
|
||||
], array_keys($values)));
|
||||
|
||||
$this->setImage(isset($values[$key]) ? $values[$key] : null);
|
||||
|
||||
$key = current(array_intersect([
|
||||
'og:site_name'
|
||||
], array_keys($values)));
|
||||
|
||||
$this->setSiteName(isset($values[$key]) ? $values[$key] : null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -722,7 +735,7 @@ class Readability
|
|||
*/
|
||||
if ($node->hasSingleTagInsideElement('p') && $node->getLinkDensity() < 0.25) {
|
||||
$this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
|
||||
$pNode = $node->getChildren(true)[0];
|
||||
$pNode = NodeUtility::filterTextNodes($node->childNodes)->item(0);
|
||||
$node->parentNode->replaceChild($pNode, $node);
|
||||
$node = $pNode;
|
||||
$elementsToScore[] = $node;
|
||||
|
@ -1082,7 +1095,7 @@ class Readability
|
|||
// If the top candidate is the only child, use parent instead. This will help sibling
|
||||
// joining logic when adjacent content is actually located in parent's sibling node.
|
||||
$parentOfTopCandidate = $topCandidate->parentNode;
|
||||
while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) {
|
||||
while ($parentOfTopCandidate->nodeName !== 'body' && count(NodeUtility::filterTextNodes($parentOfTopCandidate->childNodes)) === 1) {
|
||||
$topCandidate = $parentOfTopCandidate;
|
||||
$parentOfTopCandidate = $topCandidate->parentNode;
|
||||
}
|
||||
|
@ -1102,14 +1115,16 @@ class Readability
|
|||
$siblingScoreThreshold = max(10, $topCandidate->contentScore * 0.2);
|
||||
// Keep potential top candidate's parent node to try to get text direction of it later.
|
||||
$parentOfTopCandidate = $topCandidate->parentNode;
|
||||
$siblings = $parentOfTopCandidate->getChildren();
|
||||
$siblings = $parentOfTopCandidate->childNodes;
|
||||
|
||||
$hasContent = false;
|
||||
|
||||
$this->logger->info('[Rating] Adding top candidate siblings...');
|
||||
|
||||
/** @var DOMElement $sibling */
|
||||
foreach ($siblings as $sibling) {
|
||||
/* @var DOMElement $sibling */
|
||||
// Can't foreach here because down there we might change the tag name and that causes the foreach to skip items
|
||||
for ($i = 0; $i < $siblings->length; $i++) {
|
||||
$sibling = $siblings[$i];
|
||||
$append = false;
|
||||
|
||||
if ($sibling === $topCandidate) {
|
||||
|
@ -1147,7 +1162,6 @@ class Readability
|
|||
* We have a node that isn't a common block level element, like a form or td tag.
|
||||
* Turn it into a div so it doesn't get filtered out later by accident.
|
||||
*/
|
||||
|
||||
$sibling = NodeUtility::setNodeTag($sibling, 'div');
|
||||
}
|
||||
|
||||
|
@ -1266,11 +1280,11 @@ class Readability
|
|||
// Remove single-cell tables
|
||||
foreach ($article->shiftingAwareGetElementsByTagName('table') as $table) {
|
||||
/** @var DOMNode $table */
|
||||
$tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table;
|
||||
$tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getFirstElementChild() : $table;
|
||||
if ($tbody->hasSingleTagInsideElement('tr')) {
|
||||
$row = $tbody->firstChild;
|
||||
$row = $tbody->getFirstElementChild();
|
||||
if ($row->hasSingleTagInsideElement('td')) {
|
||||
$cell = $row->firstChild;
|
||||
$cell = $row->getFirstElementChild();
|
||||
$cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) {
|
||||
return $node->isPhrasingContent() && $carry;
|
||||
}, true)) ? 'p' : 'div');
|
||||
|
@ -1597,7 +1611,7 @@ class Readability
|
|||
$node->removeAttribute('class');
|
||||
}
|
||||
|
||||
for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) {
|
||||
for ($node = $node->getFirstElementChild(); $node !== null; $node = $node->nextSibling) {
|
||||
$this->_cleanClasses($node);
|
||||
}
|
||||
}
|
||||
|
@ -1756,6 +1770,22 @@ class Readability
|
|||
$this->author = $author;
|
||||
}
|
||||
|
||||
/**
 * Returns the value currently held in the siteName property.
 *
 * @return string|null
 */
public function getSiteName()
{
    $siteName = $this->siteName;

    return $siteName;
}
|
||||
|
||||
/**
 * Stores the given value in the siteName property.
 *
 * @param string|null $siteName the metadata-processing code in this class passes null
 *                              when no og:site_name value was collected
 */
protected function setSiteName($siteName)
{
    $this->siteName = $siteName;
}
|
||||
|
||||
/**
|
||||
* @return null|string
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue