mirror of
https://github.com/YunoHost/doc.git
synced 2024-09-03 20:06:26 +02:00
506 lines
19 KiB
PHP
506 lines
19 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* External Links
|
||
|
*
|
||
|
* This file is part of Grav External Links plugin.
|
||
|
*
|
||
|
* Dual licensed under the MIT or GPL Version 3 licenses, see LICENSE.
|
||
|
* http://benjamin-regler.de/license/
|
||
|
*/
|
||
|
|
||
|
namespace Grav\Plugin;
|
||
|
|
||
|
use Grav\Common\Utils;
|
||
|
use Grav\Common\Grav;
|
||
|
|
||
|
/**
|
||
|
* External Links
|
||
|
*
|
||
|
* Helper class to add small icons to external and mailto links, informing
|
||
|
* users the link will take them to a new site or open their email client.
|
||
|
*/
|
||
|
class ExternalLinks
|
||
|
{
|
||
|
/**
|
||
|
* @var ExternalLinks
|
||
|
*/
|
||
|
|
||
|
/** -------------
|
||
|
* Public methods
|
||
|
* --------------
|
||
|
*/
|
||
|
|
||
|
/**
 * Process contents, i.e. apply the external-links filter to the content.
 *
 * Every `<a>...</a>` fragment matched in $content is parsed on its own,
 * decorated (classes, `target`, `rel`, optional title) and serialised
 * back. Fragments that cannot be parsed, have an empty `href` or carry
 * one of the excluded classes are returned untouched.
 *
 * @param  string       $content  The content to render.
 * @param  array|object $options  Options to be passed to the renderer:
 *                                either an object exposing
 *                                `get($key, $default)` or a plain
 *                                (optionally nested) array addressed
 *                                with dotted keys.
 * @param  null|Page    $page     Null or an instance of \Grav\Common\Page.
 *
 * @return string                 The rendered contents.
 */
public function render($content, $options = [], $page = null)
{
    // Uniform option lookup. Objects keep being queried through their own
    // get() method; plain arrays (including the documented default `[]`)
    // are resolved here so that they no longer trigger a fatal
    // "call to a member function get() on array".
    $option = function ($key, $default = null) use ($options) {
        if (!is_array($options)) {
            return $options->get($key, $default);
        }

        // Literal key first, then walk the dotted path
        if (array_key_exists($key, $options)) {
            return $options[$key];
        }

        $value = $options;
        foreach (explode('.', $key) as $segment) {
            if (!is_array($value) || !array_key_exists($segment, $value)) {
                return $default;
            }
            $value = $value[$segment];
        }

        return $value;
    };

    // Get all <a> tags and process them.
    // NOTE(review): the pattern has no `s` modifier, so `.` does not match
    // newlines and anchors whose content spans several lines are skipped;
    // confirm whether that is intended.
    $rendered = preg_replace_callback(
        '~<a(?:\s[^>]*)?>.*?</a>~i',
        function ($match) use ($option, $page) {
            $source = $match[0];

            // Load PHP built-in DOMDocument class
            $dom = $this->loadDOMDocument($source);
            if ($dom === null) {
                return $source;
            }

            // Check that there is really a link tag
            $anchors = $dom->getElementsByTagName('a');
            if ($anchors->length == 0) {
                return $source;
            }
            $a = $anchors->item(0);

            // Process links with non-empty href attribute
            $href = $a->getAttribute('href');
            if (strlen($href) == 0) {
                return $source;
            }

            // Get the classes of the <a> element (getAttribute() yields an
            // empty string when the attribute is missing)
            $classes = array_filter(explode(' ', $a->getAttribute('class')));

            // Exclude links with specific class from processing
            $exclude = (array) $option('exclude.classes');
            if ($exclude && array_intersect($exclude, $classes)) {
                return $source;
            }

            // Get domains to be seen as internal
            $domains = $option('exclude.domains', []);

            if (strpos($href, 'mailto:') === 0) {
                // This is a mailto link.
                $classes[] = 'mailto';
            } elseif ($url = $this->isExternalUrl($href, $domains, $page)) {
                // The link is external: add class and normalised URL
                $classes[] = 'external-link';
                $a->setAttribute('href', $url);

                // Add target (e.g. "_blank") when configured
                $target = $option('target');
                if ($target) {
                    $a->setAttribute('target', $target);
                }

                // Build the rel attribute in one pass: keep existing tokens,
                // add "nofollow" when configured and always add
                // "noopener noreferrer"
                $rel = array_filter(explode(' ', $a->getAttribute('rel')));
                if ($option('no_follow') && !in_array('nofollow', $rel, true)) {
                    $rel[] = 'nofollow';
                }
                $rel[] = 'noopener';
                $rel[] = 'noreferrer';
                $a->setAttribute('rel', implode(' ', array_unique($rel)));

                // Classify the link by the number of <img> children
                $imgs = $a->getElementsByTagName('img');
                if ($imgs->length > 1) {
                    // Multiple child images
                    $classes[] = 'images';
                } elseif ($imgs->length == 1) {
                    // One child image: decide between icon and image by its
                    // largest dimension
                    list($width, $height) = $this->getImageSize($imgs->item(0));
                    $size = max($width, $height);
                    $classes[] = ((0 < $size) && ($size <= 32)) ? 'icon' : 'image';
                } else {
                    // No child images
                    $classes[] = 'no-image';
                }

                // Add title (aka alert text)
                if ($option('title')) {
                    $language = Grav::instance()['language'];
                    $message = $language->translate(['PLUGINS.EXTERNAL_LINKS.TITLE_MESSAGE']);

                    // Use the title attribute unless the link already has
                    // one; in that case store the message as data attribute
                    $key = $a->hasAttribute('title') ? 'data-title' : 'title';
                    $a->setAttribute($key, $message);
                }
            }

            // Set class attribute; array_unique() keeps the result stable
            // when already processed content is rendered again
            if (count($classes) && ($option('mode') === 'active')) {
                $a->setAttribute('class', implode(' ', array_unique($classes)));
            }

            // Save DOM document back to HTML representation
            return $this->saveDOMDocument($dom);
        },
        $content
    );

    // preg_replace_callback() returns null on a PCRE error (e.g. backtrack
    // limit); hand back the untouched content instead of wiping the page
    return ($rendered === null) ? $content : $rendered;
}
|
||
|
|
||
|
/** -------------------------------
|
||
|
* Private/protected helper methods
|
||
|
* --------------------------------
|
||
|
*/
|
||
|
|
||
|
/**
 * Test if a URL is external
 *
 * A URL is external when it does not match any internal domain (the
 * given $domains plus Grav's absolute base URL) and either starts with
 * one of the configured schemes, starts with "www." (when the `www`
 * option is enabled) or points to a page redirecting to an external URL
 * (when the `redirects` option is enabled).
 *
 * @param  string     $url      The URL to test.
 * @param  array      $domains  An array of domains to be seen as internal.
 *                              `*` and a trailing `/*` act as wildcards.
 * @param  null|Page  $page     Null or an instance of \Grav\Common\Page.
 *
 * @return mixed                Returns the (possibly completed) URL as a
 *                              string, if it is external, false otherwise.
 */
protected function isExternalUrl($url, $domains = [], $page = null)
{
    static $allowed_protocols;
    // Compiled patterns, keyed by the list of internal domains they were
    // built from, so a pattern is never reused for a different list
    static $patterns = [];

    $grav = Grav::instance();

    /** @var Config $config */
    $config = $grav['config'];

    /** @var Page $page */
    $page = $page ?: $grav['page'];

    // Statically store allowed protocols
    if (!isset($allowed_protocols)) {
        $allowed_protocols = array_flip(
            $config->get('plugins.external_links.links.schemes', ['http', 'https'])
        );
    }

    // Collect internal domains. Empty entries are dropped: an empty
    // alternative in the pattern would match every URL and nothing would
    // ever be reported as external.
    $internal = [];
    foreach (array_merge((array) $domains, [$grav['base_url_absolute']]) as $domain) {
        $domain = (string) $domain;
        if ($domain !== '') {
            $internal[] = $domain;
        }
    }

    // Build (or fetch) the PCRE pattern for the internal domains
    $pattern = null;
    if ($internal) {
        $key = implode("\n", $internal);
        if (!isset($patterns[$key])) {
            $quoted = [];
            foreach ($internal as $domain) {
                $quoted[] = preg_quote($domain, '#');
            }

            // Each entry is used both verbatim and quoted, exactly as
            // before, so an entry is effectively also interpreted as a
            // regular expression.
            // NOTE(review): confirm whether raw (unquoted) entries are
            // intended; an entry starting with `*` or containing `#`
            // yields an invalid pattern.
            $patterns[$key] = '#(' . str_replace(
                ['\*', '/*'],
                '.*?',
                implode('|', array_merge($internal, $quoted))
            ) . ')#i';
        }
        $pattern = $patterns[$key];
    }

    $external = false;

    // Check for URLs that don't match any excluded domain
    if ($pattern === null || !preg_match($pattern, $url)) {
        // Check if URL is external by extracting colon position
        $colonpos = strpos($url, ':');
        if ($colonpos > 0) {
            // We found a colon, possibly a protocol. Verify.
            $protocol = strtolower(substr($url, 0, $colonpos));
            if (isset($allowed_protocols[$protocol])) {
                // The protocol turns out to be an allowed protocol
                $external = $url;
            }
        } else {
            if ($config->get('plugins.external_links.links.www')) {
                // Remove possible path duplicate
                $route = $grav['base_url'] . $page->route();
                $href = Utils::startsWith($url, $route)
                    ? ltrim(mb_substr($url, mb_strlen($route)), '/')
                    : $url;

                // We found an url without protocol, but with starting
                // 'www' (sub-)domain
                if (Utils::startsWith($url, 'www.')) {
                    $external = 'http://' . $url;
                } elseif (Utils::startsWith($href, 'www.')) {
                    $external = 'http://' . $href;
                }
            }

            if ($config->get('plugins.external_links.links.redirects')) {
                // A page that redirects is judged by its redirect target
                $targetPage = $grav['pages']->find($url);
                if ($targetPage && $targetPage->redirect()) {
                    $external = $this->isExternalUrl($targetPage->redirect(), $domains, $page);
                }
            }
        }
    }

    // The URL string is returned only if a valid protocol, a leading
    // 'www.' or an external redirect was found; false otherwise
    return $external;
}
|
||
|
|
||
|
/**
 * Determine the size of an image
 *
 * The size is probed from the file referenced by the "src" attribute
 * (local file below the document root, otherwise fetched remotely) and
 * then overridden by the "width"/"height" attributes and finally by
 * `width:`/`height:` declarations in the "style" attribute.
 *
 * @param  DOMNode $imgNode  The image already parsed as a DOMNode
 * @param  integer $limit    Load first $limit KB of remote image
 *
 * @return array             Return the dimension of the image of the
 *                           format array(width, height); any further
 *                           entries delivered by getimagesize() are
 *                           kept. Width and height are 0 when unknown.
 */
protected function getImageSize($imgNode, $limit = 32)
{
    // Conversion factors to pixels (assume standard font with 16px base
    // pixel size)
    $units = [
        'px'  => 1,            // base unit: pixel
        'pt'  => 16 / 12,      // 12 points = 16 pixels
        'pc'  => 16,           // 1 pica = 12 points = 16 pixels

        'in'  => 96,           // 1 inch = 96 pixels = 2.54 centimeters
        'mm'  => 96 / 25.4,    // 1 millimeter = 96 pixels / 1 inch [mm]
        'cm'  => 96 / 2.54,    // 1 centimeter = 96 pixels / 1 inch [cm]
        'm'   => 96 / 0.0254,  // 1 meter = 96 pixels / 1 inch [m]

        'ex'  => 7,            // 1 ex = 7 pixels
        'em'  => 16,           // 1 em = 16 pixels
        'rem' => 16,           // 1 rem = 16 pixels

        // The style patterns below only capture alphabetic units, so this
        // entry is currently never looked up
        '%'   => 16 / 100,     // 100 percent = 16 pixels
    ];

    // Fallback when the image has no "src" or cannot be probed; prevents
    // indexing an undefined variable or a `false` result further down
    $size = [0, 0];

    // Determine image dimensions based on "src" attribute
    if ($imgNode->hasAttribute('src')) {
        $src = $imgNode->getAttribute('src');

        // Simple check if the URL is internal i.e. check if path exists.
        // DOCUMENT_ROOT may be missing (e.g. on the command line).
        $root = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '';
        $path = $root . $src;
        if (realpath($path) && is_file($path)) {
            $probe = @getimagesize($path);
        } else {
            // The URL is external; try to load it (default: 32 KB)
            $probe = $this->getRemoteImageSize($src, $limit * 1024);
        }

        if (is_array($probe) && isset($probe[0], $probe[1])) {
            $size = $probe;
        }
    }

    // Read out width and height from <img> attributes
    $dimensions = [
        'width'  => $imgNode->hasAttribute('width')
            ? $imgNode->getAttribute('width') : $size[0],
        'height' => $imgNode->hasAttribute('height')
            ? $imgNode->getAttribute('height') : $size[1],
    ];

    // Get width and height from style attribute
    if ($imgNode->hasAttribute('style')) {
        $style = $imgNode->getAttribute('style');

        foreach (array_keys($dimensions) as $property) {
            if (preg_match('~' . $property . ':\s*(\d+)([a-z]+)~i', $style, $matches)) {
                $value = $matches[1];

                // Convert unit to pixel; the match is case-insensitive,
                // so normalise the unit before the lookup
                $unit = strtolower($matches[2]);
                if (isset($units[$unit])) {
                    $value *= $units[$unit];
                }

                $dimensions[$property] = $value;
            }
        }
    }

    // Update width and height
    $size[0] = $dimensions['width'];
    $size[1] = $dimensions['height'];

    // Return image dimensions
    return $size;
}
|
||
|
|
||
|
/**
 * Get the size of a remote image
 *
 * The image (or only its first $limit bytes) is downloaded into a
 * temporary file, either through the URL-aware stream wrappers or, when
 * `allow_url_fopen` is off, through cURL, and then inspected with
 * getimagesize(). The temporary file is removed afterwards.
 *
 * @param  string  $uri    The URI of the remote image
 * @param  integer $limit  Load first $limit bytes of remote image;
 *                         values <= 0 load the whole image
 *
 * @return array|false     The getimagesize() result (an array with up
 *                         to 7 elements), array(0, 0, 'width="0"
 *                         height="0"') when the image could not be
 *                         loaded or recognised, or false when no
 *                         temporary file could be created.
 */
protected function getRemoteImageSize($uri, $limit = -1)
{
    // Create temporary file to store data from $uri
    $tmp_name = tempnam(sys_get_temp_dir(), uniqid('ris'));
    if ($tmp_name === false) {
        return false;
    }

    // "bytes=0-N" is inclusive, so the last byte index is $limit - 1
    $range = '0-' . ($limit - 1);

    if (ini_get('allow_url_fopen')) {
        // Use stream copy
        $options = [];
        if ($limit > 0) {
            // Loading number of $limit bytes
            $options['http']['header'] = ['Range: bytes=' . $range];
        }

        // Create stream context
        $context = stream_context_create($options);
        @copy($uri, $tmp_name, $context);
    } elseif (function_exists('curl_version')) {
        // Use Curl; the write handle is only needed (and opened) here
        $tmp = fopen($tmp_name, 'wb');
        if ($tmp !== false) {
            $curl = curl_init();
            curl_setopt_array($curl, [
                CURLOPT_HEADER         => false,  // Don't return headers
                CURLOPT_FOLLOWLOCATION => true,   // Follow redirects
                CURLOPT_AUTOREFERER    => true,   // Set referrer on redirect
                CURLOPT_CONNECTTIMEOUT => 120,    // Timeout on connect
                CURLOPT_TIMEOUT        => 120,    // Timeout on response
                CURLOPT_MAXREDIRS      => 10,     // Stop after 10 redirects
                CURLOPT_ENCODING       => '',     // Handle all encodings
                // CURLOPT_BINARYTRANSFER is intentionally not set: it has
                // had no effect for a long time and is deprecated in
                // current PHP versions.
                CURLOPT_FILE           => $tmp,   // Curl file
                CURLOPT_URL            => $uri,   // URI
            ]);

            if ($limit > 0) {
                // Loading number of $limit bytes
                curl_setopt($curl, CURLOPT_RANGE, $range);

                // Abort request when more data is received (servers may
                // ignore the Range request)
                curl_setopt($curl, CURLOPT_BUFFERSIZE, 512);    // More progress info
                curl_setopt($curl, CURLOPT_NOPROGRESS, false);  // Monitor progress
                curl_setopt(
                    $curl,
                    CURLOPT_PROGRESSFUNCTION,
                    function () use ($limit) {
                        // Current PHP passes (handle, download total,
                        // downloaded, upload total, uploaded); very old
                        // versions omit the handle.
                        $args = func_get_args();
                        $downloaded = (count($args) >= 5) ? $args[2] : $args[1];

                        // If $downloaded exceeds $limit, returning
                        // non-zero breaks the connection!
                        return ($downloaded > $limit) ? 1 : 0;
                    }
                );
            }

            // Execute Curl
            curl_exec($curl);
            curl_close($curl);

            // Close temporary file
            fclose($tmp);
        }
    }

    // Retrieve image information; keep the zero-size default when nothing
    // was downloaded or the data is not a recognisable image
    $info = [0, 0, 'width="0" height="0"'];
    clearstatcache(true, $tmp_name);
    if (filesize($tmp_name) > 0) {
        $probe = @getimagesize($tmp_name);
        if ($probe !== false) {
            $info = $probe;
        }
    }

    // Delete temporary file
    unlink($tmp_name);

    return $info;
}
|
||
|
|
||
|
/**
 * Load contents into PHP built-in DOMDocument object
 *
 * Two really good resources to handle DOMDocument with HTML(5)
 * correctly.
 *
 * @see http://stackoverflow.com/questions/3577641/how-do-you-parse-and-process-html-xml-in-php
 * @see http://stackoverflow.com/questions/7997936/how-do-you-format-dom-structures-in-php
 *
 * @param  string $content    The content (UTF-8 encoded HTML) to be
 *                            loaded into the DOMDocument object
 *
 * @return DOMDocument|null   DOMDocument object of content, or null if
 *                            the content is empty or yields no document
 *                            element
 */
protected function loadDOMDocument($content)
{
    // Nothing to parse; loadHTML() rejects an empty string
    if ((string) $content === '') {
        return null;
    }

    // Collect libxml errors internally while parsing and remember the
    // previous mode so it can be restored afterwards. Errors left over
    // from an already active internal mode are cleared, as before.
    $previous = libxml_use_internal_errors(true);
    if ($previous === true) {
        libxml_clear_errors();
    }

    // Parse content using PHP built-in DOMDocument class
    $document = new \DOMDocument('1.0', 'UTF-8');

    // loadHTML() does not assume UTF-8 input, so turn every non-ASCII
    // character into a numeric entity. This replaces
    // mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), which is
    // deprecated as of PHP 8.2.
    $content = mb_encode_numericentity(
        $content,
        [0x80, 0x10FFFF, 0, 0x1FFFFF],
        'UTF-8'
    );

    // The @ before the method call suppresses any warnings that
    // loadHTML might throw because of invalid HTML in the page.
    @$document->loadHTML($content);

    // Drop the parse errors collected above and restore the caller's
    // libxml error mode
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    // Do nothing, if DOM is empty
    if (is_null($document->documentElement)) {
        return null;
    }

    return $document;
}
|
||
|
|
||
|
/**
 * Save contents of PHP built-in DOMDocument object as HTML5
 *
 * Serialises every child node of the document's <body> as XML with
 * expanded empty tags, then strips CDATA markers and rewrites the
 * expanded form of void elements (e.g. `<br></br>`) to the self-closing
 * form (`<br />`).
 *
 * @param  DOMDocument $document  DOMDocument object with nodes
 *
 * @return string                 The outputted DOM document as HTML(5)
 *                                compliant string
 */
protected function saveDOMDocument($document)
{
    // Pretty print output
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;

    // Serialise the children of <body> one by one
    $bodyElement = $document->getElementsByTagName('body')->item(0);

    $fragments = [];
    foreach ($bodyElement->childNodes as $child) {
        // Expand empty tags (e.g. <br/> to <br></br>)
        $xml = $document->saveXML($child, LIBXML_NOEMPTYTAG);
        if ($xml === false) {
            continue;
        }
        $fragments[] = $xml;
    }
    $markup = implode('', $fragments);

    // Make XML HTML5 compliant: remove encapsulated (uncommented) CDATA
    // blocks in <script> and <style> tags and fix formatting for
    // self-closing tags
    $patterns = [
        '~' . preg_quote('<![CDATA[', '~') . '~',
        '~' . preg_quote(']]>', '~') . '~',
        '~></(?:area|base(?:font)?|br|col|command|embed|frame|hr|img|input|keygen|link|meta|param|source|track|wbr)>~',
    ];
    $replacements = ['', '', ' />'];

    return preg_replace($patterns, $replacements, $markup);
}
|
||
|
}
|