1
0
Fork 0
mirror of https://github.com/YunoHost-Apps/mediawiki_ynh.git synced 2024-09-03 19:46:05 +02:00
mediawiki_ynh/sources/mediawiki/includes/libs/IEUrlExtension.php

271 lines
9.1 KiB
PHP

<?php
/**
* Checks for validity of requested URL's extension.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
* Internet Explorer derives a cache filename from a URL, and then in certain
* circumstances, uses the extension of the resulting file to determine the
* content type of the data, ignoring the Content-Type header.
*
* This can be a problem, especially when non-HTML content is sent by MediaWiki,
* and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
*
* Usually the script filename (e.g. api.php) is present in the URL, and this
* makes Internet Explorer think the extension is a harmless script extension.
* But Internet Explorer 6 and earlier allows the script extension to be
* obscured by encoding the dot as "%2E".
*
* This class contains functions which help in detecting and dealing with this
* situation.
*
* Checking the URL for a bad extension is somewhat complicated due to the fact
* that CGI doesn't provide a standard method to determine the URL. Instead it
* is necessary to pass a subset of $_SERVER variables, which we then attempt
* to use to guess parts of the URL.
*/
class IEUrlExtension {
	/**
	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
	 * to see if it indicates that the request was sent with a bad file
	 * extension. Returns true if the request should be denied or modified,
	 * false otherwise. The relevant $_SERVER elements are:
	 *
	 *   - SERVER_SOFTWARE
	 *   - REQUEST_URI
	 *   - QUERY_STRING
	 *   - PATH_INFO
	 *
	 * If a variable is unset in $_SERVER, it should be unset in $vars.
	 *
	 * REQUEST_URI is only trusted when SERVER_SOFTWARE names a server listed
	 * in haveUndecodedRequestUri(); otherwise QUERY_STRING is checked instead.
	 * PATH_INFO, when present, is always checked in addition.
	 *
	 * @param array $vars A subset of $_SERVER.
	 * @param array $extWhitelist Extensions which are allowed, assumed harmless.
	 * @return bool
	 */
	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
		// Check QUERY_STRING or REQUEST_URI
		if ( isset( $vars['SERVER_SOFTWARE'] )
			&& isset( $vars['REQUEST_URI'] )
			&& self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] )
		) {
			$urlPart = $vars['REQUEST_URI'];
		} elseif ( isset( $vars['QUERY_STRING'] ) ) {
			$urlPart = $vars['QUERY_STRING'];
		} else {
			$urlPart = '';
		}

		if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
			return true;
		}

		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
		// to be on the safe side.
		if ( isset( $vars['PATH_INFO'] )
			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist )
		) {
			return true;
		}

		// All checks passed
		return false;
	}

	/**
	 * Given a right-hand portion of a URL, determine whether IE would detect
	 * a potentially harmful file extension.
	 *
	 * Extensions are compared case-sensitively and strictly (as strings), so
	 * a numeric-looking extension such as "1e1" is never confused with a
	 * whitelisted "10".
	 *
	 * @param string $urlPart The right-hand portion of a URL
	 * @param array $extWhitelist An array of file extensions which may occur in this
	 *   URL, and which should be allowed.
	 * @return bool True if the detected extension is possibly harmful
	 */
	public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
		if ( strval( $urlPart ) === '' ) {
			return false;
		}

		$extension = self::findIE6Extension( $urlPart );
		if ( strval( $extension ) === '' ) {
			// No extension or empty extension
			return false;
		}

		if ( in_array( $extension, array( 'php', 'php5' ), true ) ) {
			// Script extension, OK
			return false;
		}

		if ( in_array( $extension, $extWhitelist, true ) ) {
			// Whitelisted extension
			return false;
		}

		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
			// Non-alphanumeric extension, unlikely to be registered.
			//
			// The regex above is known to match all registered file extensions
			// in a default Windows XP installation. It's important to allow
			// extensions with ampersands and percent signs, since that reduces
			// the number of false positives substantially.
			return false;
		}

		// Possibly bad extension
		return true;
	}

	/**
	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
	 * same GET parameters, or false if it can't figure one out.
	 *
	 * The URL is split at the first question mark; any further question marks
	 * in the query are percent-encoded and "&*" is appended to the query.
	 *
	 * @param string $url
	 * @param array $extWhitelist Extensions which are allowed, assumed harmless.
	 * @return bool|string The adjusted URL, or false if the adjusted URL would
	 *   still be rejected by isUrlExtensionBad()
	 */
	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
		$questionPos = strpos( $url, '?' );
		if ( $questionPos === false ) {
			// No query string at all: start an empty one
			$beforeQuery = $url . '?';
			$query = '';
		} elseif ( $questionPos === strlen( $url ) - 1 ) {
			// URL ends with the question mark: the query is empty
			$beforeQuery = $url;
			$query = '';
		} else {
			// Keep the first question mark with the left-hand part
			$beforeQuery = substr( $url, 0, $questionPos + 1 );
			$query = substr( $url, $questionPos + 1 );
		}

		// Multiple question marks cause problems. Encode the second and
		// subsequent question mark. "%3F" is the percent-encoding of "?"
		// ("%3E" would be ">", which would corrupt the parameter value).
		$query = str_replace( '?', '%3F', $query );

		// Append an invalid path character so that IE6 won't see the end of the
		// query string as an extension
		$query .= '&*';

		// Put the URL back together
		$url = $beforeQuery . $query;
		if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
			// Avoid a redirect loop
			return false;
		}

		return $url;
	}

	/**
	 * Determine what extension IE6 will infer from a certain query string.
	 * If the URL has an extension before the question mark, IE6 will use
	 * that and ignore the query string, but per the comment at
	 * areServerVarsBad() we don't have a reliable way to determine the URL,
	 * so areServerVarsBad() may pass in just the query string for $url.
	 * All entry points have safe extensions (php, php5) anyway, so
	 * checking the query string is possibly overly paranoid but never
	 * insecure.
	 *
	 * The criteria for finding an extension are as follows:
	 *   - everything from the first # onwards is ignored
	 *   - a possible extension is a dot followed by one or more characters not
	 *     in <>\"/:|?*.
	 *   - if we find a possible extension followed by the end of the string or
	 *     a #, that's our extension
	 *   - if we find a possible extension followed by a ?, that's our extension
	 *      - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
	 *        searching for another possible extension
	 *   - if we find a possible extension followed by a dot or another illegal
	 *     character, we ignore it and continue searching
	 *
	 * @param string $url URL
	 * @return mixed Detected extension (string), or false if none found
	 */
	public static function findIE6Extension( $url ) {
		$pos = 0;

		// Only the part of the URL before the fragment is examined
		$hashPos = strpos( $url, '#' );
		if ( $hashPos !== false ) {
			$urlLength = $hashPos;
		} else {
			$urlLength = strlen( $url );
		}

		$remainingLength = $urlLength;
		while ( $remainingLength > 0 ) {
			// Skip ahead to the next dot
			$pos += strcspn( $url, '.', $pos, $remainingLength );
			if ( $pos >= $urlLength ) {
				// End of string, we're done
				return false;
			}

			// We found a dot. Skip past it
			$pos++;
			$remainingLength = $urlLength - $pos;

			// Check for illegal characters in our prospective extension,
			// or for another dot
			$nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
			if ( $nextPos >= $urlLength ) {
				// No illegal character or next dot
				// We have our extension
				return substr( $url, $pos, $urlLength - $pos );
			}

			if ( $url[$nextPos] === '?' ) {
				// We've found a legal extension followed by a question mark
				// If the extension is NOT exe, dll or cgi, return it
				// (strcasecmp() returns 0, i.e. false, on a match)
				$extension = substr( $url, $pos, $nextPos - $pos );
				if ( strcasecmp( $extension, 'exe' )
					&& strcasecmp( $extension, 'dll' )
					&& strcasecmp( $extension, 'cgi' )
				) {
					return $extension;
				}
				// Else continue looking
			}

			// We found an illegal character or another dot
			// Skip to that character and continue the loop
			$pos = $nextPos;
			$remainingLength = $urlLength - $pos;
		}

		return false;
	}

	/**
	 * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
	 * returns true if that server is known to have a REQUEST_URI variable
	 * with %2E not decoded to ".". On such a server, it is possible to detect
	 * whether the script filename has been obscured.
	 *
	 * The function returns false if the server is not known to have this
	 * behavior. Microsoft IIS in particular is known to decode escaped script
	 * filenames.
	 *
	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
	 * or a specification in the style of a User-Agent header, such as
	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
	 *
	 * Only the leading product name (the text before the first slash or
	 * space) is compared, case-sensitively, against the known-good list.
	 *
	 * @param string $serverSoftware
	 * @return bool
	 */
	public static function haveUndecodedRequestUri( $serverSoftware ) {
		static $whitelist = array(
			'Apache',
			'Zeus',
			'LiteSpeed' );

		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
			return in_array( $m[1], $whitelist, true );
		} else {
			return false;
		}
	}
}