1
0
Fork 0
mirror of https://github.com/YunoHost-Apps/mediawiki_ynh.git synced 2024-09-03 19:46:05 +02:00
mediawiki_ynh/sources/mediawiki/includes/ZipDirectoryReader.php

712 lines
21 KiB
PHP

<?php
/**
* ZIP file directories reader, for the purposes of upload verification.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
* A class for reading ZIP file directories, for the purposes of upload
* verification.
*
* Only a functional interface is provided: ZipDirectoryReader::read(). No
* access is given to object instances.
*
*/
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call a function for each file discovered in it.
	 *
	 * Because this class is aimed at verification, an error is raised on
	 * suspicious or ambiguous input, instead of emulating some standard
	 * behavior.
	 *
	 * @param string $fileName The archive file name
	 * @param callable $callback The callback function. It will be called for
	 *   each file with a single associative array each time, with members:
	 *
	 *      - name: The file name. Directories conventionally have a trailing
	 *        slash.
	 *
	 *      - mtime: The file modification time, in MediaWiki 14-char format
	 *
	 *      - size: The uncompressed file size
	 *
	 * @param array $options An associative array of read options, with the
	 *   option name in the key. This may currently contain:
	 *
	 *      - zip64: If this is set to true, then we will emulate a
	 *        library with ZIP64 support, like OpenJDK 7. If it is set to
	 *        false, then we will emulate a library with no knowledge of
	 *        ZIP64.
	 *
	 *        NOTE: The ZIP64 code is untested and probably doesn't work. It
	 *        turned out to be easier to just reject ZIP64 archive uploads,
	 *        since they are likely to be very rare. Confirming safety of a
	 *        ZIP64 file is fairly complex. What do you do with a file that is
	 *        ambiguous and broken when read with a non-ZIP64 reader, but valid
	 *        when read with a ZIP64 reader? This situation is normal for a
	 *        valid ZIP64 file, and working out what non-ZIP64 readers will make
	 *        of such a file is not trivial.
	 *
	 * @return Status A Status object. The following fatal errors are defined:
	 *
	 *      - zip-file-open-error: The file could not be opened.
	 *
	 *      - zip-wrong-format: The file does not appear to be a ZIP file.
	 *
	 *      - zip-bad: There was something wrong or ambiguous about the file
	 *        data.
	 *
	 *      - zip-unsupported: The ZIP file uses features which
	 *        ZipDirectoryReader does not support.
	 *
	 * The default messages for those fatal errors are written in a way that
	 * makes sense for upload verification.
	 *
	 * If a fatal error is returned, more information about the error will be
	 * available in the debug log.
	 *
	 * Note that the callback function may be called any number of times before
	 * a fatal error is returned. If this occurs, the data sent to the callback
	 * function should be discarded.
	 */
	public static function read( $fileName, $callback, $options = array() ) {
		$zdr = new self( $fileName, $callback, $options );

		return $zdr->execute();
	}

	/** The file name */
	public $fileName;

	/** The opened file resource */
	public $file;

	/** The cached length of the file, or null if it has not been loaded yet. */
	public $fileLength;

	/** A segmented cache of the file contents, keyed by segment index */
	public $buffer;

	/** The file data callback */
	public $callback;

	/** The ZIP64 mode */
	public $zip64 = false;

	/** Stored headers */
	public $eocdr, $eocdr64, $eocdr64Locator;

	/** Reset to an empty array by execute(); not otherwise used in this class */
	public $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size for the file contents cache */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Protected constructor; instances are only created by read().
	 *
	 * @param string $fileName The archive file name
	 * @param callable $callback The per-file callback
	 * @param array $options Read options; only the "zip64" key is consulted
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Read the directory according to settings in $this.
	 *
	 * The file handle opened here is always closed again before this method
	 * returns or lets an exception escape.
	 *
	 * @return Status
	 */
	public function execute() {
		// 'b' keeps the read binary-safe on platforms that distinguish text mode
		$this->file = fopen( $this->fileName, 'rb' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff )
				{
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}
				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Anything else (an invalid struct type in unpack(), or an exception
			// thrown by the user callback) is not ours to handle, but the file
			// handle must not be leaked while it propagates.
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );

		return $status;
	}

	/**
	 * Throw an error, and log a debug message
	 *
	 * @param string $code The error code, later passed to Status::fatal()
	 * @param string $debugMessage Detail written to the debug log only
	 * @throws ZipDirectoryReaderError Always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Read the header which is at the end of the central directory,
	 * unimaginatively called the "end of central directory record" by the ZIP
	 * spec.
	 *
	 * The result is stored in $this->eocdr, together with the derived members
	 * "EOCDR size", "file comment" and "position" (absolute file offset of the
	 * record's signature).
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );

		// The comment length is a 2-byte field, so the record must start within
		// the last 65536 + $structSize bytes of the file.
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$eocdrSize = $structSize + $this->eocdr['file comment length'];
		$this->eocdr['EOCDR size'] = $eocdrSize;

		// The record, including its comment, must run exactly to the end of the file
		if ( $eocdrSize != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}

		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the header called the "ZIP64 end of central directory locator". An
	 * error will be raised if it does not exist.
	 *
	 * The locator is expected immediately before the EOCDR, so
	 * readEndOfCentralDirectoryRecord() must have run first. The result is
	 * stored in $this->eocdr64Locator.
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$structSize = $this->getStructSize( $info );

		$block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size']
			- $structSize, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Read the header called the "ZIP64 end of central directory record". It
	 * may replace the regular "end of central directory record" in ZIP64 files.
	 *
	 * Requires $this->eocdr64Locator to be populated. The result is stored in
	 * $this->eocdr64.
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		// "number of disks" is the TOTAL disk count, which is 1 for an ordinary
		// single-disk archive. 0 is still tolerated, as it was before.
		if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] > 1 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if ( $data['disk'] !== 0
			|| $data['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * non-ZIP64 reader.
	 *
	 * @return array List containing offset and size.
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * ZIP64-compliant reader.
	 *
	 * @return array List containing offset and size.
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];

		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff )
		{
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					// In a ZIP64 file the directory ends where the EOCDR64 begins
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location, invoking the callback
	 * once per directory entry.
	 *
	 * @param int|float $offset Absolute file offset of the central directory
	 * @param int|float $size Size of the central directory in bytes
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The three variable-length members directly follow the fixed header
			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
				$data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff ) )
			{
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Left operand wins in array union: ZIP64 values override
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( !function_exists( 'iconv' )
				|| $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) )
			{
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 *
	 * The extra field is scanned as a sequence of (id, size, data) records; the
	 * first record whose id is ZIP64_EXTRA_HEADER is unpacked and returned.
	 *
	 * NOTE(review): the ZIP64 record is always unpacked with the full 28-byte
	 * layout below, so a shorter record makes unpack() raise zip-bad. Writers
	 * may legitimately emit only some of these fields -- confirm against the
	 * ZIP specification before relying on zip64 mode.
	 *
	 * @param string $extraField The raw "extra field" of a directory entry
	 * @return array|bool The unpacked ZIP64 members, or false if the extra
	 *   field has no ZIP64 record
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file. The value is taken from fstat() on first
	 * use and cached in $this->fileLength.
	 *
	 * @return int
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough bytes
	 * in the file to satisfy the request, an exception will be thrown.
	 *
	 * @param int $start The byte offset of the start of the block.
	 * @param int $length The number of bytes to return. If omitted, the remainder
	 *    of the file will be returned.
	 *
	 * @throws ZipDirectoryReaderError With code zip-bad if the requested range
	 *    is not fully inside the file
	 * @return string
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();

		// A negative offset can arise from subtracting header sizes from a short
		// file; reject it here rather than relying on a failed seek later on.
		if ( $start < 0 || $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		if ( $length == 0 ) {
			// Nothing to read (e.g. an empty central directory)
			return '';
		}

		// Segments $startSeg .. $endSeg - 1 cover [$start, $end); $endSeg itself
		// lies entirely past the requested range and is not loaded.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached. This is a helper function
	 * for getBlock().
	 *
	 * If there are not enough bytes in the file to satisfy the request, the
	 * return value will be truncated. If a request is made for a segment beyond
	 * the end of the file, an empty string will be returned.
	 *
	 * @param int $segIndex Zero-based segment index
	 * @throws ZipDirectoryReaderError With code zip-bad if seeking or reading fails
	 * @return string
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';

				return '';
			}
			// fseek() returns 0 on success and -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}

		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size of a structure in bytes. See unpack() for the format of $struct.
	 *
	 * @param array $struct Structure definition
	 * @return int
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				// array( typeName, byteLength )
				list( , $fieldSize ) = $type;
				$size += $fieldSize;
			} else {
				$size += $type;
			}
		}

		return $size;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param string $string The binary data input
	 *
	 * @param array $struct An associative array giving structure members and their
	 *    types. In the key is the field name. The value may be either an
	 *    integer, in which case the field is a little-endian unsigned integer
	 *    encoded in the given number of bytes, or an array, in which case the
	 *    first element of the array is the type name, and the subsequent
	 *    elements are type-dependent parameters. Only one such type is defined:
	 *       - "string": The second array element gives the length of string.
	 *          Not null terminated.
	 *
	 * @param int $offset The offset into the string at which to start unpacking.
	 *
	 * @throws MWException If $struct names an unknown type
	 * @throws ZipDirectoryReaderError If the structure would run past the end
	 *    of $string (zip-bad) or an integer is too large (zip-unsupported)
	 * @return array Unpacked associative array. Note that large integers in the input
	 *    may be represented as floating point numbers in the return value, so
	 *    the use of weak comparison is advised.
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = array();
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
					case 'string':
						$data[$key] = substr( $string, $pos, $fieldSize );
						$pos += $fieldSize;
						break;
					default:
						throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Returns a bit from a given position in an integer value, converted to
	 * boolean.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB.
	 * @return bool
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper function which dumps a string in hexdump -C format.
	 *
	 * Each output line holds an 8-digit hexadecimal offset, up to 16 bytes in
	 * hexadecimal (with a wider gap after the eighth), and the same bytes as
	 * printable characters between "|" delimiters. The output is printed, not
	 * returned.
	 *
	 * @param string $s The data to dump
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					// Pad with the width of one hex byte so that the ASCII
					// column stays aligned on the final, partial line
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print "  |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}
/**
 * Exception type used internally by ZipDirectoryReader to abort a read.
 * It carries the error code that was passed to the constructor.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code supplied when the exception was created */
	public $errorCode;

	/**
	 * @param mixed $code The error code; it is also embedded in the exception message
	 */
	public function __construct( $code ) {
		parent::__construct( 'ZipDirectoryReader error: ' . $code );
		$this->errorCode = $code;
	}

	/**
	 * Get the error code given at construction time.
	 *
	 * @return mixed
	 */
	public function getErrorCode() {
		return $this->errorCode;
	}
}