dolibarr_ynh/sources/dolibarr/htdocs/includes/tcpdi/tcpdi_parser.php

<?php
//============================================================+
// File name   : tcpdi_parser.php
// Version     : 1.0
// Begin       : 2013-09-25
// Last Update : 2013-09-25
// Author      : Paul Nicholls - https://github.com/pauln
// License     : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
//
// Based on    : tcpdf_parser.php
// Version     : 1.0.003
// Begin       : 2011-05-23
// Last Update : 2013-03-17
// Author      : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
// License     : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
// -------------------------------------------------------------------
// Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
//
// This file is for use with the TCPDF software library.
//
// tcpdi_parser is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// tcpdi_parser is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the License
// along with tcpdi_parser. If not, see
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
//
// See LICENSE file for more information.
// -------------------------------------------------------------------
//
// Description : This is a PHP class for parsing PDF documents.
//
//============================================================+

/**
 * @file
 * This is a PHP class for parsing PDF documents.<br>
 * @author Paul Nicholls
 * @author Nicola Asuni
 * @version 1.0
 */

// include class for decoding filters
require_once(dirname(__FILE__).'/../tcpdf/include/tcpdf_filters.php');

// Integer identifiers for the PDF value types handled by the parser.
// Each constant is declared only when no definition with that name exists yet.
defined('PDF_TYPE_NULL') || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC') || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN') || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX') || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING') || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY') || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC') || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF') || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT') || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM') || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN') || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL') || define('PDF_TYPE_REAL', 12);

/**
 * @class tcpdi_parser
 * This is a PHP class for parsing PDF documents.<br>
 * Based on TCPDF_PARSER, part of the TCPDF project by Nicola Asuni.
 * @brief This is a PHP class for parsing PDF documents.
 * @version 1.0
 * @author Paul Nicholls - github.com/pauln
 * @author Nicola Asuni - info@tecnick.com
 */
class tcpdi_parser {
	/**
	 * Unique parser ID
	 * @public
	 */
	public $uniqueid = '';

	/**
	 * Raw content of the PDF document.
	 * @private
	 */
	private $pdfdata = '';

	/**
	 * XREF data.
	 * @protected
	 */
	protected $xref = array();

	/**
	 * Object streams.
	 * @protected
	 */
	protected $objstreams = array();

	/**
	 * Objects in objstreams.
	 * @protected
	 */
	protected $objstreamobjs = array();

	/**
	 * List of seen XREF data locations.
	 * @protected
	 */
	protected $xref_seen_offsets = array();

	/**
	 * Array of PDF objects.
	 * @protected
	 */
	protected $objects = array();

	/**
	 * Array of object offsets.
	 * @private
	 */
	private $objoffsets = array();

	/**
	 * Class object for decoding filters.
	 * @private
	 */
	private $FilterDecoders;

    /**
     * Pages
     *
     * @private array
     */
    private $pages;

    /**
     * Page count
     * @private integer
     */
    private $page_count;

    /**
     * actual page number
     * @private integer
     */
    private $pageno;

    /**
     * PDF version of the loaded document
     * @private string
     */
    private $pdfVersion;

    /**
     * Available BoxTypes
     *
     * @public array
     */
    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');

// -----------------------------------------------------------------------------

	/**
	 * Load a PDF document: store the raw data, read the cross-reference and
	 * trailer data, index the object offsets, detect the PDF version and
	 * collect the pages.
	 * @param $data (string) PDF data to parse.
	 * @param $uniqueid (mixed) Identifier stored in the public $uniqueid property.
	 * @public
	 * @since 1.0.000 (2011-05-24)
	 */
	public function __construct($data, $uniqueid) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		$this->uniqueid = $uniqueid;
		$this->pdfdata = $data;
		// initialize class for decoding filters
		$this->FilterDecoders = new TCPDF_FILTERS();
		// get xref and trailer data
		$this->xref = $this->getXrefData();
		$this->findObjectOffsets();
		// Objects are parsed on demand by getObjectVal(); start with an empty cache
		// (this also drops anything cached while scanning for object streams).
		$this->objects = array();
		$this->getPDFVersion();
		$this->readPages();
	}

	/**
	 * Release the parsed document. Every data-holding property is unset and
	 * then re-created with an empty value so the memory it held can be freed.
	 */
	public function cleanUp() {
		// property name => empty value it is reset to (processed in this order)
		$resetValues = array(
			'pdfdata' => '',
			'objstreams' => array(),
			'objects' => array(),
			'objstreamobjs' => array(),
			'xref' => array(),
			'objoffsets' => array(),
			'pages' => array(),
		);
		foreach ($resetValues as $property => $emptyValue) {
			unset($this->$property);
			$this->$property = $emptyValue;
		}
	}

	/**
	 * Return the parser state as a three element list.
	 * @return (array) Array holding, in this order: the xref/trailer data, the
	 *         objects parsed so far and the collected pages.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getParsedData() {
		$parsed = array();
		$parsed[] = $this->xref;
		$parsed[] = $this->objects;
		$parsed[] = $this->pages;
		return $parsed;
	}

    /**
     * Detect the PDF version of the loaded document.
     *
     * The first "digit.digit" sequence found within the first 16 bytes of the
     * document is stored as the version; when none is found the previously
     * stored value is left untouched.
     * @return string|null The stored PDF version.
     * @public
     */
    public function getPDFVersion() {
        $header = substr($this->pdfdata, 0, 16);
        $found = array();
        preg_match('/\d\.\d/', $header, $found);
        if (!isset($found[0])) {
            return $this->pdfVersion;
        }
        $this->pdfVersion = $found[0];
        return $this->pdfVersion;
    }

    /**
     * Read all /Page(s) of the document into $this->pages.
     *
     * Starts at the /Pages reference of the document catalog (trailer /Root)
     * and walks its /Kids through readPage(). The page list and page count are
     * always initialised, so getPageCount() returns 0 (not null) when the page
     * tree cannot be located.
     */
    public function readPages() {
        $this->pages = array();
        $this->page_count = 0;

        if (!isset($this->xref['trailer'][1]['/Root'])) {
            // No document catalog referenced by the trailer.
            return;
        }
        $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']);
        if (!isset($params[1][1]) || !is_array($params[1][1]) || !isset($params[1][1]['/Pages'])) {
            // Catalog could not be resolved or has no /Pages entry.
            return;
        }
        $objref = $params[1][1]['/Pages'];
        if (!is_array($objref) || $objref[0] !== PDF_TYPE_OBJREF) {
            // /Pages is not an indirect reference.
            return;
        }

        $dict = $this->getObjectVal($objref);
        if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) {
            // Dict wrapped in an object
            $dict = $dict[1];
        }

        if ($dict[0] !== PDF_TYPE_DICTIONARY) {
            return;
        }

        if (isset($dict[1]['/Kids'])) {
            $v = $dict[1]['/Kids'];
            if ($v[0] == PDF_TYPE_ARRAY) {
                foreach ($v[1] as $ref) {
                    $page = $this->getObjectVal($ref);
                    $this->readPage($page);
                }
            }
        }

        $this->page_count = count($this->pages);
    }

    /**
     * Add a page tree node to $this->pages.
     *
     * A node without a /Kids entry is appended as a page; a node with /Kids is
     * expanded by resolving each kid and processing it the same way.
     * @param $page (array) Resolved page tree node as returned by getObjectVal().
     */
    private function readPage($page) {
        if (!isset($page[1][1]['/Kids'])) {
            // Leaf node
            $this->pages[] = $page;
            return;
        }
        // Nested pages: descend into every kid
        foreach ($page[1][1]['/Kids'][1] as $kidRef) {
            $kid = $this->getObjectVal($kidRef);
            $this->readPage($kid);
        }
    }

    /**
     * Number of pages collected by readPages().
     *
     * @return int
     */
    function getPageCount() {
        $count = $this->page_count;
        return $count;
    }

	/**
	 * Get Cross-Reference (xref) table and trailer data from PDF document data.
	 *
	 * With $offset == 0 the last "startxref" of the document is used. With a
	 * non-zero $offset (a /Prev value) the offset is used directly when it
	 * points at an xref table or is followed by an indirect object; otherwise
	 * the next "startxref" after the offset is followed.
	 * @param $offset (int) xref offset (if know).
	 * @param $xref (array) previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		if ($offset == 0) {
			// find last startxref
			if (preg_match('/.*[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/is', $this->pdfdata, $matches) == 0) {
				$this->Error('Unable to find startxref');
			}
			$startxref = $matches[1];
		} else {
			// position of the first non line-break byte at/after the offset
			$probe = $offset + (int) strspn($this->pdfdata, "\r\n", $offset);
			if (substr($this->pdfdata, $probe, 4) === 'xref') {
				// Already pointing at a cross-reference table: do not go looking for
				// a later object or startxref, which would select the wrong section.
				$startxref = $offset;
			} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
				// Cross-Reference Stream object
				$startxref = $offset;
			} elseif (preg_match('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
				// startxref found
				$startxref = $matches[1][0];
			} else {
				$this->Error('Unable to find startxref');
			}
		}
		unset($matches);

		// DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts.
		$startxref = (int) $startxref;
		$startxref += (int) strspn($this->pdfdata, "\r\n", $startxref);

		// check xref position (exact comparison at the offset; a failed search can
		// no longer be mistaken for a hit at offset 0)
		if (substr($this->pdfdata, $startxref, 4) === 'xref') {
			// Cross-Reference
			$xref = $this->decodeXref($startxref, $xref);
		} else {
			// Cross-Reference Stream
			$xref = $this->decodeXrefStream($startxref, $xref);
		}
		if (empty($xref)) {
			$this->Error('Unable to find xref');
		}

		return $xref;
	}

	/**
	 * Decode the Cross-Reference section
	 *
	 * Entries are read strictly one after another starting right behind the
	 * "xref" keyword; reading stops at the first token that is not a
	 * subsection header or an entry (normally the "trailer" keyword). Offsets
	 * already known from a newer section are not overwritten. The trailer of
	 * the section is parsed and a /Prev section is followed once.
	 * @param $startxref (int) Offset at which the xref section starts.
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function decodeXref($startxref, $xref=array()) {
		$this->xref_seen_offsets[] = $startxref;
		if (!isset($xref['xref_location'])) {
			$xref['xref_location'] = $startxref;
			$xref['max_object'] = 0;
		}
		// extract xref data (object indexes and offsets); skip "xref" and one EOL byte
		$xoffset = $startxref + 5;
		// initialize object number
		$obj_num = 0;
		$offset = $xoffset;
		$whitespace = "\x00\x09\x0a\x0c\x0d\x20";
		while (true) {
			// Skip the separator before the next line, whatever its width.
			$offset += (int) strspn($this->pdfdata, $whitespace, $offset);
			// Anchored (A) match: only a token starting exactly here is accepted, so
			// the scan cannot run past the table into the rest of the file.
			if (preg_match('/([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/A', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) !== 1) {
				break;
			}
			$offset = (strlen($matches[0][0]) + $matches[0][1]);
			$entry_type = $matches[3][0];
			if ($entry_type === 'n') {
				// in-use entry: index by [object number][generation number]
				$gen_num = intval($matches[2][0]);
				// check if object already exist
				if (!isset($xref['xref'][$obj_num][$gen_num])) {
					// store object offset position
					$xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($entry_type === 'f') {
				// free entry: only advances the object number
				++$obj_num;
			} else {
				// subsection header: first object number of the subsection
				$obj_num = intval($matches[1][0]);
			}
		}
		unset($matches);
		$xref['max_object'] = max($xref['max_object'], $obj_num);
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				$xref['trailer'][0] = PDF_TYPE_DICTIONARY;
				$xref['trailer'][1] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1]));
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array());
					$xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]);
					$xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]);
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref (each location is followed only once)
				$prevoffset = intval($matches[1]);
				if (!in_array($prevoffset, $this->xref_seen_offsets)) {
					$this->xref_seen_offsets[] = $prevoffset;
					$xref = $this->getXrefData($prevoffset, $xref);
				}
			}
			unset($matches);
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference Stream section
	 *
	 * Reads the stream dictionary (/Type, /Index, /W, /Prev, /DecodeParms and,
	 * for the newest section only, /Size, /Root, /Info and /ID), reverses the
	 * PNG row predictor when one is declared, splits every row into the three
	 * fields described by /W and records the offsets of uncompressed (type 1)
	 * objects that are not already known. Compressed (type 2) entries are not
	 * recorded here.
	 * @param $startxref (int) Offset at which the xref section starts.
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.003 (2013-03-16)
	 */
	protected function decodeXrefStream($startxref, $xref=array()) {
		// remember this location so that a cyclic /Prev chain cannot recurse forever
		$this->xref_seen_offsets[] = $startxref;
		// try to read Cross-Reference Stream
		list($xrefobj, $unused) = $this->getRawObject($startxref);
		$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
		if (!isset($xref['xref_location'])) {
			$xref['xref_location'] = $startxref;
			$xref['max_object'] = 0;
		}
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			$xref['trailer'][0] = PDF_TYPE_DICTIONARY;
			$xref['trailer'][1] = array();
			$filltrailer = true;
		} else {
			$filltrailer = false;
		}
		// stream dictionary (empty when the object could not be read)
		$sarr = array();
		if (is_array($xrefcrs) AND isset($xrefcrs[0][1]) AND is_array($xrefcrs[0][1])) {
			$sarr = $xrefcrs[0][1];
		}
		$valid_crs = false;
		$columns = 1; // Default as per PDF 32000-1:2008.
		$predictor = 1; // Default as per PDF 32000-1:2008.
		$wb = null; // field widths from /W
		$subsections = array(); // list of array(first object number, number of entries) from /Index
		foreach ($sarr as $key => $v) {
			if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) {
				$valid_crs = true;
			} elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY) AND (count($v[1]) >= 2)) {
				// pairs of (first object number, number of entries), one per subsection
				$pairs = count($v[1]);
				for ($p = 0; ($p + 1) < $pairs; $p += 2) {
					$subsections[] = array(intval($v[1][$p][1]), intval($v[1][($p + 1)][1]));
				}
			} elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) {
				// get previous xref offset
				$prevxref = intval($v[1]);
			} elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY) AND (count($v[1]) >= 3)) {
				// number of bytes (in the decoded stream) of the corresponding field
				$wb = array();
				$wb[0] = intval($v[1][0][1]);
				$wb[1] = intval($v[1][1][1]);
				$wb[2] = intval($v[1][2][1]);
			} elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) {
				$decpar = $v[1];
				foreach ($decpar as $kdc => $vdc) {
					if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
						$columns = intval($vdc[1]);
					} elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
						$predictor = intval($vdc[1]);
					}
				}
			} elseif ($filltrailer) {
				switch($key) {
					case '/Size':
					case '/Root':
					case '/Info':
					case '/ID':
						$xref['trailer'][1][$key] = $v;
						break;
					default:
						break;
				}
			}
		}
		// decode data
		$obj_num = 0;
		$entrylen = is_array($wb) ? array_sum($wb) : 0; // bytes per decoded entry
		$raw = isset($xrefcrs[1][3][0]) ? $xrefcrs[1][3][0] : '';
		if ($valid_crs AND ($entrylen > 0) AND is_string($raw) AND ($raw !== '')) {
			// convert the stream into a 0-based array of integers
			$bytes = array_values(unpack('C*', $raw));
			$ddata = array();
			if (empty($predictor) OR ($predictor >= 10)) {
				// PNG predictors: every row starts with a tag byte naming the filter
				// actually used for that row (0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth),
				// whatever value /Predictor declares.
				$rowlen = ($columns + 1);
				$prev_row = array_fill(0, max($columns, 0), 0);
				foreach (array_chunk($bytes, $rowlen) as $row) {
					if (count($row) < $rowlen) {
						// truncated final row
						break;
					}
					$filter = $row[0];
					$decoded = array();
					for ($i = 1; $i <= $columns; ++$i) {
						$j = ($i - 1);
						$row_up = $prev_row[$j];
						if ($j == 0) {
							$row_left = 0;
							$row_upleft = 0;
						} else {
							// the left neighbour is the already decoded byte of this row
							$row_left = $decoded[($j - 1)];
							$row_upleft = $prev_row[($j - 1)];
						}
						switch ($filter) {
							case 0: { // None
								$value = $row[$i];
								break;
							}
							case 1: { // Sub
								$value = ($row[$i] + $row_left);
								break;
							}
							case 2: { // Up
								$value = ($row[$i] + $row_up);
								break;
							}
							case 3: { // Average (integer floor of the mean)
								$value = ($row[$i] + (($row_left + $row_up) >> 1));
								break;
							}
							case 4: { // Paeth
								// initial estimate
								$p = ($row_left + $row_up - $row_upleft);
								// distances
								$pa = abs($p - $row_left);
								$pb = abs($p - $row_up);
								$pc = abs($p - $row_upleft);
								// nearest neighbour, ties resolved in the order left, up, upper-left
								if (($pa <= $pb) AND ($pa <= $pc)) {
									$value = ($row[$i] + $row_left);
								} elseif ($pb <= $pc) {
									$value = ($row[$i] + $row_up);
								} else {
									$value = ($row[$i] + $row_upleft);
								}
								break;
							}
							default: {
								$unknown = (10 + $filter);
								$this->Error("Unknown PNG predictor $unknown");
								$value = $row[$i];
								break;
							}
						}
						$decoded[$j] = ($value & 0xff);
					}
					$ddata[] = $decoded;
					$prev_row = $decoded;
				}
			} elseif ($predictor == 1) {
				// No prediction: rows carry no tag byte and are exactly one entry wide
				$ddata = array_chunk($bytes, $entrylen);
			} else {
				$this->Error("Unknown PNG predictor $predictor");
			}
			unset($bytes);
			// complete decoding: assemble the three big-endian fields of every entry
			$sdata = array();
			foreach ($ddata as $row) {
				if (count($row) < $entrylen) {
					// not enough bytes for a full entry
					break;
				}
				$entry = array(0, 0, 0);
				if ($wb[0] == 0) {
					// default type field
					$entry[0] = 1;
				}
				$i = 0; // count bytes on the row
				// for every column
				for ($c = 0; $c < 3; ++$c) {
					// for every byte on the column
					for ($b = 0; $b < $wb[$c]; ++$b) {
						$entry[$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
						++$i;
					}
				}
				$sdata[] = $entry;
			}
			unset($ddata);
			// fill xref: every entry, whatever its type, stands for one object number
			$section = 0;
			if (isset($subsections[0])) {
				$obj_num = $subsections[0][0];
				$remaining = $subsections[0][1];
			} else {
				$obj_num = 0;
				$remaining = count($sdata);
			}
			$next_free = $obj_num;
			foreach ($sdata as $row) {
				// move on to the next /Index subsection once the current one is used up
				while (($remaining <= 0) AND isset($subsections[($section + 1)])) {
					++$section;
					$obj_num = $subsections[$section][0];
					$remaining = $subsections[$section][1];
				}
				if ($row[0] == 1) {
					// (n) objects that are in use but are not compressed
					// $row[1] = byte offset, $row[2] = generation number
					if (!isset($xref['xref'][$obj_num][$row[2]])) {
						// store object offset position
						$xref['xref'][$obj_num][$row[2]] = $row[1];
					}
				}
				// type 0 (free), type 2 (compressed, stored in an object stream) and
				// unknown types are not recorded
				++$obj_num;
				--$remaining;
				$next_free = max($next_free, $obj_num);
			}
			$obj_num = $next_free;
		} // end decoding data
		$xref['max_object'] = max($xref['max_object'], $obj_num);
		if (isset($prevxref) AND !in_array($prevxref, $this->xref_seen_offsets)) {
			// get previous xref (each location is followed only once)
			$this->xref_seen_offsets[] = $prevxref;
			$xref = $this->getXrefData($prevxref, $xref);
		}
		return $xref;
	}

	/**
	 * Get raw stream data
	 *
	 * Skips the white space in front of the "stream" keyword, the keyword
	 * itself and the single end-of-line marker that follows it, then returns
	 * the next $length bytes. Only one end-of-line marker is skipped so that
	 * stream data beginning with CR or LF bytes is not truncated.
	 * @param $offset (int) Stream offset.
	 * @param $length (int) Stream length.
	 * @return array containing the stream element (PDF_TYPE_STREAM, raw data) and the offset just past the data
	 * @protected
	 */
	protected function getRawStream($offset, $length) {
		$offset += (int) strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		$offset += 6; // "stream"
		// end-of-line marker after the keyword: CR LF or a single LF (a lone CR is tolerated)
		$eol = (string) substr($this->pdfdata, $offset, 2);
		if ($eol === "\r\n") {
			$offset += 2;
		} elseif (($eol !== '') AND (($eol[0] === "\n") OR ($eol[0] === "\r"))) {
			$offset += 1;
		}
		// a missing or negative length yields an empty stream
		$length = max(0, (int) $length);

		$obj = array();
		$obj[] = PDF_TYPE_STREAM;
		$obj[] = (string) substr($this->pdfdata, $offset, $length);

		return array($obj, $offset+$length);
	}

	/**
	 * Get object type, raw value and offset to next object
	 *
	 * Parses one token or composite value (name, string, hex string, array,
	 * dictionary, null, boolean, number, indirect reference, object header or
	 * one of the delimiters "]", ">>", "endobj", "endstream").
	 * @param $offset (int) Object offset.
	 * @param $data (string) Data to read from; when null the whole PDF document is used.
	 * @return array containing the parsed object (type followed by its value;
	 *         for references: type, object number, generation number) and the
	 *         offset to the next object
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function getRawObject($offset=0, $data=null) {
		if ($data === null) {
			$data =& $this->pdfdata;
		}
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += (int) strspn($data, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		// get first char (empty at the end of the data)
		$char = isset($data[$offset]) ? $data[$offset] : '';
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($data, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					// hand back the complete (object, offset) pair of the next token
					return $this->getRawObject($offset, $data);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = PDF_TYPE_TOKEN;
				++$offset;
				$length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset);
				$objval = substr($data, $offset, $length);
				$offset += $length;
				break;
			}
			case '(':   // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = PDF_TYPE_STRING;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($data[$strpos])) {
							break;
						}
						$ch = $data[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($data, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[':   // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = PDF_TYPE_ARRAY;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$before = $offset;
						// get element
						list($element, $offset) = $this->getRawObject($offset, $data);
						$objval[] = $element;
						// stop at the closing bracket, or when nothing was consumed
						// (end of data / unrecognised token) to avoid an endless loop
					} while (($element[0] !== ']') AND ($offset > $before));
					// remove closing delimiter (or the empty element of an aborted read)
					array_pop($objval);
				} else {
					$objtype = ']';
				}
				break;
			}
			case '<':   // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($data[($offset + 1)]) AND ($data[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = PDF_TYPE_DICTIONARY;
					if ($char == '<') {
						list ($objval, $offset) = $this->getDictValue($offset, $data);
					} else {
						$objtype = '>>';
						$offset += 2;
					}
				} else {
					// hexadecimal string object
					$objtype = PDF_TYPE_HEX;
					++$offset;
					// Hex digits may be separated by white space (e.g. the "Panose" entry in the
					// FontDescriptor Style dict) and the string may be empty. The match is
					// anchored at the current offset instead of copying the rest of the data.
					if (($char == '<') AND (preg_match('/([0-9A-Fa-f\x00\x09\x0a\x0c\x0d\x20]*)>/A', $data, $matches, 0, $offset) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
						unset($matches);
					}
				}
				break;
			}
			default: {
				// first four bytes of the token (fewer at the end of the data)
				$frag = (string) substr($data, $offset, 4);
				switch ($frag) {
					case 'endo':
						// indirect object
						$objtype = 'endobj';
						$offset += 6;
						break;
					case 'stre':
						// Streams should always be indirect objects, and thus processed by getRawStream().
						// If we get here, treat it as a null object as something has gone wrong.
					case 'null':
						// null object
						$objtype = PDF_TYPE_NULL;
						$offset += 4;
						$objval = 'null';
						break;
					case 'true':
						// boolean true object
						$objtype = PDF_TYPE_BOOLEAN;
						$offset += 4;
						$objval = true;
						break;
					case 'fals':
						// boolean false object
						$objtype = PDF_TYPE_BOOLEAN;
						$offset += 5;
						$objval = false;
						break;
					case 'ends':
						// end stream object
						$objtype = 'endstream';
						$offset += 9;
						break;
					default:
						$recognised = false;
						if (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1) {
							if ($matches[3] == 'R') {
								// indirect object reference
								$objtype = PDF_TYPE_OBJREF;
								$offset += strlen($matches[0]);
								$objval = array(intval($matches[1]), intval($matches[2]));
								$recognised = true;
							} elseif ($matches[3] == 'obj') {
								// object start
								$objtype = PDF_TYPE_OBJECT;
								$objval = intval($matches[1]).'_'.intval($matches[2]);
								$offset += strlen ($matches[0]);
								$recognised = true;
							}
						}
						// anything that is not exactly "R" or "obj" is read as a plain number
						if (!$recognised AND (($numlen = strspn($data, '+-.0123456789', $offset)) > 0)) {
							// numeric object
							$objval = substr($data, $offset, $numlen);
							$objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC;
							$offset += $numlen;
						}
						unset($matches);
						break;
				}
				break;
			}
		}
		$obj = array();
		$obj[] = $objtype;
		if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) {
			// references are flattened to (type, object number, generation number)
			foreach ($objval as $val) {
				$obj[] = $val;
			}
		} else {
			$obj[] = $objval;
		}
		return array($obj, $offset);
	}
	/**
	 * Parse a dictionary.
	 *
	 * First the text of the dictionary is cut out of $data by balancing "<<"
	 * and ">>", then its key/value pairs are parsed with getRawObject().
	 * @param $offset (int) Offset of the opening "<<" in $data.
	 * @param $data (string) Data to read from (taken by reference, not modified).
	 * @return array containing the dictionary as an array of '/Key' => value
	 *         entries and the offset just past the closing ">>"
	 * @private
	 */
	private function getDictValue($offset, &$data) {
		$objval = array();

		// Extract dict from data.
		$datalen = strlen($data);
		$depth = 1; // number of dictionaries currently open
		$dict = '';
		$offset += 2;
		do {
			if ($offset >= $datalen) {
				// Truncated data: close whatever is still open so the parsing below terminates.
				$dict .= str_repeat('>>', $depth);
				break;
			}
			// copy everything up to the next angle bracket in one go
			$run = strcspn($data, '<>', $offset);
			if ($run > 0) {
				$dict .= substr($data, $offset, $run);
				$offset += $run;
				continue;
			}
			$pair = substr($data, $offset, 2);
			if ($pair === '>>') {
				$depth--;
				$dict .= '>>';
				$offset += 2;
			} elseif ($pair === '<<') {
				$depth++;
				$dict .= '<<';
				$offset += 2;
			} else {
				// single bracket (hex string delimiter)
				$dict .= $data[$offset];
				$offset++;
			}
		} while ($depth > 0);

		// Now that we have just the dict, parse it.
		$dictoffset = 0;
		do {
			$iterstart = $dictoffset;
			// Get dict element.
			list($key, $eloffset) = $this->getRawObject($dictoffset, $dict);
			if ($key[0] === '>>') {
				break;
			}
			list($element, $dictoffset) = $this->getRawObject($eloffset, $dict);
			if ($dictoffset <= $iterstart) {
				// Nothing was consumed (unrecognised token): stop instead of looping forever.
				break;
			}
			$objval['/'.$key[1]] = $element;
			unset($key);
			unset($element);
		} while (true);

		return array($objval, $offset);
	}

	/**
	 * Get content of indirect object.
	 *
	 * The object header ("N G obj") must be located exactly at $offset;
	 * otherwise the reference is treated as a reference to the null object.
	 * The elements of the object are read up to "endobj". Data following a
	 * dictionary that has a /Length entry and is followed by the "stream"
	 * keyword is read as a stream of that length.
	 * @param $obj_ref (string) Object number and generation number separated by underscore character.
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = explode('_', $obj_ref);
		if (!is_array($obj) OR (count($obj) != 2)) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';

		// compare the bytes at the offset only (no forward search through the document)
		if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$before = $offset;
			// is the previous element a dictionary?
			$prev_is_dict = (($i > 0) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY));
			$is_stream = false;
			if ($prev_is_dict AND array_key_exists('/Length', $objdata[($i - 1)][1])) {
				// A /Length entry alone does not make a stream (encryption dictionaries
				// have one too): the "stream" keyword must follow the dictionary.
				$probe = $offset + (int) strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
				$is_stream = (substr($this->pdfdata, $probe, 6) === 'stream');
			}
			if ($is_stream) {
				// Stream - get using /Length in stream's dict
				$lengthobj = $objdata[($i-1)][1]['/Length'];
				if ($lengthobj[0] === PDF_TYPE_OBJREF) {
					$lengthobj = $this->getObjectVal($lengthobj);
					if ($lengthobj[0] === PDF_TYPE_OBJECT) {
						$lengthobj = $lengthobj[1];
					}
				}
				$streamlength = $lengthobj[1];
				list($element, $offset) = $this->getRawStream($offset, $streamlength);
			} else {
				// get element
				list($element, $offset) = $this->getRawObject($offset);
			}
			// decode stream using stream's dictionary information
			if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND $prev_is_dict) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// Strict comparison: the integer type 0 (null object) must not be taken for
			// "endobj". Also stop when nothing was consumed, to avoid an endless loop.
		} while (($element[0] !== 'endobj') AND ($offset > $before));
		// remove closing delimiter (or the empty element of an aborted read)
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Get the content of object, resolving indirect object reference if necessary.
	 *
	 * A reference is looked up in the cache of parsed objects, then parsed at
	 * the offset found by findObjectOffset(), and finally (generation 0 only)
	 * read from the object stream that contains it. Values that are not
	 * references, and references that cannot be resolved, are returned as given.
	 * @param $obj (array) Object value.
	 * @return array containing object data. For a resolved reference: type
	 *         (PDF_TYPE_OBJECT or PDF_TYPE_STREAM), 'obj' and 'gen' numbers,
	 *         the first element of the object at index 1 and, for streams, the
	 *         stream element at index 2.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getObjectVal($obj) {
		if (isset($obj[0]) AND ($obj[0] == PDF_TYPE_OBJREF)) {
			if (strpos((string) $obj[1], '_') !== false) {
				$key = explode('_', $obj[1]);
			} else {
				$key = array($obj[1], $obj[2]);
			}

			$ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]);

			// reference to indirect object
			$object = null;
			if (isset($this->objects[$key[0]][$key[1]])) {
				// this object has been already parsed
				$object = $this->objects[$key[0]][$key[1]];
			} elseif (($offset = $this->findObjectOffset($key)) !== false) {
				// parse new object
				$this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false);
				$object = $this->objects[$key[0]][$key[1]];
			} elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) {
				// Object is in an object stream
				$streaminfo = $this->objstreamobjs[$key[0]];
				$objs = $streaminfo[0];
				if (!isset($this->objstreams[$objs[0]][$objs[1]])) {
					// Fetch and decode object stream
					$objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1]));
					if (($objstream[0] == PDF_TYPE_STREAM) AND isset($objstream[1][1]) AND isset($objstream[2][1])) {
						$decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]);
						$this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream
						unset($decoded);
					}
					// Free memory
					unset($objstream);
				}
				// Only parse when the container really produced data; with empty data
				// the object would otherwise be read from the main document by mistake.
				if (isset($this->objstreams[$objs[0]][$objs[1]])
					AND is_string($this->objstreams[$objs[0]][$objs[1]])
					AND ($this->objstreams[$objs[0]][$objs[1]] !== '')) {
					$this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]);
					$object = $this->objects[$key[0]][$key[1]];
				}
			}
			if (!is_null($object)) {
				$ret[1] = $object[0];
				if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) {
					$ret[0] = PDF_TYPE_STREAM;
					$ret[2] = $object[1];
				}
				return $ret;
			}
		}
		return $obj;
	}

    /**
     * Extract object stream to find out what it contains.
     *
     * The first /First bytes of the decoded stream hold pairs of integers
     * (object number, offset relative to /First) separated by white space. For
     * every pair the containing stream and the absolute offset inside the
     * decoded data are recorded in $this->objstreamobjs.
     * @param $key (array) Object number and generation number of the object stream.
     */
    function extractObjectStream($key) {
        $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]);
        $obj = $this->getObjectVal($objref);
        if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) {
            // Not a valid object stream dictionary - skip it.
            return;
        }
        $stream = $this->decodeStream($obj[1][1], $obj[2][1]);// Decode object stream, as we need the first bit
        $first = intval($obj[1][1]['/First'][1]);
        // Get list of object / offset pairs; any run of PDF white space (not just a
        // single space) may separate the integers.
        $header = (string) substr($stream[0], 0, $first);
        $ints = preg_split('/[\x00\x09\x0a\x0c\x0d\x20]+/', $header, -1, PREG_SPLIT_NO_EMPTY);
        if (is_array($ints)) {
            $total = count($ints);
            for ($j = 1; $j < $total; $j += 2) {
                $this->objstreamobjs[intval($ints[$j-1])] = array($key, intval($ints[$j]) + $first);
            }
        }

        // Free memory - we may not need this at all.
        unset($obj);
        unset($stream);
    }

	/**
	 * Find all object offsets.  Saves having to scour the file multiple times.
	 *
	 * Every "N G obj" header found at the start of a line is recorded in
	 * $this->objoffsets (a later occurrence of the same header replaces an
	 * earlier one). Objects whose dictionary mentions /ObjStm within the first
	 * 256 bytes are handed to extractObjectStream().
	 * @private
	 */
	private function findObjectOffsets() {
		$this->objoffsets = array();
		if (preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE) >= 1) {
			foreach ($matches[0] as $i => $match) {
				// the match may start with white space; the header itself starts behind it
				$offset = $match[1] + strspn($match[0], "\x00\x09\x0a\x0c\x0d\x20");
				$this->objoffsets[trim($match[0])] = $offset;
				$dictoffset = $match[1] + strlen($match[0]);
				// The dictionary may follow "obj" directly, without any white space.
				if (preg_match('|^\s*<<[^>]+/ObjStm|', substr($this->pdfdata, $dictoffset, 256)) == 1) {
					$this->extractObjectStream(array($matches[1][$i][0], $matches[2][$i][0]));
				}
			}
		}
		unset($matches);
	}

	/**
	 * Get offset of an object.  Checks xref first, then offsets found by scouring the file.
	 * @param $key (array) Object key to find (obj, gen).
	 * @return int Offset of the object in $this->pdfdata.
	 * @private
	 */
	private function findObjectOffset($key) {
		// Header text expected at the start of the object, e.g. "12 0 obj".
		$objref = $key[0].' '.$key[1].' obj';
		if (isset($this->xref['xref'][$key[0]][$key[1]])) {
			$offset = $this->xref['xref'][$key[0]][$key[1]];
			// Trust the xref entry only if the object header really sits at
			// that position. Compare just the bytes at the offset: this is
			// safe for out-of-range offsets from a damaged xref table (no
			// ValueError on PHP 8) and never scans the rest of the file.
			if (is_int($offset) && $offset >= 0 && substr($this->pdfdata, $offset, strlen($objref)) === $objref) {
				// Offset is in xref table and matches actual position in file
				return $offset;
			}
		}
		// Fall back to the offsets collected by scanning the whole file.
		if (array_key_exists($objref, $this->objoffsets)) {
			return $this->objoffsets[$objref];
		}
		// Object could not be located.
		return false;
	}

	/**
	 * Decode the specified stream.
	 * @param $sdic (array) Stream's dictionary array.
	 * @param $stream (string) Stream to decode.
	 * @return array containing decoded stream data and remaining filters.
	 * @protected
	 * @since 1.0.000 (2011-06-22)
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		if ($slength <= 0) {
			return array('', array());
		}
		$filters = array();
		// $sdic maps dictionary keys (e.g. '/Length', '/Filter') to
		// array(type, value) entries.
		foreach ($sdic as $k => $v) {
			if (($k === '/Length') AND ($v[0] == PDF_TYPE_NUMERIC)) {
				// get declared stream length and drop any bytes beyond it
				// (typically the end-of-line marker preceding "endstream")
				$declength = intval($v[1]);
				if (($declength >= 0) AND ($declength < $slength)) {
					$stream = substr($stream, 0, $declength);
				}
			} elseif ($k === '/Filter') {
				if ($v[0] == PDF_TYPE_TOKEN) {
					// single filter
					$filters[] = $v[1];
				} elseif ($v[0] == PDF_TYPE_ARRAY) {
					// array of filters, listed in the order they must be applied
					foreach ($v[1] as $flt) {
						if ($flt[0] == PDF_TYPE_TOKEN) {
							$filters[] = $flt[1];
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		$available = $this->FilterDecoders->getAvailableFilters();
		foreach ($filters as $filter) {
			// Filters form a chain: once one link cannot be decoded, the data
			// is still encoded with it, so every later filter must be left
			// for the caller as well.
			if (empty($remaining_filters) && in_array($filter, $available, true)) {
				$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}


    /**
     * Set pageno
     *
     * @param int $pageno Pagenumber to use
     */
    public function setPageno($pageno) {
        // Callers pass a 1-based page number; it is stored as a 0-based index.
        $requested = (int) $pageno;
        $index = $requested - 1;
        $pagecount = $this->getPageCount();

        if ($index < 0 || $index >= $pagecount) {
            // Report the number the caller actually asked for, not the
            // internal zero-based index.
            $this->Error("Pagenumber is wrong! (Requested $requested, max ".$pagecount.")");
        }

        $this->pageno = $index;
    }

    /**
     * Get page-resources from current page
     *
     * @return array
     */
    public function getPageResources() {
        // Look up the page selected via setPageno() and delegate the
        // resource resolution to the private helper.
        $currentPage = $this->pages[$this->pageno];

        return $this->_getPageResources($currentPage);
    }

    /**
     * Get page-resources from /Page
     *
     * @param array $obj Array of pdf-data
     */
    private function _getPageResources ($obj) { // $obj = /Page
        $obj = $this->getObjectVal($obj);

        // If the current object has a resources dictionary associated with
        // it, we use it. Otherwise, we move back to its parent object.
        if (isset($obj[1][1]['/Resources'])) {
            $res = $obj[1][1]['/Resources'];
        } elseif (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            // Neither own resources nor a parent to inherit from.
            return false;
        }

        // The parent lookup may have returned false; only index into real
        // arrays so no "array offset on bool" warning is raised.
        if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
            // Unwrap the object and hand back its value.
            return $res[1];
        }
        return $res;
    }


    /**
     * Get content of current page
     *
     * If /Contents is an array, the streams are concatenated
     *
     * @return string
     */
    public function getContent() {
        // A page without a /Contents entry yields an empty string.
        if (!isset($this->pages[$this->pageno][1][1]['/Contents'])) {
            return '';
        }

        $streamObjects = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);

        // Decode every content stream and join them, each followed by a
        // single space.
        $joined = '';
        foreach ($streamObjects as $streamObject) {
            $joined .= $this->_rebuildContentStream($streamObject) . ' ';
        }

        return $joined;
    }


    /**
     * Resolve all content-objects
     *
     * @param array $content_ref
     * @return array
     */
    private function _getPageContent($content_ref) {
        $refType = $content_ref[0];

        if ($refType == PDF_TYPE_OBJREF) {
            // Indirect reference: resolve it, then either descend into the
            // array it points to or keep the resolved object itself.
            $resolved = $this->getObjectVal($content_ref);
            if ($resolved[1][0] == PDF_TYPE_ARRAY) {
                return $this->_getPageContent($resolved[1]);
            }
            return array($resolved);
        }

        $collected = array();
        if ($refType == PDF_TYPE_ARRAY) {
            // Array of references: gather the objects of every entry in order.
            foreach ($content_ref[1] as $entry) {
                $collected = array_merge($collected, $this->_getPageContent($entry));
            }
        }

        // Any other type contributes nothing.
        return $collected;
    }


    /**
     * Rebuild content-streams
     *
     * @param array $obj
     * @return string
     */
    private function _rebuildContentStream($obj) {
        // List of array(type, name) filter entries to apply, in order.
        $chain = array();

        if (isset($obj[1][1]['/Filter'])) {
            $spec = $obj[1][1]['/Filter'];

            // The filter entry may itself be an indirect reference.
            if ($spec[0] == PDF_TYPE_OBJREF) {
                $resolved = $this->getObjectVal($spec);
                $spec = $resolved[1];
            }

            switch ($spec[0]) {
                case PDF_TYPE_TOKEN:
                    // A single filter name.
                    $chain = array($spec);
                    break;
                case PDF_TYPE_ARRAY:
                    // Several filters.
                    $chain = $spec[1];
                    break;
            }
        }

        // Raw stream data of the object, run through each filter in turn.
        $data = $obj[2][1];
        foreach ($chain as $entry) {
            $data = $this->FilterDecoders->decodeFilter($entry[1], $data);
        }

        return $data;
    }


    /**
     * Get a Box from a page
     * Arrayformat is same as used by fpdf_tpl
     *
     * @param array $page a /Page
     * @param string $box_index Type of Box @see $availableBoxes
     * @param float $k Scale factor from user space units to points
     * @return array
     */
    public function getPageBox($page, $box_index, $k) {
        $page = $this->getObjectVal($page);

        // Fetch the requested box entry, if this node defines one.
        $entry = isset($page[1][1][$box_index]) ? $page[1][1][$box_index] : null;

        // The entry may be an indirect reference to the actual array.
        if (!is_null($entry) && $entry[0] == PDF_TYPE_OBJREF) {
            $resolved = $this->getObjectVal($entry);
            $entry = $resolved[1];
        }

        if (!is_null($entry) && $entry[0] == PDF_TYPE_ARRAY) {
            // Four numbers: two opposite corners of the rectangle.
            $corners = $entry[1];
            $x1 = $corners[0][1];
            $y1 = $corners[1][1];
            $x2 = $corners[2][1];
            $y2 = $corners[3][1];

            // All values are divided by the scale factor $k.
            return array(
                'x'   => $x1 / $k,
                'y'   => $y1 / $k,
                'w'   => abs($x1 - $x2) / $k,
                'h'   => abs($y1 - $y2) / $k,
                'llx' => min($x1, $x2) / $k,
                'lly' => min($y1, $y2) / $k,
                'urx' => max($x1, $x2) / $k,
                'ury' => max($y1, $y2) / $k,
            );
        }

        // No usable box on this node: give up at the root, otherwise ask the
        // parent node.
        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->getPageBox($this->getObjectVal($page[1][1]['/Parent']), $box_index, $k);
    }

    /**
     * Get all page boxes by page no
     *
     * @param int $pageno The page number (1-based)
     * @param float $k Scale factor from user space units to points
     * @return array
     */
    public function getPageBoxes($pageno, $k) {
        // Page numbers are 1-based for callers, the page list is 0-based.
        $page = $this->pages[$pageno - 1];

        return $this->_getPageBoxes($page, $k);
    }

    /**
     * Get all boxes from /Page
     *
     * @param array $page a /Page
     * @param float $k Scale factor from user space units to points
     * @return array
     */
    private function _getPageBoxes($page, $k) {
        $found = array();

        // Try every known box type; keep only those that yield a result.
        foreach ($this->availableBoxes as $boxName) {
            $dimensions = $this->getPageBox($page, $boxName, $k);
            if (!$dimensions) {
                continue;
            }
            $found[$boxName] = $dimensions;
        }

        return $found;
    }

    /**
     * Get the page rotation by pageno
     *
     * @param integer $pageno
     * @return array
     */
    public function getPageRotation($pageno) {
        // Page numbers are 1-based for callers, the page list is 0-based.
        $page = $this->pages[$pageno - 1];

        return $this->_getPageRotation($page);
    }

    private function _getPageRotation($obj) { // $obj = /Page
        $obj = $this->getObjectVal($obj);

        // Use the node's own /Rotate entry if present; otherwise fall back
        // to the parent node.
        if (isset($obj[1][1]['/Rotate'])) {
            $res = $this->getObjectVal($obj[1][1]['/Rotate']);
        } elseif (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageRotation($obj[1][1]['/Parent']);
        } else {
            // Neither an own rotation nor a parent to inherit from.
            return false;
        }

        // The parent lookup may have returned false; only index into real
        // arrays so no "array offset on bool" warning is raised.
        if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
            // Unwrap the object and hand back its value.
            return $res[1];
        }
        return $res;
    }

	/**
	 * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
	 * @param $msg (string) The error message
	 * @public
	 * @since 1.0.000 (2011-05-23)
	 */
	public function Error($msg) {
		// Prefix the message, print it and terminate the script.
		$output = '<strong>TCPDF_PARSER ERROR: </strong>'.$msg;
		exit($output);
	}

} // END OF TCPDF_PARSER CLASS

//============================================================+
// END OF FILE
//============================================================+