You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

472 lines
20 KiB

11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /** Internally used classes */
  22. // require_once 'Zend/Pdf/Element.php';
  23. // require_once 'Zend/Pdf/Element/Numeric.php';
  24. /** Zend_Pdf_StringParser */
  25. // require_once 'Zend/Pdf/StringParser.php';
  26. /**
  27. * PDF file parser
  28. *
  29. * @package Zend_Pdf
  30. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  class Zend_Pdf_Parser
  {
      /**
       * String parser
       *
       * @var Zend_Pdf_StringParser
       */
      private $_stringParser;

      /**
       * Last PDF file trailer
       *
       * @var Zend_Pdf_Trailer_Keeper
       */
      private $_trailer;

      /**
       * PDF version specified in the file header
       *
       * @var string
       */
      private $_pdfVersion;

      /**
       * Offsets of the cross-reference sections which have already been loaded.
       *
       * Keys are the offsets cast to string, values are always true. Used to
       * detect '/Prev' chains that point back to an already loaded section.
       *
       * @var array
       */
      private $_visitedXRefOffsets = array();

      /**
       * Get length of source PDF
       *
       * @return integer
       */
      public function getPDFLength()
      {
          return strlen($this->_stringParser->data);
      }

      /**
       * Get PDF String
       *
       * @return string
       */
      public function getPDFString()
      {
          return $this->_stringParser->data;
      }

      /**
       * PDF version specified in the file header
       *
       * @return string
       */
      public function getPDFVersion()
      {
          return $this->_pdfVersion;
      }

      /**
       * Get Trailer object
       *
       * @return Zend_Pdf_Trailer_Keeper
       */
      public function getTrailer()
      {
          return $this->_trailer;
      }

      /**
       * Load XReference table and referenced objects
       *
       * Reads the cross-reference section located at $offset, builds a reference
       * table for it and then follows the '/Prev' entry of its trailer
       * recursively, linking the older reference tables as parents.
       *
       * @param integer $offset
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Trailer_Keeper
       */
      private function _loadXRefTable($offset)
      {
          // A '/Prev' chain which returns to an already loaded section would
          // otherwise recurse without end.
          $offsetKey = (string)$offset;
          if (isset($this->_visitedXRefOffsets[$offsetKey])) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Cross-reference section is referenced more than once (\'Prev\' chain loop).',
                  $offset
              ));
          }
          $this->_visitedXRefOffsets[$offsetKey] = true;

          $this->_stringParser->offset = $offset;

          $refTable = new Zend_Pdf_Element_Reference_Table();
          $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
          $this->_stringParser->setContext($context);

          $nextLexeme = $this->_stringParser->readLexeme();
          if ($nextLexeme == 'xref') {
              // Common cross-reference table
              $trailerDict = $this->_readXRefSection($refTable);
          } else {
              // Cross-reference stream (validated, but not supported yet)
              $trailerDict = $this->_loadXRefStream($offset, $context, $refTable);
          }

          $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
          if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
              $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
              $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
              $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
          }

          /**
           * We set '/Prev' dictionary property to the current cross-reference section offset.
           * It doesn't correspond to the actual data, but is true when trailer will be used
           * as a trailer for next generated PDF section.
           */
          $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

          return $trailerObj;
      }

      /**
       * Read a common (textual) cross-reference table and the trailer dictionary
       * which follows it.
       *
       * The string parser must be positioned right after the 'xref' keyword.
       * Every subsection is "<first object number> <entry count>" followed by
       * fixed-width entries: 10-digit offset, separator, 5-digit generation
       * number, separator, 'n' or 'f' flag and two end-of-line bytes.
       *
       * @param Zend_Pdf_Element_Reference_Table $refTable  table which receives the entries
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Element_Dictionary  trailer dictionary
       */
      private function _readXRefSection(Zend_Pdf_Element_Reference_Table $refTable)
      {
          $parser = $this->_stringParser;

          $parser->skipWhiteSpace();
          while ( ($nextLexeme = $parser->readLexeme()) != 'trailer' ) {
              if (!ctype_digit($nextLexeme)) {
                  throw new Zend_Pdf_Exception(sprintf(
                      'PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.',
                      $parser->offset - strlen($nextLexeme)
                  ));
              }
              $objNum = (int)$nextLexeme;

              $refCount = $parser->readLexeme();
              if (!ctype_digit($refCount)) {
                  throw new Zend_Pdf_Exception(sprintf(
                      'PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.',
                      $parser->offset - strlen($refCount)
                  ));
              }
              $refCount = (int)$refCount;

              $parser->skipWhiteSpace();
              for (; $refCount > 0; $refCount--, $objNum++) {
                  $objectOffset = $this->_readXRefField(10, 'Offset must contain only digits.');
                  $genNumber    = $this->_readXRefField(5, 'Generation number must contain only digits.');

                  $inUseKey = $this->_currentXRefByte();
                  $parser->offset++;

                  // Any flag other than 'f' or 'n' is skipped without an error.
                  switch ($inUseKey) {
                      case 'f':
                          // free entry
                          $refTable->addReference($objNum . ' ' . $genNumber . ' R', $objectOffset, false);
                          break;

                      case 'n':
                          // in-use entry
                          $refTable->addReference($objNum . ' ' . $genNumber . ' R', $objectOffset, true);
                          break;
                  }

                  // Each entry ends with a two-byte end-of-line sequence.
                  $this->_skipXRefWhiteSpaceByte();
                  $this->_skipXRefWhiteSpaceByte();
              }
          }

          $trailerDictOffset = $parser->offset;
          $trailerDict       = $parser->readElement();
          if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.',
                  $trailerDictOffset
              ));
          }

          return $trailerDict;
      }

      /**
       * Read one fixed-width numeric field of a cross-reference table entry
       * together with the single separator byte which follows it.
       *
       * Leading zeros are removed (one digit is always kept) so that the value
       * is never mistaken for an octal number.
       *
       * @param integer $width        number of digits in the field
       * @param string  $errorDetail  sentence appended to the error message when
       *                              the field contains non-digit characters
       * @throws Zend_Pdf_Exception
       * @return string  decimal digits without leading zeros
       */
      private function _readXRefField($width, $errorDetail)
      {
          $parser = $this->_stringParser;

          $digits = substr($parser->data, $parser->offset, $width);
          if (!ctype_digit($digits)) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file cross-reference table syntax error. Offset - 0x%X. %s',
                  $parser->offset,
                  $errorDetail
              ));
          }

          $digits = ltrim($digits, '0');
          if ($digits === '') {
              $digits = '0';
          }
          $parser->offset += $width;

          if (strpos("\x00\t\n\f\r ", $this->_currentXRefByte()) === false) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.',
                  $parser->offset
              ));
          }
          $parser->offset++;

          return $digits;
      }

      /**
       * Check that the byte at the current parser offset is white space and step over it.
       *
       * @throws Zend_Pdf_Exception
       */
      private function _skipXRefWhiteSpaceByte()
      {
          if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_currentXRefByte() )) ) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.',
                  $this->_stringParser->offset
              ));
          }
          $this->_stringParser->offset++;
      }

      /**
       * Get the byte at the current parser offset while reading a cross-reference table.
       *
       * Reading past the end of the data is reported as a syntax error instead
       * of indexing the string out of range.
       *
       * @throws Zend_Pdf_Exception
       * @return string  single byte
       */
      private function _currentXRefByte()
      {
          $parser = $this->_stringParser;

          if ($parser->offset >= strlen($parser->data)) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file cross-reference table syntax error. Offset - 0x%X. Unexpected end of file.',
                  $parser->offset
              ));
          }

          return $parser->data[$parser->offset];
      }

      /**
       * Validate and walk through a cross-reference stream.
       *
       * Cross-reference streams are not supported yet: after the stream
       * dictionary and its entries have been checked this method always ends by
       * throwing, so it currently never returns normally.
       *
       * @param integer $offset  offset of the cross-reference stream object
       * @param Zend_Pdf_Element_Reference_Context $context
       * @param Zend_Pdf_Element_Reference_Table $refTable  table which receives the entries
       * @throws Zend_Pdf_Exception
       * @return Zend_Pdf_Element_Dictionary  stream dictionary (not reached at the moment)
       */
      private function _loadXRefStream(
          $offset,
          Zend_Pdf_Element_Reference_Context $context,
          Zend_Pdf_Element_Reference_Table $refTable
      ) {
          $xrefStream = $this->_stringParser->getObject($offset, $context);
          if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.',
                  $offset
              ));
          }

          $trailerDict = $xrefStream->dictionary;
          if ($trailerDict->Type->value != 'XRef') {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.',
                  $offset
              ));
          }
          if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.',
                  $offset
              ));
          }

          // Three field widths are required; only the first one (entry type) may be zero.
          $widths = $trailerDict->W->items;
          if (count($widths) < 3 || $widths[1]->value == 0 || $widths[2]->value == 0) {
              throw new Zend_Pdf_Exception(sprintf(
                  'PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.',
                  $offset
              ));
          }
          $entryField1Size = $widths[0]->value;
          $entryField2Size = $widths[1]->value;
          $entryField3Size = $widths[2]->value;
          $entrySize       = $entryField1Size + $entryField2Size + $entryField3Size;

          $xrefStreamData = $xrefStream->value;
          $streamLength   = strlen($xrefStreamData);

          if ($trailerDict->Index !== null) {
              if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                  throw new Zend_Pdf_Exception(sprintf(
                      'PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.',
                      $offset
                  ));
              }
              // Index holds (first object number, entry count) pairs.
              $sections = (int)(count($trailerDict->Index->items) / 2);
          } else {
              $sections = 1;
          }

          $streamOffset = 0;
          for ($section = 0; $section < $sections; $section++) {
              if ($trailerDict->Index !== null) {
                  $objNum  = $trailerDict->Index->items[$section*2    ]->value;
                  $entries = $trailerDict->Index->items[$section*2 + 1]->value;
              } else {
                  $objNum  = 0;
                  $entries = $trailerDict->Size->value;
              }

              for ($entry = 0; $entry < $entries; $entry++, $objNum++) {
                  // Never read beyond the stream, whatever entry count is declared.
                  if ($streamOffset + $entrySize > $streamLength) {
                      throw new Zend_Pdf_Exception(sprintf(
                          'PDF file syntax error. Offset - 0x%X. Cross-reference stream contains less data than its declared entries.',
                          $offset
                      ));
                  }

                  if ($entryField1Size == 0) {
                      // Default entry type
                      $type = 1;
                  } else {
                      $type = $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField1Size);
                  }
                  $field2 = $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField2Size);
                  $field3 = $this->_readXRefStreamField($xrefStreamData, $streamOffset, $entryField3Size);

                  switch ($type) {
                      case 0:
                          // Free object: $field2 - next free object, $field3 - generation number
                          $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                          break;

                      case 1:
                          // In use object: $field2 - offset, $field3 - generation number
                          $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                          break;

                      case 2:
                          // Object in an object stream: nothing is registered for it
                          break;
                  }
              }
          }

          throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
      }

      /**
       * Read one integer field of a cross-reference stream entry and advance the stream offset.
       *
       * @param string  $data          decoded stream data
       * @param integer $streamOffset  current position, moved past the field
       * @param integer $fieldSize     field width in bytes
       * @return integer
       */
      private function _readXRefStreamField($data, &$streamOffset, $fieldSize)
      {
          if ($fieldSize == 1) {
              // One-byte field shortcut
              return ord($data[$streamOffset++]);
          }

          $value = Zend_Pdf_StringParser::parseIntFromStream($data, $streamOffset, $fieldSize);
          $streamOffset += $fieldSize;

          return $value;
      }

      /**
       * Object constructor
       *
       * Note: PHP duplicates a string which is passed by value only if it's updated.
       * Thus we don't need to care about overhead.
       *
       * Checks the '%PDF-' header and its version, locates the 'startxref'
       * keyword in front of the final '%%EOF' marker, loads the chain of
       * cross-reference tables and passes the trailer's /Size to the factory.
       *
       * @param mixed $source  file name if $load is true, PDF data otherwise
       * @param Zend_Pdf_ElementFactory_Interface $factory
       * @param boolean $load
       * @throws Zend_Exception
       */
      public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
      {
          if ($load) {
              $this->_stringParser = new Zend_Pdf_StringParser($this->_readFile($source), $factory);
          } else {
              $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
          }
          $parser = $this->_stringParser;

          $pdfVersionComment = $parser->readComment();
          if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
              throw new Zend_Pdf_Exception('File is not a PDF.');
          }

          $pdfVersion = substr($pdfVersionComment, 5);
          if (version_compare($pdfVersion, '0.9',  '<')  ||
              version_compare($pdfVersion, '1.61', '>=')
             ) {
              /**
               * @todo
               * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
               * Stream compression filter must be implemented (for compressed object streams).
               * Cross reference streams must be implemented
               */
              // The version is a string, so it is reported as is ('%f' would print '1.700000').
              throw new Zend_Pdf_Exception(sprintf(
                  'Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%s\'',
                  $pdfVersion
              ));
          }
          $this->_pdfVersion = $pdfVersion;

          // The end-of-file marker may be followed by two more bytes at most.
          $eofOffset = strrpos($parser->data, '%%EOF');
          if ($eofOffset === false ||
              strlen($parser->data) - $eofOffset > 7) {
              throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
          }
          $parser->offset = $eofOffset - 1;

          // Go to end of cross-reference table offset
          $this->_stepBackWhile(true);
          // Go to the start of cross-reference table offset
          $this->_stepBackWhile(false);
          // Go to the end of 'startxref' keyword
          $this->_stepBackWhile(true);

          // Go to the white space (eol marker) before 'startxref' keyword,
          // without leaving the data on very short input.
          $parser->offset = max(0, $parser->offset - 9);

          $nextLexeme = $parser->readLexeme();
          if ($nextLexeme != 'startxref') {
              throw new Zend_Pdf_Exception(sprintf(
                  'Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.',
                  $parser->offset - strlen($nextLexeme)
              ));
          }

          $startXref = $parser->readLexeme();
          if (!ctype_digit($startXref)) {
              throw new Zend_Pdf_Exception(sprintf(
                  'Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.',
                  $parser->offset - strlen($startXref)
              ));
          }

          $this->_trailer = $this->_loadXRefTable($startXref);
          $factory->setObjectCount($this->_trailer->Size->value);
      }

      /**
       * Read the whole PDF file into a string.
       *
       * The file handle is closed on every path, including the failing ones.
       *
       * @param string $source  file name
       * @throws Zend_Pdf_Exception
       * @return string
       */
      private function _readFile($source)
      {
          if (($pdfFile = @fopen($source, 'rb')) === false ) {
              throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
          }

          $data      = '';
          $byteCount = filesize($source);
          $failed    = ($byteCount === false);

          while (!$failed && $byteCount > 0 && !feof($pdfFile)) {
              $nextBlock = fread($pdfFile, $byteCount);
              if ($nextBlock === false) {
                  $failed = true;
                  break;
              }
              $data      .= $nextBlock;
              $byteCount -= strlen($nextBlock);
          }
          fclose($pdfFile);

          // Fewer bytes than the reported file size is also a read failure.
          if ($failed || $byteCount != 0) {
              throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
          }

          return $data;
      }

      /**
       * Move the parser offset backwards while the current byte is (or is not) white space.
       *
       * The offset never goes below zero.
       *
       * @param boolean $whiteSpace  true to step over white space, false to step over other bytes
       */
      private function _stepBackWhile($whiteSpace)
      {
          $parser = $this->_stringParser;

          while ($parser->offset > 0 &&
                 (bool)Zend_Pdf_StringParser::isWhiteSpace( ord($parser->data[$parser->offset]) ) === $whiteSpace) {
              $parser->offset--;
          }
      }

      /**
       * Object destructor
       */
      public function __destruct()
      {
          // Nothing to clean up if the string parser was never set.
          if ($this->_stringParser !== null) {
              $this->_stringParser->cleanUp();
          }
      }
  }