You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

731 lines
22 KiB

11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /** Internally used classes */
  22. // require_once 'Zend/Pdf/Element/Array.php';
  23. // require_once 'Zend/Pdf/Element/String/Binary.php';
  24. // require_once 'Zend/Pdf/Element/Boolean.php';
  25. // require_once 'Zend/Pdf/Element/Dictionary.php';
  26. // require_once 'Zend/Pdf/Element/Name.php';
  27. // require_once 'Zend/Pdf/Element/Null.php';
  28. // require_once 'Zend/Pdf/Element/Numeric.php';
  29. // require_once 'Zend/Pdf/Element/Object.php';
  30. // require_once 'Zend/Pdf/Element/Object/Stream.php';
  31. // require_once 'Zend/Pdf/Element/Reference.php';
  32. // require_once 'Zend/Pdf/Element/String.php';
  /**
   * PDF string parser.
   *
   * Tokenizes a PDF document held in memory as a string ($data) and builds
   * Zend_Pdf_Element objects from the resulting lexemes. The read position is
   * kept in $offset; both members are public and may be moved by callers.
   *
   * @package Zend_Pdf
   * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Pdf_StringParser
  {
      /**
       * Source PDF
       *
       * @var string
       */
      public $data = '';

      /**
       * Current position (byte offset) in $data
       *
       * @var integer
       */
      public $offset = 0;

      /**
       * Current reference context
       *
       * @var Zend_Pdf_Element_Reference_Context
       */
      private $_context = null;

      /**
       * Elements created while parsing the current object/trailer.
       * getObject() assigns the resulting object as their parent.
       *
       * @var array
       */
      private $_elements = array();

      /**
       * PDF objects factory.
       *
       * @var Zend_Pdf_ElementFactory_Interface
       */
      private $_objFactory = null;

      /**
       * Clean up resources.
       *
       * Drops context, collected elements and the factory so that cyclic
       * object references can be released.
       */
      public function cleanUp()
      {
          $this->_context    = null;
          $this->_elements   = array();
          $this->_objFactory = null;
      }

      /**
       * Check whether character with code $chCode is a PDF white space character
       *
       * @param integer $chCode
       * @return boolean
       */
      public static function isWhiteSpace($chCode)
      {
          return $chCode == 0x00     // null character
              || $chCode == 0x09     // Tab
              || $chCode == 0x0A     // Line feed
              || $chCode == 0x0C     // Form Feed
              || $chCode == 0x0D     // Carriage return
              || $chCode == 0x20;    // Space
      }

      /**
       * Check whether character with code $chCode is a PDF delimiter character
       *
       * @param integer $chCode
       * @return boolean
       */
      public static function isDelimiter($chCode)
      {
          return $chCode == 0x28     // '('
              || $chCode == 0x29     // ')'
              || $chCode == 0x3C     // '<'
              || $chCode == 0x3E     // '>'
              || $chCode == 0x5B     // '['
              || $chCode == 0x5D     // ']'
              || $chCode == 0x7B     // '{'
              || $chCode == 0x7D     // '}'
              || $chCode == 0x2F     // '/'
              || $chCode == 0x25;    // '%'
      }

      /**
       * Skip white space and, optionally, comments
       *
       * Advances $offset to the first character which is neither white space
       * nor (when $skipComment is true) part of a '%' comment.
       *
       * @param boolean $skipComment
       */
      public function skipWhiteSpace($skipComment = true)
      {
          $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

          if (!$skipComment) {
              return;
          }

          $length = strlen($this->data);
          while ($this->offset < $length && $this->data[$this->offset] == '%') {
              // Skip the comment up to (not including) the end-of-line marker,
              // then the white space which follows it.
              $this->offset += strcspn($this->data, "\r\n", $this->offset);
              $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
          }
      }

      /**
       * Skip comment
       *
       * Advances $offset to the next line feed / carriage return character
       * (or to the end of data if there is none).
       */
      public function skipComment()
      {
          // Guard keeps strcspn() from being called with an offset past the data.
          if ($this->offset < strlen($this->data)) {
              $this->offset += strcspn($this->data, "\r\n", $this->offset);
          }
      }

      /**
       * Read comment line
       *
       * Skips leading white space. Returns the comment text starting with '%'
       * and ending before the end-of-line marker, or an empty string if the
       * next non white space character does not start a comment (or the end of
       * data has been reached).
       *
       * @return string
       */
      public function readComment()
      {
          $this->skipWhiteSpace(false);

          if ($this->offset >= strlen($this->data) || $this->data[$this->offset] != '%') {
              return '';
          }

          $start = $this->offset;
          $this->offset += strcspn($this->data, "\r\n", $start);

          return substr($this->data, $start, $this->offset - $start);
      }

      /**
       * Returns next lexeme from a pdf stream
       *
       * A lexeme is either a delimiter ('<<' and '>>' are returned as one
       * lexeme, any other delimiter as a single character) or a run of regular
       * characters. An empty string is returned at the end of data.
       *
       * @return string
       */
      public function readLexeme()
      {
          $length = strlen($this->data);

          // Inlined equivalent of skipWhiteSpace(true).
          while (true) {
              $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

              if ($this->offset < $length && $this->data[$this->offset] == '%') {
                  $this->offset += strcspn($this->data, "\r\n", $this->offset);
              } else {
                  break;
              }
          }

          if ($this->offset >= $length) {
              return '';
          }

          if (strpos('()<>[]{}/%', $this->data[$this->offset]) !== false) {
              $pair = substr($this->data, $this->offset, 2);
              if ($pair === '<<' || $pair === '>>') {
                  $this->offset += 2;
                  return $pair;
              }

              return $this->data[$this->offset++];
          }

          // Regular lexeme: runs up to the next delimiter or white space character.
          $start = $this->offset;
          $this->offset += strcspn($this->data, "()<>[]{}/%\x00\t\n\f\r ", $start);

          return substr($this->data, $start, $this->offset - $start);
      }

      /**
       * Read elemental object from a PDF stream
       *
       * Note: this is a public method and may be invoked from other classes.
       * Every element created here is also appended to the internal element
       * list, which only getObject() resets and consumes.
       *
       * @param string|null $nextLexeme  Already read lexeme to start from; read from the stream if null
       * @return Zend_Pdf_Element
       * @throws Zend_Pdf_Exception
       */
      public function readElement($nextLexeme = null)
      {
          if ($nextLexeme === null) {
              $nextLexeme = $this->readLexeme();
          }

          switch ($nextLexeme) {
              case '(':
                  return ($this->_elements[] = $this->_readString());

              case '<':
                  return ($this->_elements[] = $this->_readBinaryString());

              case '/':
                  return ($this->_elements[] = new Zend_Pdf_Element_Name(
                      Zend_Pdf_Element_Name::unescape($this->readLexeme())
                  ));

              case '[':
                  return ($this->_elements[] = $this->_readArray());

              case '<<':
                  return ($this->_elements[] = $this->_readDictionary());

              // Closing delimiters and braces cannot start an element.
              case ')':
              case '>':
              case ']':
              case '>>':
              case '{':
              case '}':
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                       $this->offset));

              default:
                  if (strcasecmp($nextLexeme, 'true') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                  }
                  if (strcasecmp($nextLexeme, 'false') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                  }
                  if (strcasecmp($nextLexeme, 'null') == 0) {
                      return ($this->_elements[] = new Zend_Pdf_Element_Null());
                  }

                  // "<num> <num> R" is a reference, anything else is treated as a number.
                  $ref = $this->_readReference($nextLexeme);
                  if ($ref !== null) {
                      return ($this->_elements[] = $ref);
                  }

                  return ($this->_elements[] = $this->_readNumeric($nextLexeme));
          }
      }

      /**
       * Read string PDF object
       * Also reads trailing ')' from a pdf stream
       *
       * Expects $offset to point just behind the opening '('.
       *
       * @return Zend_Pdf_Element_String
       * @throws Zend_Pdf_Exception
       */
      private function _readString()
      {
          $length = strlen($this->data);
          $start  = $this->offset;
          $depth  = 1;

          $this->offset += strcspn($this->data, '()\\', $this->offset);
          while ($this->offset < $length) {
              switch ($this->data[$this->offset]) {
                  case '(': // opened bracket in the string, needs balanced pair.
                      $this->offset++;
                      $depth++;
                      break;

                  case ')': // pair to the opened bracket
                      $this->offset++;
                      $depth--;
                      break;

                  case '\\': // escape sequence, skip next char from a check
                      // Clamp: a backslash as the very last byte must not move the
                      // offset past the data, otherwise strcspn() below fails
                      // instead of the syntax exception being raised.
                      $this->offset = min($this->offset + 2, $length);
                      break;
              }

              if ($depth == 0) {
                  break; // end of string
              }

              $this->offset += strcspn($this->data, '()\\', $this->offset);
          }

          if ($depth != 0) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
          }

          return new Zend_Pdf_Element_String(
              Zend_Pdf_Element_String::unescape(substr($this->data, $start, $this->offset - $start - 1))
          );
      }

      /**
       * Read binary string PDF object
       * Also reads trailing '>' from a pdf stream
       *
       * Expects $offset to point just behind the opening '<'.
       *
       * @return Zend_Pdf_Element_String_Binary
       * @throws Zend_Pdf_Exception
       */
      private function _readBinaryString()
      {
          $start = $this->offset;

          $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

          // End of data is reached only when there is no character left at all;
          // a closing '>' which is the last byte of the data is still valid.
          if ($this->offset >= strlen($this->data)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
          }

          if ($this->data[$this->offset++] != '>') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
          }

          return new Zend_Pdf_Element_String_Binary(
              Zend_Pdf_Element_String_Binary::unescape(substr($this->data, $start, $this->offset - $start - 1))
          );
      }

      /**
       * Read array PDF object
       * Also reads trailing ']' from a pdf stream
       *
       * @return Zend_Pdf_Element_Array
       * @throws Zend_Pdf_Exception
       */
      private function _readArray()
      {
          $items = array();

          while (strlen($lexeme = $this->readLexeme()) != 0) {
              if ($lexeme == ']') {
                  return new Zend_Pdf_Element_Array($items);
              }

              $items[] = $this->readElement($lexeme);
          }

          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
      }

      /**
       * Read dictionary PDF object
       * Also reads trailing '>>' from a pdf stream
       *
       * @return Zend_Pdf_Element_Dictionary
       * @throws Zend_Pdf_Exception
       */
      private function _readDictionary()
      {
          $dictionary = new Zend_Pdf_Element_Dictionary();

          while (strlen($lexeme = $this->readLexeme()) != 0) {
              if ($lexeme == '>>') {
                  return $dictionary;
              }

              // Remember where the key started for error reporting.
              $nameStart = $this->offset - strlen($lexeme);

              $name  = $this->readElement($lexeme);
              $value = $this->readElement();

              if (!$name instanceof Zend_Pdf_Element_Name) {
                  throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
              }

              $dictionary->add($name, $value);
          }

          throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
      }

      /**
       * Read reference PDF object
       *
       * Tries to read "<object number> <generation number> R". If the lexemes
       * do not form a reference, $offset is restored to the value it had on
       * entry and null is returned.
       *
       * @param string $nextLexeme  Already read object number lexeme; read from the stream if null
       * @return Zend_Pdf_Element_Reference|null
       */
      private function _readReference($nextLexeme = null)
      {
          $start  = $this->offset;
          $objNum = ($nextLexeme === null) ? $this->readLexeme() : $nextLexeme;

          if (ctype_digit($objNum)) {
              $genNum = $this->readLexeme();

              if (ctype_digit($genNum) && $this->readLexeme() === 'R') {
                  return new Zend_Pdf_Element_Reference(
                      (int)$objNum,
                      (int)$genNum,
                      $this->_context,
                      $this->_objFactory->resolve()
                  );
              }
          }

          // It's not a reference
          $this->offset = $start;
          return null;
      }

      /**
       * Read numeric PDF object
       *
       * @param string $nextLexeme  Already read lexeme; read from the stream if null
       * @return Zend_Pdf_Element_Numeric
       */
      private function _readNumeric($nextLexeme = null)
      {
          if ($nextLexeme === null) {
              $nextLexeme = $this->readLexeme();
          }

          return new Zend_Pdf_Element_Numeric($nextLexeme);
      }

      /**
       * Read indirect object from a PDF stream
       *
       * Parses "<num> <gen> obj <value> endobj" or a stream object starting at
       * $offset. The parser offset is saved on entry and restored after a
       * successful read; the context and the internal element list are replaced.
       *
       * @param integer $offset  Position of the object in the source data; null yields a null element
       * @param Zend_Pdf_Element_Reference_Context $context
       * @return Zend_Pdf_Element_Object
       * @throws Zend_Pdf_Exception
       */
      public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
      {
          if ($offset === null) {
              return new Zend_Pdf_Element_Null();
          }

          // Save current offset to make getObject() reentrant
          $offsetSave = $this->offset;

          $this->offset    = $offset;
          $this->_context  = $context;
          $this->_elements = array();

          $objNum = $this->readLexeme();
          if (!ctype_digit($objNum)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
          }

          $genNum = $this->readLexeme();
          if (!ctype_digit($genNum)) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
          }

          $objKeyword = $this->readLexeme();
          if ($objKeyword != 'obj') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
          }

          $objValue   = $this->readElement();
          $nextLexeme = $this->readLexeme();

          if ($nextLexeme == 'endobj') {
              // The object is created directly rather than through the factory,
              // but the factory is still assigned to it.
              $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
          } else {
              $obj = $this->_readStreamObject($objValue, $nextLexeme, $objNum, $genNum);
          }

          foreach ($this->_elements as $element) {
              $element->setParentObject($obj);
          }

          // Restore offset value
          $this->offset = $offsetSave;

          return $obj;
      }

      /**
       * Read the stream part of an indirect object (helper for getObject())
       *
       * Called after the object value has been read and the following lexeme
       * was not 'endobj'. Consumes "stream <eol> <data> endstream endobj".
       *
       * @param Zend_Pdf_Element $dictionary  Object value read before the keyword; must be a dictionary
       * @param string $keyword  Lexeme which followed the object value
       * @param string $objNum   Object number lexeme
       * @param string $genNum   Generation number lexeme
       * @return Zend_Pdf_Element_Object_Stream
       * @throws Zend_Pdf_Exception
       */
      private function _readStreamObject($dictionary, $keyword, $objNum, $genNum)
      {
          if ($keyword != 'stream') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($keyword)));
          }

          if (!$dictionary instanceof Zend_Pdf_Element_Dictionary) {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($keyword)));
          }

          // NOTE(review): the original comment states that a referenced Length is
          // dereferenced automatically at this point - confirm against the
          // dictionary/reference implementation.
          $streamLength = $dictionary->Length->value;

          // 'stream' keyword must be followed by either cr-lf sequence or lf
          // character only. substr() is used so that a truncated file ends up in
          // the syntax error branch instead of an out-of-range string access.
          if (substr($this->data, $this->offset, 2) === "\r\n") {
              $this->offset += 2;
          } else if (substr($this->data, $this->offset, 1) === "\n") {
              $this->offset++;
          } else {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($keyword)));
          }

          $dataOffset    = $this->offset;
          $this->offset += $streamLength;

          // A Length pointing outside of the data cannot be followed by
          // 'endstream'; don't hand an out-of-range offset to readLexeme().
          $withinData = ($this->offset >= 0 && $this->offset <= strlen($this->data));
          $lexeme     = $withinData ? $this->readLexeme() : '';
          if ($lexeme != 'endstream') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($lexeme)));
          }

          $lexeme = $this->readLexeme();
          if ($lexeme != 'endobj') {
              throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($lexeme)));
          }

          return new Zend_Pdf_Element_Object_Stream(
              substr($this->data, $dataOffset, $streamLength),
              (int)$objNum,
              (int)$genNum,
              $this->_objFactory->resolve(),
              $dictionary
          );
      }

      /**
       * Get length of source string
       *
       * @return integer
       */
      public function getLength()
      {
          return strlen($this->data);
      }

      /**
       * Get source string
       *
       * @return string
       */
      public function getString()
      {
          return $this->data;
      }

      /**
       * Parse integer value from a binary stream
       *
       * Interprets $size bytes of $stream starting at $offset as an unsigned
       * number with the most significant byte first.
       *
       * @param string $stream
       * @param integer $offset
       * @param integer $size
       * @return integer
       */
      public static function parseIntFromStream($stream, $offset, $size)
      {
          $value = 0;
          $end   = $offset + $size;

          for ($pos = $offset; $pos < $end; $pos++) {
              $value = $value * 256 + ord($stream[$pos]);
          }

          return $value;
      }

      /**
       * Set current context
       *
       * @param Zend_Pdf_Element_Reference_Context $context
       */
      public function setContext(Zend_Pdf_Element_Reference_Context $context)
      {
          $this->_context = $context;
      }

      /**
       * Object constructor
       *
       * Note: PHP duplicates a string passed by value only if it's updated,
       * thus there is no need to care about copy overhead here.
       *
       * @param string $source  PDF document data
       * @param Zend_Pdf_ElementFactory_Interface $factory
       */
      public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
      {
          $this->data        = $source;
          $this->_objFactory = $factory;
      }
  }