You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

485 lines
16 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @subpackage FileParser
  18. * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * Abstract utility class for parsing binary files.
  24. *
  25. * Provides a library of methods to quickly navigate and extract various data
  26. * types (signed and unsigned integers, floating- and fixed-point numbers,
  27. * strings, etc.) from the file.
  28. *
  29. * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
  30. * This allows the same parser code to work with many different data sources:
  31. * in-memory objects, filesystem files, etc.
  32. *
  33. * @package Zend_Pdf
  34. * @subpackage FileParser
  35. * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  36. * @license http://framework.zend.com/license/new-bsd New BSD License
  37. */
  38. abstract class Zend_Pdf_FileParser
  39. {
  40. /**** Class Constants ****/
  41. /**
  42. * Little-endian byte order (0x04 0x03 0x02 0x01).
  43. */
  44. const BYTE_ORDER_LITTLE_ENDIAN = 0;
  45. /**
  46. * Big-endian byte order (0x01 0x02 0x03 0x04).
  47. */
  48. const BYTE_ORDER_BIG_ENDIAN = 1;
  49. /**** Instance Variables ****/
  50. /**
  51. * Flag indicating that the file has passed a cursory validation check.
  52. * @var boolean
  53. */
  54. protected $_isScreened = false;
  55. /**
  56. * Flag indicating that the file has been successfully parsed.
  57. * @var boolean
  58. */
  59. protected $_isParsed = false;
  60. /**
  61. * Object representing the data source to be parsed.
  62. * @var Zend_Pdf_FileParserDataSource
  63. */
  64. protected $_dataSource = null;
  65. /**** Public Interface ****/
  66. /* Abstract Methods */
  /**
   * Runs a quick sanity check on the binary file.
   *
   * Concrete parsers implement this to reject files that are obviously not
   * in the expected format before a full parse is attempted. An
   * implementation must set $this->_isScreened to true once the check passes.
   *
   * @throws Zend_Pdf_Exception
   */
  abstract public function screen();
  /**
   * Reads the whole binary file and parses its contents.
   *
   * Concrete parsers implement this. An implementation must set
   * $this->_isParsed to true once parsing has completed successfully.
   *
   * @throws Zend_Pdf_Exception
   */
  abstract public function parse();
  84. /* Object Lifecycle */
  /**
   * Creates the parser around an already-initialized data source.
   *
   * A data source that reports a size of zero is rejected; any other data
   * source is stored for use by the read methods of this class.
   *
   * @param Zend_Pdf_FileParserDataSource $dataSource
   * @throws Zend_Pdf_Exception If the data source reports a size of zero.
   */
  public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
  {
      $hasData = ($dataSource->getSize() != 0);
      if ($hasData) {
          $this->_dataSource = $dataSource;
          return;
      }

      throw new Zend_Pdf_Exception('The data source has not been properly initialized',
                                   Zend_Pdf_Exception::BAD_DATA_SOURCE);
  }
  /**
   * Releases the parser's reference to its data source.
   *
   * After this runs, the stored data source property is null.
   */
  public function __destruct()
  {
      // Drop our reference only; the data source object itself is not touched.
      $this->_dataSource = null;
  }
  111. /* Accessors */
  /**
   * Reports whether the cursory validation check has passed.
   *
   * Simply exposes the $_isScreened flag, which starts out false.
   *
   * @return boolean
   */
  public function isScreened()
  {
      $screened = $this->_isScreened;
      return $screened;
  }
  /**
   * Reports whether the file has been parsed successfully.
   *
   * Simply exposes the $_isParsed flag, which starts out false.
   *
   * @return boolean
   */
  public function isParsed()
  {
      $parsed = $this->_isParsed;
      return $parsed;
  }
  /**
   * Gives access to the data source this parser reads from.
   *
   * @return Zend_Pdf_FileParserDataSource
   */
  public function getDataSource()
  {
      $source = $this->_dataSource;
      return $source;
  }
  139. /* Primitive Methods */
  /**
   * Forwards to the data source's moveToOffset() method.
   *
   * @param integer $offset Destination byte offset.
   * @throws Zend_Pdf_Exception
   */
  public function moveToOffset($offset)
  {
      $source = $this->_dataSource;
      $source->moveToOffset($offset);
  }
  /**
   * Forwards to the data source's getOffset() method.
   *
   * @return mixed Whatever the data source reports (presumably the current
   *               byte offset as an integer; confirm against the data source).
   */
  public function getOffset()
  {
      $source = $this->_dataSource;
      return $source->getOffset();
  }
  /**
   * Forwards to the data source's getSize() method.
   *
   * @return mixed Whatever the data source reports (presumably its total
   *               size in bytes; confirm against the data source).
   */
  public function getSize()
  {
      $source = $this->_dataSource;
      return $source->getSize();
  }
  /**
   * Forwards to the data source's readBytes() method and hands back its
   * result unchanged.
   *
   * @param integer $byteCount Number of bytes to read.
   * @return string
   * @throws Zend_Pdf_Exception
   */
  public function readBytes($byteCount)
  {
      $source = $this->_dataSource;
      return $source->readBytes($byteCount);
  }
  /**
   * Forwards to the data source's skipBytes() method.
   *
   * @param integer $byteCount Number of bytes to skip.
   * @throws Zend_Pdf_Exception
   */
  public function skipBytes($byteCount)
  {
      $source = $this->_dataSource;
      $source->skipBytes($byteCount);
  }
  177. /* Parser Methods */
  /**
   * Reads a signed (two's complement) integer of 1 to 4 bytes starting at
   * the data source's current position.
   *
   * The bytes are consumed through the data source's readBytes() method
   * before the byte order is checked, so an invalid byte order is reported
   * only after the read has taken place.
   *
   * @param integer $size Size of integer in bytes: 1-4
   * @param integer $byteOrder (optional) Big- or little-endian byte order.
   *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
   *   If omitted, uses big-endian.
   * @return integer
   * @throws Zend_Pdf_Exception If $size is outside 1-4 or $byteOrder is not
   *   one of the BYTE_ORDER_ constants.
   */
  public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  {
      if (($size < 1) || ($size > 4)) {
          throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                       Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
      }

      $bytes = $this->_dataSource->readBytes($size);

      /* unpack() is not used here: its signed formats follow the host byte
       * order and it cannot handle arbitrary integer widths.
       *
       * Negative values are assembled from the bit-inverted bytes (XOR with
       * 0xff) and the whole result is inverted once at the end. That yields
       * the correct two's complement value regardless of the width of PHP's
       * native integer. Non-negative values use a mask of 0x00, which leaves
       * each byte untouched.
       */
      switch ($byteOrder) {
          case Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN:
              // Sign bit lives in the first byte.
              $isNegative = ((ord($bytes[0]) & 0x80) == 0x80);
              $flipMask   = $isNegative ? 0xff : 0x00;

              $number = 0;
              for ($index = 0; $index < $size; $index++) {
                  $number = ($number << 8) | (ord($bytes[$index]) ^ $flipMask);
              }
              break;

          case Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN:
              // Sign bit lives in the last byte.
              $isNegative = ((ord($bytes[$size - 1]) & 0x80) == 0x80);
              $flipMask   = $isNegative ? 0xff : 0x00;

              $number = 0;
              for ($index = 0; $index < $size; $index++) {
                  $number |= (ord($bytes[$index]) ^ $flipMask) << ($index * 8);
              }
              break;

          default:
              throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                           Zend_Pdf_Exception::INVALID_BYTE_ORDER);
      }

      return $isNegative ? ~$number : $number;
  }
  /**
   * Reads an unsigned integer of 1 to 4 bytes starting at the data source's
   * current position.
   *
   * The bytes are consumed through the data source's readBytes() method
   * before the byte order is checked, so an invalid byte order is reported
   * only after the read has taken place.
   *
   * NOTE: PHP integers are always signed. On a build with 32-bit integers a
   * 4-byte value with its top bit set therefore comes back NEGATIVE. For
   * portable code, treat large unsigned values with bitwise operators only.
   *
   * @param integer $size Size of integer in bytes: 1-4
   * @param integer $byteOrder (optional) Big- or little-endian byte order.
   *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
   *   If omitted, uses big-endian.
   * @return integer
   * @throws Zend_Pdf_Exception If $size is outside 1-4 or $byteOrder is not
   *   one of the BYTE_ORDER_ constants.
   */
  public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  {
      if (($size < 1) || ($size > 4)) {
          throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                       Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
      }

      $bytes = $this->_dataSource->readBytes($size);

      // The bytes are combined by hand; unpack() would be overkill here.
      switch ($byteOrder) {
          case Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN:
              // Most significant byte first: shift the running value up
              // and append each following byte.
              $number = 0;
              for ($index = 0; $index < $size; $index++) {
                  $number = ($number << 8) | ord($bytes[$index]);
              }
              return $number;

          case Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN:
              // Least significant byte first: byte N lands N * 8 bits up.
              $number = 0;
              $shift  = 0;
              for ($index = 0; $index < $size; $index++) {
                  $number |= ord($bytes[$index]) << $shift;
                  $shift  += 8;
              }
              return $number;

          default:
              throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                           Zend_Pdf_Exception::INVALID_BYTE_ORDER);
      }
  }
  /**
   * Tests a single bit of an integer bitfield.
   *
   * @param integer $bit Bit number to test (i.e. - 0-31)
   * @param integer $bitField
   * @return boolean True when the bit at position $bit is set in $bitField.
   */
  public function isBitSet($bit, $bitField)
  {
      // Mask with only the requested bit turned on.
      $mask = 1 << $bit;
      return $mask == ($mask & $bitField);
  }
  /**
   * Reads a signed fixed-point number starting at the data source's current
   * position.
   *
   * The value is read as a signed integer of
   * ($mantissaBits + $fractionBits) / 8 bytes via {@link readInt()} and then
   * divided by 2^$fractionBits. Typical layouts are 2.14 and 16.16.
   *
   * @param integer $mantissaBits Number of bits in the mantissa
   * @param integer $fractionBits Number of bits in the fraction
   * @param integer $byteOrder (optional) Big- or little-endian byte order.
   *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
   *   If omitted, uses big-endian.
   * @return float
   * @throws Zend_Pdf_Exception If the total bit count is not a multiple of
   *   eight, or if {@link readInt()} rejects the size or byte order.
   */
  public function readFixed($mantissaBits, $fractionBits,
                            $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
  {
      $totalBits = $mantissaBits + $fractionBits;
      if (($totalBits % 8) !== 0) {
          throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                       Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
      }

      // Read first, then scale: the integer is consumed before the divisor
      // is computed.
      $byteCount = $totalBits >> 3;
      $rawValue  = $this->readInt($byteCount, $byteOrder);
      $scale     = 1 << $fractionBits;

      return $rawValue / $scale;
  }
  /**
   * Reads a UTF-16-encoded string starting at the data source's current
   * position and converts it to the requested character set.
   *
   * A zero $byteCount returns an empty string without touching the data
   * source. Otherwise the bytes are read first and the byte order is
   * validated afterwards. When the requested character set already equals
   * the source encoding (UTF-16BE or UTF-16LE), the raw bytes are returned
   * without calling iconv().
   *
   * @todo Consider changing $byteCount to a character count. They are not
   *   always equivalent (in the case of surrogates).
   * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
   *   string being extracted.
   *
   * @param integer $byteCount Number of bytes (characters * 2) to return.
   * @param integer $byteOrder (optional) Big- or little-endian byte order.
   *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
   *   If omitted, uses big-endian.
   * @param string $characterSet (optional) Desired resulting character set.
   *   You may use any character set supported by {@link iconv()}. If omitted,
   *   an empty string is passed to iconv() (documented here previously as
   *   meaning 'current locale'; confirm against your iconv implementation).
   * @return string
   * @throws Zend_Pdf_Exception If $byteOrder is not one of the BYTE_ORDER_
   *   constants.
   */
  public function readStringUTF16($byteCount,
                                  $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
                                  $characterSet = '')
  {
      if ($byteCount == 0) {
          return '';
      }

      $bytes = $this->_dataSource->readBytes($byteCount);

      // Work out which UTF-16 flavour the bytes are in.
      switch ($byteOrder) {
          case Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN:
              $sourceCharset = 'UTF-16BE';
              break;

          case Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN:
              $sourceCharset = 'UTF-16LE';
              break;

          default:
              throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                           Zend_Pdf_Exception::INVALID_BYTE_ORDER);
      }

      // Nothing to convert when the caller wants the source encoding itself.
      if ($characterSet == $sourceCharset) {
          return $bytes;
      }

      return iconv($sourceCharset, $characterSet, $bytes);
  }
  /**
   * Reads a Mac Roman-encoded string starting at the data source's current
   * position and converts it to the requested character set.
   *
   * A zero $byteCount returns an empty string without touching the data
   * source. When the requested character set is 'MacRoman', the raw bytes
   * are returned without calling iconv().
   *
   * @param integer $byteCount Number of bytes (characters) to return.
   * @param string $characterSet (optional) Desired resulting character set.
   *   You may use any character set supported by {@link iconv()}. If omitted,
   *   an empty string is passed to iconv() (documented here previously as
   *   meaning 'current locale'; confirm against your iconv implementation).
   * @return string
   * @throws Zend_Pdf_Exception
   */
  public function readStringMacRoman($byteCount, $characterSet = '')
  {
      if ($byteCount == 0) {
          return '';
      }

      $rawString = $this->_dataSource->readBytes($byteCount);

      return ($characterSet == 'MacRoman')
          ? $rawString
          : iconv('MacRoman', $characterSet, $rawString);
  }
  /**
   * Reads a Pascal (length-prefixed) string starting at the data source's
   * current position and converts it to the requested character set.
   *
   * The length prefix that precedes the character data is read with
   * {@link readUInt()} using its default (big-endian) byte order. A length
   * of zero returns an empty string without reading any character data.
   * The character data is treated as ASCII; when the requested character
   * set is 'ASCII', the raw bytes are returned without calling iconv().
   *
   * @param string $characterSet (optional) Desired resulting character set.
   *   You may use any character set supported by {@link iconv()}. If omitted,
   *   an empty string is passed to iconv() (documented here previously as
   *   meaning 'current locale'; confirm against your iconv implementation).
   * @param integer $lengthBytes (optional) Number of bytes that make up the
   *   length. Default is 1.
   * @return string
   * @throws Zend_Pdf_Exception
   */
  public function readStringPascal($characterSet = '', $lengthBytes = 1)
  {
      $stringLength = $this->readUInt($lengthBytes);
      if ($stringLength == 0) {
          return '';
      }

      $rawString = $this->_dataSource->readBytes($stringLength);

      return ($characterSet == 'ASCII')
          ? $rawString
          : iconv('ASCII', $characterSet, $rawString);
  }
  442. }