407 lines
15 KiB

11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Pdf
  17. * @subpackage Fonts
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Pdf_Cmap */
  23. // require_once 'Zend/Pdf/Cmap.php';
  24. /**
  25. * Implements the "segment mapping to delta values" character map (type 4).
  26. *
  27. * This is the Microsoft standard mapping table type for OpenType fonts. It
  28. * provides the ability to cover multiple contiguous ranges of the Unicode
  29. * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
  30. *
  31. * @package Zend_Pdf
  32. * @subpackage Fonts
  33. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. */
  36. class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
  37. {
    /**** Instance Variables ****/

    /**
     * The number of segments in the table.
     *
     * Read by the constructor from the 16-bit value at byte offset 6 of the
     * table data and halved (the stored value is premultiplied by two).
     *
     * @var integer
     */
    protected $_segmentCount = 0;

    /**
     * The size of the binary search range for segments.
     *
     * Read by the constructor from the 16-bit value at byte offset 8 and
     * halved. The lookup methods use it both as a starting segment index and
     * as the basis for the binary search step size.
     *
     * @var integer
     */
    protected $_searchRange = 0;

    /**
     * The number of binary search steps required to cover the entire search
     * range.
     *
     * Computed by the constructor as the 16-bit value at byte offset 10 plus
     * one.
     *
     * @var integer
     */
    protected $_searchIterations = 0;

    /**
     * Array of ending character codes for each segment.
     *
     * Keys are segment numbers starting at 1 (not 0).
     *
     * @var array
     */
    protected $_segmentTableEndCodes = array();

    /**
     * The ending character code for the segment at the end of the low search
     * range, i.e. the end code of segment number $_searchRange.
     *
     * @var integer
     */
    protected $_searchRangeEndCode = 0;

    /**
     * Array of starting character codes for each segment.
     *
     * Keys are segment numbers starting at 1 (not 0).
     *
     * @var array
     */
    protected $_segmentTableStartCodes = array();

    /**
     * Array of character code to glyph delta values for each segment.
     *
     * Keys are segment numbers starting at 1. The values are extracted as
     * signed 16-bit integers, so they may be negative.
     *
     * @var array
     */
    protected $_segmentTableIdDeltas = array();

    /**
     * Array of offsets into the glyph index array for each segment.
     *
     * Keys are segment numbers starting at 1. The values are halved while
     * parsing (the font stores them as byte multiples). A value of zero means
     * the segment is mapped with its delta value instead of the glyph index
     * array.
     *
     * @var array
     */
    protected $_segmentTableIdRangeOffsets = array();

    /**
     * Glyph index array. Stores glyph numbers, used with range offset.
     *
     * Zero-based list holding every 16-bit value that follows the four
     * segment arrays in the table data.
     *
     * @var array
     */
    protected $_glyphIndexArray = array();
  86. /**** Public Interface ****/
  87. /* Concrete Class Implementation */
  88. /**
  89. * Returns an array of glyph numbers corresponding to the Unicode characters.
  90. *
  91. * If a particular character doesn't exist in this font, the special 'missing
  92. * character glyph' will be substituted.
  93. *
  94. * See also {@link glyphNumberForCharacter()}.
  95. *
  96. * @param array $characterCodes Array of Unicode character codes (code points).
  97. * @return array Array of glyph numbers.
  98. */
  99. public function glyphNumbersForCharacters($characterCodes)
  100. {
  101. $glyphNumbers = array();
  102. foreach ($characterCodes as $key => $characterCode) {
  103. /* These tables only cover the 16-bit character range.
  104. */
  105. if ($characterCode > 0xffff) {
  106. $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  107. continue;
  108. }
  109. /* Determine where to start the binary search. The segments are
  110. * ordered from lowest-to-highest. We are looking for the first
  111. * segment whose end code is greater than or equal to our character
  112. * code.
  113. *
  114. * If the end code at the top of the search range is larger, then
  115. * our target is probably below it.
  116. *
  117. * If it is smaller, our target is probably above it, so move the
  118. * search range to the end of the segment list.
  119. */
  120. if ($this->_searchRangeEndCode >= $characterCode) {
  121. $searchIndex = $this->_searchRange;
  122. } else {
  123. $searchIndex = $this->_segmentCount;
  124. }
  125. /* Now do a binary search to find the first segment whose end code
  126. * is greater or equal to our character code. No matter the number
  127. * of segments (there may be hundreds in a large font), we will only
  128. * need to perform $this->_searchIterations.
  129. */
  130. for ($i = 1; $i <= $this->_searchIterations; $i++) {
  131. if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
  132. $subtableIndex = $searchIndex;
  133. $searchIndex -= $this->_searchRange >> $i;
  134. } else {
  135. $searchIndex += $this->_searchRange >> $i;
  136. }
  137. }
  138. /* If the segment's start code is greater than our character code,
  139. * that character is not represented in this font. Move on.
  140. */
  141. if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
  142. $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  143. continue;
  144. }
  145. if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
  146. /* This segment uses a simple mapping from character code to
  147. * glyph number.
  148. */
  149. $glyphNumbers[$key] = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
  150. } else {
  151. /* This segment relies on the glyph index array to determine the
  152. * glyph number. The calculation below determines the correct
  153. * index into that array. It's a little odd because the range
  154. * offset in the font file is designed to quickly provide an
  155. * address of the index in the raw binary data instead of the
  156. * index itself. Since we've parsed the data into arrays, we
  157. * must process it a bit differently.
  158. */
  159. $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
  160. $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
  161. $subtableIndex - 1);
  162. $glyphNumbers[$key] = $this->_glyphIndexArray[$glyphIndex];
  163. }
  164. }
  165. return $glyphNumbers;
  166. }
  167. /**
  168. * Returns the glyph number corresponding to the Unicode character.
  169. *
  170. * If a particular character doesn't exist in this font, the special 'missing
  171. * character glyph' will be substituted.
  172. *
  173. * See also {@link glyphNumbersForCharacters()} which is optimized for bulk
  174. * operations.
  175. *
  176. * @param integer $characterCode Unicode character code (code point).
  177. * @return integer Glyph number.
  178. */
  179. public function glyphNumberForCharacter($characterCode)
  180. {
  181. /* This code is pretty much a copy of glyphNumbersForCharacters().
  182. * See that method for inline documentation.
  183. */
  184. if ($characterCode > 0xffff) {
  185. return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  186. }
  187. if ($this->_searchRangeEndCode >= $characterCode) {
  188. $searchIndex = $this->_searchRange;
  189. } else {
  190. $searchIndex = $this->_segmentCount;
  191. }
  192. for ($i = 1; $i <= $this->_searchIterations; $i++) {
  193. if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
  194. $subtableIndex = $searchIndex;
  195. $searchIndex -= $this->_searchRange >> $i;
  196. } else {
  197. $searchIndex += $this->_searchRange >> $i;
  198. }
  199. }
  200. if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
  201. return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
  202. }
  203. if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
  204. $glyphNumber = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) % 65536;
  205. } else {
  206. $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
  207. $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
  208. $subtableIndex - 1);
  209. $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
  210. }
  211. return $glyphNumber;
  212. }
  213. /**
  214. * Returns an array containing the Unicode characters that have entries in
  215. * this character map.
  216. *
  217. * @return array Unicode character codes.
  218. */
  219. public function getCoveredCharacters()
  220. {
  221. $characterCodes = array();
  222. for ($i = 1; $i <= $this->_segmentCount; $i++) {
  223. for ($code = $this->_segmentTableStartCodes[$i]; $code <= $this->_segmentTableEndCodes[$i]; $code++) {
  224. $characterCodes[] = $code;
  225. }
  226. }
  227. return $characterCodes;
  228. }
  229. /**
  230. * Returns an array containing the glyphs numbers that have entries in this character map.
  231. * Keys are Unicode character codes (integers)
  232. *
  233. * This functionality is partially covered by glyphNumbersForCharacters(getCoveredCharacters())
  234. * call, but this method do it in more effective way (prepare complete list instead of searching
  235. * glyph for each character code).
  236. *
  237. * @internal
  238. * @return array Array representing <Unicode character code> => <glyph number> pairs.
  239. */
  240. public function getCoveredCharactersGlyphs()
  241. {
  242. $glyphNumbers = array();
  243. for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
  244. if ($this->_segmentTableIdRangeOffsets[$segmentNum] == 0) {
  245. $delta = $this->_segmentTableIdDeltas[$segmentNum];
  246. for ($code = $this->_segmentTableStartCodes[$segmentNum];
  247. $code <= $this->_segmentTableEndCodes[$segmentNum];
  248. $code++) {
  249. $glyphNumbers[$code] = ($code + $delta) % 65536;
  250. }
  251. } else {
  252. $code = $this->_segmentTableStartCodes[$segmentNum];
  253. $glyphIndex = $this->_segmentTableIdRangeOffsets[$segmentNum] - ($this->_segmentCount - $segmentNum) - 1;
  254. while ($code <= $this->_segmentTableEndCodes[$segmentNum]) {
  255. $glyphNumbers[$code] = $this->_glyphIndexArray[$glyphIndex];
  256. $code++;
  257. $glyphIndex++;
  258. }
  259. }
  260. }
  261. return $glyphNumbers;
  262. }
  263. /* Object Lifecycle */
  264. /**
  265. * Object constructor
  266. *
  267. * Parses the raw binary table data. Throws an exception if the table is
  268. * malformed.
  269. *
  270. * @param string $cmapData Raw binary cmap table data.
  271. * @throws Zend_Pdf_Exception
  272. */
  273. public function __construct($cmapData)
  274. {
  275. /* Sanity check: The table should be at least 23 bytes in size.
  276. */
  277. $actualLength = strlen($cmapData);
  278. if ($actualLength < 23) {
  279. // require_once 'Zend/Pdf/Exception.php';
  280. throw new Zend_Pdf_Exception('Insufficient table data',
  281. Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
  282. }
  283. /* Sanity check: Make sure this is right data for this table type.
  284. */
  285. $type = $this->_extractUInt2($cmapData, 0);
  286. if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
  287. // require_once 'Zend/Pdf/Exception.php';
  288. throw new Zend_Pdf_Exception('Wrong cmap table type',
  289. Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
  290. }
  291. $length = $this->_extractUInt2($cmapData, 2);
  292. if ($length != $actualLength) {
  293. // require_once 'Zend/Pdf/Exception.php';
  294. throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
  295. Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
  296. }
  297. /* Mapping tables should be language-independent. The font may not work
  298. * as expected if they are not. Unfortunately, many font files in the
  299. * wild incorrectly record a language ID in this field, so we can't
  300. * call this a failure.
  301. */
  302. $language = $this->_extractUInt2($cmapData, 4);
  303. if ($language != 0) {
  304. // Record a warning here somehow?
  305. }
  306. /* These two values are stored premultiplied by two which is convienent
  307. * when using the binary data directly, but we're parsing it out to
  308. * native PHP data types, so divide by two.
  309. */
  310. $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
  311. $this->_searchRange = $this->_extractUInt2($cmapData, 8) >> 1;
  312. $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;
  313. $offset = 14;
  314. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  315. $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
  316. }
  317. $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
  318. $offset += 2; // reserved bytes
  319. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  320. $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
  321. }
  322. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  323. $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset); // signed
  324. }
  325. /* The range offset helps determine the index into the glyph index array.
  326. * Like the segment count and search range above, it's stored as a byte
  327. * multiple in the font, so divide by two as we extract the values.
  328. */
  329. for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
  330. $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
  331. }
  332. /* The size of the glyph index array varies by font and depends on the
  333. * extent of the usage of range offsets versus deltas. Some fonts may
  334. * not have any entries in this array.
  335. */
  336. for (; $offset < $length; $offset += 2) {
  337. $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
  338. }
  339. /* Sanity check: After reading all of the data, we should be at the end
  340. * of the table.
  341. */
  342. if ($offset != $length) {
  343. // require_once 'Zend/Pdf/Exception.php';
  344. throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
  345. Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
  346. }
  347. }
  348. }