fpdi =& $fpdi;
        $this->filename = $filename;

        parent::pdf_parser($filename);

        // Get Info
        $this->getInfo();

        // resolve Pages-Dictionary
        $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

        // Read pages
        $this->read_pages($this->c, $pages, $this->pages);

        // count pages
        $this->page_count = count($this->pages);
    }

    /**
     * Overwrite parent::error()
     *
     * Forwards the message to the error() method of the owning FPDI object.
     *
     * @param string $msg Error-Message
     */
    function error($msg) {
        $this->fpdi->error($msg);
    }

    /**
     * Get pagecount from sourcefile
     *
     * @return int number of entries collected in $this->pages
     */
    function getPageCount() {
        return $this->page_count;
    }

    /**
     * Set pageno
     *
     * The caller passes a 1-based page number; it is stored zero-based
     * so it can be used directly as an index into $this->pages.
     *
     * @param int $pageno Pagenumber to use
     */
    function setPageno($pageno) {
        $pageno -= 1;

        if ($pageno < 0 || $pageno >= $this->getPageCount()) {
            // NOTE(review): this relies on fpdi->error() aborting; if it ever
            // returns, the out-of-range index is still stored below.
            $this->fpdi->error("Pagenumber is wrong!");
        }

        $this->pageno = $pageno;
    }

    /**
     * Get page-resources from current page
     *
     * @return array|false see _getPageResources()
     */
    function getPageResources() {
        return $this->_getPageResources($this->pages[$this->pageno]);
    }

    /**
     * Get page-resources from /Page
     *
     * Uses the /Resources entry of the given object if it has one,
     * otherwise walks up the /Parent chain until one is found.
     *
     * @param array $obj Array of pdf-data (a /Page or /Pages object or a reference to it)
     * @return array|false the resources value, or false if neither the object
     *                     nor any of its parents has a /Resources entry
     */
    function _getPageResources ($obj) { // $obj = /Page
        $obj = $this->pdf_resolve_object($this->c, $obj);

        // If the current object has a resources
        // dictionary associated with it, we use
        // it. Otherwise, we move back to its
        // parent object.
        if (isset($obj[1][1]['/Resources'])) {
            $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
        } else if (isset($obj[1][1]['/Parent'])) {
            $res = $this->_getPageResources($obj[1][1]['/Parent']);
        } else {
            return false;
        }

        // The parent chain may end without any /Resources (false); do not
        // index into a non-array value.
        if (!is_array($res)) {
            return $res;
        }

        // Unwrap an indirect object so the caller gets its value directly.
        if ($res[0] == PDF_TYPE_OBJECT) {
            return $res[1];
        }
        return $res;
    }

    /**
     * Read the document information dictionary into $this->infos
     *
     * Only the keys listed in $avail_infos are read, and only when their
     * value is a literal string or a hex string. If the trailer has no
     * /Info entry (it is optional per the PDF specification), $this->infos
     * becomes an empty array.
     */
    function getInfo() {
        $avail_infos = array("Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped");
        $infos = array();

        if (isset($this->xref['trailer'][1]['/Info'])) {
            $_infos = $this->pdf_resolve_object($this->c, $this->xref['trailer'][1]['/Info']);

            foreach ($avail_infos AS $info) {
                $key = "/".$info;
                if (!isset($_infos[1][1][$key])) {
                    continue;
                }

                $entry = $_infos[1][1][$key];
                if ($entry[0] == PDF_TYPE_STRING) {
                    $infos[$info] = $this->deescapeString($entry[1]);
                } else if ($entry[0] == PDF_TYPE_HEX) {
                    $infos[$info] = $this->hex2String($entry[1]);
                }
            }
        }

        $this->infos = $infos;
    }

    /**
     * Rebuilds a hexstring to string
     *
     * Without a byte order mark every pair of hex digits becomes one byte
     * (a missing final digit is treated as 0). With a UTF-16 byte order
     * mark (FEFF = big endian, FFFE = little endian, matched in either
     * letter case) every group of four digits is one code unit: units
     * whose high byte is 00 yield the low byte, all others yield "?".
     * A truncated final code unit is padded with 0 digits instead of
     * reading past the end of the string.
     *
     * @param string $hex hexstring
     * @return string
     */
    function hex2String($hex) {
        $endian = false;
        $start = 0;

        if (preg_match("/^FEFF/i", $hex)) { // is utf-16 aka big endian
            $start = 4;
            $endian = "big";
        } else if (preg_match("/^FFFE/i", $hex)) { // is utf-16 aka little endian
            $start = 4;
            $endian = "little";
        }

        $s = "";
        $l = strlen($hex);

        if ($endian === false) {
            for ($i = $start; $i < $l; $i += 2) {
                $second = isset($hex[$i+1]) ? $hex[$i+1] : '0';
                $s .= chr(hexdec($hex[$i].$second));
            }
            return $s;
        }

        for ($i = $start; $i < $l; $i += 4) {
            // Always work on a complete 4-digit code unit.
            $unit = str_pad(substr($hex, $i, 4), 4, '0');
            $first = substr($unit, 0, 2);
            $second = substr($unit, 2, 2);

            if ($endian == "big") {
                $high = $first;
                $low = $second;
            } else {
                $high = $second;
                $low = $first;
            }

            if ($high !== "00") {
                $s .= "?";
            } else {
                $s .= chr(hexdec($low));
            }
        }

        return $s;
    }

    /**
     * Resolve the escape sequences of a literal PDF string
     *
     * Octal escapes (backslash followed by one to three octal digits) are
     * replaced first, then escaped parentheses. The digits are passed to
     * octdec() as a string, so a leading zero (e.g. \012) is decoded once
     * only. This no longer depends on the PCRE "e" modifier, which was
     * removed in PHP 7.
     *
     * @param string $s string with escape sequences
     * @return string
     */
    function deescapeString($s) {
        $s = preg_replace_callback('/\\\\([0-7]{1,3})/', array($this, '_deescapeOctal'), $s);

        return str_replace(array('\\(', '\\)'), array('(', ')'), $s);
    }

    /**
     * preg_replace_callback() helper for deescapeString()
     *
     * @param array $matches $matches[1] holds the octal digits
     * @return string the character with that octal code
     */
    function _deescapeOctal($matches) {
        return chr(octdec($matches[1]));
    }

    /**
     * Get content of current page
     *
     * If /Contents is an array, the streams are concatenated.
     * A page without a /Contents entry yields an empty string.
     *
     * @return string
     */
    function getContent() {
        $buffer = "";

        if (!isset($this->pages[$this->pageno][1][1]['/Contents'])) {
            return $buffer;
        }

        $contents = $this->getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
        foreach ($contents AS $tmp_content) {
            $buffer .= $this->rebuildContentStream($tmp_content);
        }

        return $buffer;
    }

    /**
     * Resolve all content-objects
     *
     * An object reference is resolved; if it points to an array, that
     * array is processed recursively, otherwise the resolved object is
     * collected. An array is processed entry by entry. Any other value
     * type yields an empty list.
     *
     * @param array $content_ref
     * @return array list of resolved content objects
     */
    function getPageContent($content_ref) {
        $contents = array();

        if ($content_ref[0] == PDF_TYPE_OBJREF) {
            $content = $this->pdf_resolve_object($this->c, $content_ref);
            if ($content[1][0] == PDF_TYPE_ARRAY) {
                $contents = $this->getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
            foreach ($content_ref[1] AS $tmp_content_ref) {
                $contents = array_merge($contents, $this->getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }

    /**
     * Rebuild content-streams
     * Handles unfiltered streams and the /FlateDecode, /LZWDecode and
     * /ASCII85Decode filters (the latter two only if their decoder
     * classes can be loaded).
*
     * The /Filter entry of the stream dictionary may be a single name or
     * an array of names; the filters are applied to the raw stream data
     * in the order they are listed.
     *
     * @param array $obj content object as collected by getPageContent()
     * @return string stream data after all filters were applied
     */
    function rebuildContentStream($obj) {
        // Normalise /Filter to a list of filter entries.
        $filterList = array();
        if (isset($obj[1][1]['/Filter'])) {
            $filterEntry = $obj[1][1]['/Filter'];

            if ($filterEntry[0] == PDF_TYPE_TOKEN) {
                $filterList[] = $filterEntry;
            } else if ($filterEntry[0] == PDF_TYPE_ARRAY) {
                $filterList = $filterEntry[1];
            }
        }

        $data = $obj[2][1];

        foreach ($filterList AS $filterEntry) {
            $filterName = $filterEntry[1];

            switch ($filterName) {
                case "/FlateDecode":
                    if (!function_exists('gzuncompress')) {
                        $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$filterName));
                    } else {
                        $data = @gzuncompress($data);
                    }

                    if ($data === false) {
                        $this->fpdi->error("Error while decompressing string.");
                    }
                    break;

                case "/LZWDecode":
                    // The decoder is optional; it is only used if its class could be loaded.
                    @include_once("decoders/lzw.php");
                    if (!class_exists("LZWDecode")) {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
                    } else {
                        $decoder = new LZWDecode($this->fpdi);
                        $data = $decoder->decode($data);
                    }
                    break;

                case "/ASCII85Decode":
                    @include_once("decoders/ascii85.php");
                    if (!class_exists("ASCII85Decode")) {
                        $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
                    } else {
                        $decoder = new ASCII85Decode($this->fpdi);
                        $data = $decoder->decode(trim($data));
                    }
                    break;

                case null:
                    // Empty filter entry: data is passed through untouched.
                    break;

                default:
                    $this->fpdi->error(sprintf("Unsupported Filter: %s",$filterName));
            }
        }

        return $data;
    }

    /**
     * Get MediaBox
     *
     * gets an array that describes the size of a page.
*
     * @param integer $pageno 1-based page number
     * @return array @see getPageBox()
     */
    function getPageMediaBox($pageno) {
        $page = $this->pages[$pageno-1];

        return $this->getPageBox($page, "/MediaBox");
    }

    /**
     * Get a Box from a page
     * Arrayformat is same as used by fpdf_tpl
     *
     * Looks the box up in the given object; if it is missing there (or is
     * not an array), the lookup continues with the object's /Parent.
     * The four numbers of the box are divided by the scale factor
     * $this->fpdi->k and returned under the keys x, y, w and h in the
     * order they appear in the box array.
     *
     * @param array $page a /Page
     * @param string $box_index Type of Box @see getPageBoxes()
     * @return array|false the box, or false if no object in the parent
     *                     chain defines it
     */
    function getPageBox($page, $box_index) {
        $node = $this->pdf_resolve_object($this->c, $page);

        $entry = null;
        if (isset($node[1][1][$box_index])) {
            $entry = $node[1][1][$box_index];
        }

        // The box itself may be stored as an indirect object.
        if ($entry !== null && $entry[0] == PDF_TYPE_OBJREF) {
            $resolved = $this->pdf_resolve_object($this->c, $entry);
            $entry = $resolved[1];
        }

        if ($entry !== null && $entry[0] == PDF_TYPE_ARRAY) {
            $values = $entry[1];
            $scale = $this->fpdi->k;

            return array(
                "x" => $values[0][1]/$scale,
                "y" => $values[1][1]/$scale,
                "w" => $values[2][1]/$scale,
                "h" => $values[3][1]/$scale
            );
        }

        if (!isset($node[1][1]['/Parent'])) {
            return false;
        }

        // Box attributes can be inherited, so ask the parent node.
        $parent = $this->pdf_resolve_object($this->c, $node[1][1]['/Parent']);

        return $this->getPageBox($parent, $box_index);
    }

    /**
     * Get all Boxes from /Page
     *
     * @param array a /Page
     * @return array boxes that could be found, keyed by box name
     */
    function getPageBoxes($page) {
        $found = array();

        foreach (array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox") AS $name) {
            $candidate = $this->getPageBox($page, $name);
            if ($candidate) {
                $found[$name] = $candidate;
            }
        }

        return $found;
    }

    /**
     * Read all /Page(es)
     *
     * Walks the /Kids of the given node; nested /Pages nodes are
     * descended into, every other kid is appended to the result.
     *
     * @param object pdf_context
     * @param array /Pages
     * @param array the result-array
     */
    function read_pages (&$c, &$pages, &$result) {
        // Get the kids dictionary
        $kidList = $this->pdf_resolve_object($c, $pages[1][1]['/Kids']);

        if (!is_array($kidList)) {
            $this->fpdi->Error("Cannot find /Kids in current /Page-Dictionary");
        }

        foreach ($kidList[1] as $kidRef) {
            $kid = $this->pdf_resolve_object($c, $kidRef);

            if ($kid[1][1]['/Type'][1] !== '/Pages') {
                $result[] = $kid;
                continue;
            }

            // If one of the kids is an embedded
            // /Pages array, resolve it as well.
            $this->read_pages($c, $kid, $result);
        }
    }

    /**
     * Get PDF-Version
     *
     * And reset the PDF Version used in FPDI if needed: the FPDI
     * importVersion is raised when this file's version is higher.
     */
    function getPDFVersion() {
        parent::getPDFVersion();

        if (!isset($this->fpdi->importVersion)) {
            return;
        }

        if ($this->pdfVersion > $this->fpdi->importVersion) {
            $this->fpdi->importVersion = $this->pdfVersion;
        }
    }

}

?>