12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777 |
- <?php
- /* Copyright (C) 2011-2012 Laurent Destailleur <eldy@users.sourceforge.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- /**
- * \file htdocs/core/class/rssparser.class.php
- * \ingroup core
- * \brief File of class to parse RSS feeds
- */
/**
 *	Class to parse RSS and Atom feeds.
 *
 *	A feed is loaded by parser(), either from its URL or from a cache file, and is decoded
 *	with the event based XML parser (default) or with SimpleXML when the global option
 *	EXTERNALRSS_USE_SIMPLEXML is set. Results are read through the get...() methods.
 */
class RssParser
{
    // Database handler received by the constructor
    var $db;
    // Error code of the last failed parser() call
    var $error;

    private $_format='';
    private $_urlRSS;
    private $_language;
    private $_generator;
    private $_copyright;
    private $_lastbuilddate;
    private $_imageurl;
    private $_link;
    private $_title;
    private $_description;
    private $_lastfetchdate;       // Last successful fetch
    private $_rssarray=array();

    // For parsing with xmlparser
    var $stack = array();          // parser stack
    var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');

    // State filled by the xmlparser handlers. These properties used to be created on the fly;
    // they are declared (and stay public) so they always exist and can be reset between feeds.
    var $items = array();              // One array per item/entry already closed
    var $channel = array();            // Fields found at channel/feed level
    var $image = array();              // Fields found inside <image>
    var $textinput = array();          // Fields found inside <textinput>
    var $current_item = array();       // Item/entry being read
    var $current_namespace = false;    // Namespace prefix of the element being read
    var $feed_version = '';            // Value of the "version" attribute of the root element
    var $inchannel = false;
    var $initem = false;
    var $intextinput = false;
    var $inimage = false;
    var $incontent = false;            // Name of the Atom content construct being read, false if none

    /**
     *	Constructor
     *
     *	@param	DoliDB	$db		Database handler
     */
    public function __construct($db)
    {
        $this->db=$db;
    }

    /**
     *	Return the format of the last parsed feed
     *
     *	@return	string		'rss', 'atom', or '' if no feed was parsed
     */
    public function getFormat()
    {
        return $this->_format;
    }

    /**
     *	Return the URL given to the last parser() call
     *
     *	@return	string
     */
    public function getUrlRss()
    {
        return $this->_urlRSS;
    }

    /**
     *	Return the language declared by the feed
     *
     *	@return	string
     */
    public function getLanguage()
    {
        return $this->_language;
    }

    /**
     *	Return the generator declared by the feed
     *
     *	@return	string
     */
    public function getGenerator()
    {
        return $this->_generator;
    }

    /**
     *	Return the copyright declared by the feed
     *
     *	@return	string
     */
    public function getCopyright()
    {
        return $this->_copyright;
    }

    /**
     *	Return the last build (or modification) date declared by the feed, as found in the feed
     *
     *	@return	string
     */
    public function getLastBuildDate()
    {
        return $this->_lastbuilddate;
    }

    /**
     *	Return the URL of the image declared by the feed
     *
     *	@return	string
     */
    public function getImageUrl()
    {
        return $this->_imageurl;
    }

    /**
     *	Return the link declared by the feed
     *
     *	@return	string
     */
    public function getLink()
    {
        return $this->_link;
    }

    /**
     *	Return the title of the feed
     *
     *	@return	string
     */
    public function getTitle()
    {
        return $this->_title;
    }

    /**
     *	Return the description of the feed
     *
     *	@return	string
     */
    public function getDescription()
    {
        return $this->_description;
    }

    /**
     *	Return the date of the last successful fetch
     *
     *	@return	mixed		Value of dol_filemtime() of the cache file when the cache was used,
     *						value of dol_now() when the feed was downloaded
     */
    public function getLastFetchDate()
    {
        return $this->_lastfetchdate;
    }

    /**
     *	Return the items of the last parsed feed
     *
     *	@return	array		List of arrays with keys link, title, description, pubDate, category, id, author
     */
    public function getItems()
    {
        return $this->_rssarray;
    }

    /**
     *	Parse rss URL
     *
     *	Everything read by a previous call is discarded first, so the same object can be reused.
     *
     *	@param	string	$urlRSS			Url to parse
     *	@param	int		$maxNb			Max nb of records to get (0 for no limit)
     *	@param	int		$cachedelay		0=No cache, nb of seconds we accept cache files (cachedir must also be defined)
     *	@param	string	$cachedir		Directory where to save cache file
     *	@return	int						<0 if KO, >0 if OK
     */
    public function parser($urlRSS, $maxNb=0, $cachedelay=60, $cachedir='')
    {
        global $conf;

        include_once DOL_DOCUMENT_ROOT.'/core/lib/files.lib.php';

        // Check parameters
        if (! dol_is_url($urlRSS))
        {
            $this->error="ErrorBadUrl";
            return -1;
        }

        $this->resetParsedData();
        $this->_urlRSS = $urlRSS;

        $usesimplexml = ! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML);
        $newpathofdestfile=$cachedir.'/'.dol_hash($this->_urlRSS);
        $nowgmt = dol_now();

        // Search into cache
        $foundintocache=0;
        if ($cachedelay > 0 && $cachedir)
        {
            $filedate=dol_filemtime($newpathofdestfile);
            if ($filedate >= ($nowgmt - $cachedelay))
            {
                $foundintocache=1;
                $this->_lastfetchdate=$filedate;
            }
            else
            {
                dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is not found or older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we can't use it.");
            }
        }

        // Load feed content into $str (false if it can't be read)
        if ($foundintocache) $str = file_get_contents($newpathofdestfile);
        else $str = $this->fetchUrlContent($this->_urlRSS);

        // Convert $str into xml
        $rss='';
        if ($str !== false)
        {
            if ($usesimplexml)
            {
                libxml_use_internal_errors(false);
                $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA);
            }
            else
            {
                $xmlparser=xml_parser_create('');
                // Not is_resource(): the parser is an object, not a resource, since PHP 8
                if (empty($xmlparser))
                {
                    $this->error="ErrorFailedToCreateParser";
                    return -1;
                }

                xml_set_element_handler($xmlparser, array($this, 'feed_start_element'), array($this, 'feed_end_element'));
                xml_set_character_data_handler($xmlparser, array($this, 'feed_cdata'));
                if (! xml_parse($xmlparser, $str))
                {
                    // What was read before the error is kept (best effort), but the problem is traced
                    dol_syslog(get_class($this)."::parser XML error: ".xml_error_string(xml_get_error_code($xmlparser)), LOG_WARNING);
                }
                xml_parser_free($xmlparser);
                $rss=$this;
            }
        }

        if (! $rss)
        {
            $this->error='ErrorFailedToLoadRSSFile';
            return -1;
        }

        if (empty($foundintocache))
        {
            // Save file into cache
            if ($cachedir) $this->saveCacheFile($cachedir, $newpathofdestfile, $str);
            $this->_lastfetchdate=$nowgmt;
        }
        unset($str);    // Free memory

        // The xmlparser handlers detect the format from the root element. SimpleXML does not,
        // so fall back on the presence of a channel.
        if ($usesimplexml) $this->_format = empty($rss->channel) ? 'atom' : 'rss';
        elseif (empty($this->_format)) $this->_format = empty($this->channel) ? 'atom' : 'rss';

        $items=array();

        // Save description entries
        if ($this->_format == 'rss')
        {
            if ($usesimplexml)
            {
                if (!empty($rss->channel->language))      $this->_language = (string) $rss->channel->language;
                if (!empty($rss->channel->generator))     $this->_generator = (string) $rss->channel->generator;
                if (!empty($rss->channel->copyright))     $this->_copyright = (string) $rss->channel->copyright;
                // SimpleXML is case sensitive: the RSS element is lastBuildDate
                if (!empty($rss->channel->lastBuildDate)) $this->_lastbuilddate = (string) $rss->channel->lastBuildDate;
                elseif (!empty($rss->channel->lastbuilddate)) $this->_lastbuilddate = (string) $rss->channel->lastbuilddate;
                if (!empty($rss->channel->image->url[0])) $this->_imageurl = (string) $rss->channel->image->url[0];
                if (!empty($rss->channel->link))          $this->_link = (string) $rss->channel->link;
                if (!empty($rss->channel->title))         $this->_title = (string) $rss->channel->title;
                if (!empty($rss->channel->description))   $this->_description = (string) $rss->channel->description;

                $items=$rss->channel->item;    // With simplexml
            }
            else
            {
                if (!empty($this->channel['language']))      $this->_language = (string) $this->channel['language'];
                if (!empty($this->channel['generator']))     $this->_generator = (string) $this->channel['generator'];
                if (!empty($this->channel['copyright']))     $this->_copyright = (string) $this->channel['copyright'];
                if (!empty($this->channel['lastbuilddate'])) $this->_lastbuilddate = (string) $this->channel['lastbuilddate'];
                if (!empty($this->image['url']))             $this->_imageurl = (string) $this->image['url'];
                if (!empty($this->channel['link']))          $this->_link = (string) $this->channel['link'];
                if (!empty($this->channel['title']))         $this->_title = (string) $this->channel['title'];
                if (!empty($this->channel['description']))   $this->_description = (string) $this->channel['description'];

                $items=$this->items;           // With xmlparse
            }
        }
        else
        {
            if ($usesimplexml)
            {
                if (!empty($rss->generator))    $this->_generator = (string) $rss->generator;
                // "modified" is the Atom 0.3 name, "updated" the Atom 1.0 one
                if (!empty($rss->modified))     $this->_lastbuilddate = (string) $rss->modified;
                elseif (!empty($rss->updated))  $this->_lastbuilddate = (string) $rss->updated;
                // href is an attribute of <link>, not a child element
                if (!empty($rss->link['href'])) $this->_link = (string) $rss->link['href'];
                if (!empty($rss->title))        $this->_title = (string) $rss->title;
                if (!empty($rss->description))  $this->_description = (string) $rss->description;

                // With simplexml
                $tmprss=xml2php($rss);
                if (is_array($tmprss) && isset($tmprss['entry'])) $items=$tmprss['entry'];
                // A single entry may come back as the entry itself instead of a list of entries
                if (is_array($items) && ! empty($items) && ! isset($items[0])) $items=array($items);
            }
            else
            {
                if (!empty($this->channel['generator']))    $this->_generator = (string) $this->channel['generator'];
                if (!empty($this->channel['modified']))     $this->_lastbuilddate = (string) $this->channel['modified'];
                elseif (!empty($this->channel['updated']))  $this->_lastbuilddate = (string) $this->channel['updated'];
                if (!empty($this->channel['link']))         $this->_link = (string) $this->channel['link'];
                if (!empty($this->channel['title']))        $this->_title = (string) $this->channel['title'];

                $items=$this->items;           // With xmlparse
            }
        }

        $i = 0;

        // Loop on each record ($items is an array, or a SimpleXMLElement for rss read with simplexml)
        if (is_array($items) || $items instanceof Traversable)
        {
            foreach($items as $item)
            {
                $itemCategory=array();

                if ($this->_format == 'rss' && $usesimplexml)
                {
                    $itemLink = (string) $item->link;
                    $itemTitle = (string) $item->title;
                    $itemDescription = (string) $item->description;
                    $itemPubDate = (string) $item->pubDate;
                    $itemId = '';
                    $itemAuthor = '';

                    // Loop on each category
                    foreach ($item->category as $cat)
                    {
                        $itemCategory[] = (string) $cat;
                    }
                }
                elseif ($this->_format == 'rss')
                {
                    // The xmlparser handlers concatenate the text of all <category> elements of an
                    // item into one string, so no reliable list of categories is available here.
                    $itemLink = $this->itemField($item, 'link');
                    $itemTitle = $this->itemField($item, 'title');
                    $itemDescription = $this->itemField($item, 'description');
                    $itemPubDate = $this->itemField($item, 'pubdate');
                    $itemId = $this->itemField($item, 'guid');
                    $itemAuthor = $this->itemField($item, 'author');
                }
                else
                {
                    // Atom entries are arrays with both parsers
                    $itemLink = $this->atomLink((is_array($item) && isset($item['link'])) ? $item['link'] : '');
                    $itemTitle = $this->itemField($item, 'title');
                    $itemDescription = $this->itemField($item, 'summary');
                    // "created" is the Atom 0.3 name, "published" and "updated" are the Atom 1.0 ones
                    $itemPubDate = $this->itemField($item, 'created');
                    if ($itemPubDate === '') $itemPubDate = $this->itemField($item, 'published');
                    if ($itemPubDate === '') $itemPubDate = $this->itemField($item, 'updated');
                    $itemId = $this->itemField($item, 'id');
                    $itemAuthor = $this->itemField($item, 'author');
                    if ($itemAuthor === '') $itemAuthor = $this->itemField($item, 'author_name');
                }

                // Add record to result array
                $this->_rssarray[$i] = array(
                    'link'=>$itemLink,
                    'title'=>$itemTitle,
                    'description'=>$itemDescription,
                    'pubDate'=>$itemPubDate,
                    'category'=>$itemCategory,
                    'id'=>$itemId,
                    'author'=>$itemAuthor);

                $i++;
                if ($maxNb > 0 && $i >= $maxNb) break;    // We get all records we want
            }
        }

        return 1;
    }

    /**
     *	Forget everything read from a previous feed (parsed values and xmlparser handlers state)
     *
     *	@return	void
     */
    private function resetParsedData()
    {
        $this->_format='';
        $this->_language=null;
        $this->_generator=null;
        $this->_copyright=null;
        $this->_lastbuilddate=null;
        $this->_imageurl=null;
        $this->_link=null;
        $this->_title=null;
        $this->_description=null;
        $this->_lastfetchdate=null;
        $this->_rssarray=array();

        $this->stack=array();
        $this->items=array();
        $this->channel=array();
        $this->image=array();
        $this->textinput=array();
        $this->current_item=array();
        $this->current_namespace=false;
        $this->feed_version='';
        $this->inchannel=false;
        $this->initem=false;
        $this->intextinput=false;
        $this->inimage=false;
        $this->incontent=false;
    }

    /**
     *	Download the content of a feed, using timeout and proxy settings found in $conf->global
     *
     *	@param	string	$url	Url to read
     *	@return	mixed			Content as a string, false if it could not be read
     */
    private function fetchUrlContent($url)
    {
        global $conf;

        try {
            ini_set("user_agent","Dolibarr ERP-CRM RSS reader");
            // Only override PHP timeouts when a value is really configured
            if (! empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT))
            {
                ini_set("max_execution_time", $conf->global->MAIN_USE_RESPONSE_TIMEOUT);
                ini_set("default_socket_timeout", $conf->global->MAIN_USE_RESPONSE_TIMEOUT);
            }

            $opts = array('http'=>array('method'=>"GET"));
            if (! empty($conf->global->MAIN_USE_CONNECT_TIMEOUT)) $opts['http']['timeout']=$conf->global->MAIN_USE_CONNECT_TIMEOUT;
            if (! empty($conf->global->MAIN_PROXY_USE))           $opts['http']['proxy']='tcp://'.$conf->global->MAIN_PROXY_HOST.':'.$conf->global->MAIN_PROXY_PORT;
            $context = stream_context_create($opts);

            return file_get_contents($url, false, $context);
        }
        catch (Exception $e) {
            dol_syslog(get_class($this).'::parser Error retrieving URL '.$url.' - '.$e->getMessage(), LOG_ERR);
            return false;
        }
    }

    /**
     *	Save the content of a feed into its cache file
     *
     *	@param	string	$cachedir	Directory of cache files (created if missing)
     *	@param	string	$file		Full path of cache file
     *	@param	string	$content	Content to save
     *	@return	void
     */
    private function saveCacheFile($cachedir, $file, $content)
    {
        global $conf;

        dol_syslog(get_class($this)."::parser cache file ".$file." is saved onto disk.");
        if (! dol_is_dir($cachedir)) dol_mkdir($cachedir);

        $fp = fopen($file, 'w');
        if ($fp === false)
        {
            // A cache that can't be written must not prevent the feed from being used
            dol_syslog(get_class($this)."::parser failed to open cache file ".$file." for writing.", LOG_WARNING);
            return;
        }
        fwrite($fp, $content);
        fclose($fp);

        $newmask='0644';
        if (! empty($conf->global->MAIN_UMASK)) $newmask=$conf->global->MAIN_UMASK;
        @chmod($file, octdec($newmask));
    }

    /**
     *	Read a field of an item as a string, without warning if it is missing
     *
     *	@param	mixed	$item	Item (array of fields)
     *	@param	string	$key	Name of field
     *	@return	string			Value of field; first value if the field is itself an array; '' if not available
     */
    private function itemField($item, $key)
    {
        if (! is_array($item) || ! isset($item[$key])) return '';

        $value = $item[$key];
        if (is_array($value)) $value = reset($value);

        return is_scalar($value) ? (string) $value : '';
    }

    /**
     *	Extract the URL of the "link" field of an Atom entry
     *
     *	@param	mixed	$link	String (as built by the xmlparser handlers), array of attributes
     *							of one link, or list of such arrays (as built by xml2php)
     *	@return	string			URL of the link ('alternate' one if several links), '' if none
     */
    private function atomLink($link)
    {
        if (is_string($link)) return $link;
        if (! is_array($link)) return '';
        if (isset($link['href'])) return (string) $link['href'];

        $firsthref = '';
        foreach ($link as $candidate)
        {
            if (! is_array($candidate) || ! isset($candidate['href'])) continue;
            // A link with no rel attribute is an 'alternate' link
            if (! isset($candidate['rel']) || $candidate['rel'] == 'alternate') return (string) $candidate['href'];
            if ($firsthref === '') $firsthref = (string) $candidate['href'];
        }
        return $firsthref;
    }

    /**
     *	Format one attribute of an inlined tag. Done here rather than with a global
     *	map_attrs() function, which this file does not define.
     *
     *	@param	string	$key	Name of attribute
     *	@param	string	$value	Value of attribute
     *	@return	string			String key="value"
     */
    private function formatAttribute($key, $value)
    {
        return $key.'="'.$value.'"';
    }

    /**
     *	Triggered when opened tag is found
     *
     *	@param	mixed	$p			XML parser calling the handler
     *	@param	string	$element	Tag
     *	@param	array	$attrs		Attributes of tags (by value: the XML parser can't provide a reference)
     *	@return	void
     */
    function feed_start_element($p, $element, $attrs)
    {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if (strpos($element, ':'))
        {
            list($ns, $el) = explode(':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' )
        {
            $this->current_namespace = $ns;
        }

        // if feed type isn't set, then this is first element of feed identify feed from root element
        if (empty($this->_format))
        {
            // The version attribute is optional
            $version = isset($attrs['version']) ? $attrs['version'] : '';
            if ( $el == 'rdf' ) {
                $this->_format = 'rss';
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->_format = 'rss';
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->_format = 'atom';
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ( $el == 'channel' )
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' )
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        }
        // if we're in the default namespace of an RSS feed,
        // record textinput or image fields
        elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = true;
        }
        elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = true;
        }
        // handle atom content constructs
        elseif ( $this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }
            $this->incontent = $el;
        }
        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->_format == 'atom' and $this->incontent )
        {
            // if tags are inlined, then flatten
            $attrs_str = join(' ', array_map(array($this, 'formatAttribute'), array_keys($attrs), array_values($attrs)));
            $this->append_content("<$element $attrs_str>");
            array_unshift($this->stack, $el);
        }
        // Atom supports many links per containing element. A link of type rel='alternate'
        // (also the meaning of a link with no rel attribute) is treated as being equivalent
        // to RSS's simple link element.
        elseif ($this->_format == 'atom' and $el == 'link' )
        {
            $rel = isset($attrs['rel']) ? $attrs['rel'] : 'alternate';
            $link_el = ($rel == 'alternate') ? 'link' : 'link_' . $rel;
            $this->append($link_el, isset($attrs['href']) ? $attrs['href'] : '');
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }

    /**
     *	Triggered when CDATA is found
     *
     *	@param	mixed	$p		XML parser calling the handler
     *	@param	string	$text	Text found
     *	@return	void
     */
    function feed_cdata($p, $text)
    {
        if ($this->_format == 'atom' and $this->incontent)
        {
            $this->append_content($text);
        }
        else
        {
            // Name of field is the path of opened elements, from oldest to newest
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    /**
     *	Triggered when closed tag is found
     *
     *	@param	mixed	$p		XML parser calling the handler
     *	@param	string	$el		Tag
     *	@return	void
     */
    function feed_end_element($p, $el)
    {
        $el = strtolower($el);

        if ($el == 'item' or $el == 'entry')
        {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = false;
        }
        elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = false;
        }
        elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' )
        {
            $this->inchannel = false;
        }
        elseif ($this->_format == 'atom' and $this->incontent ) {
            // balance tags properly
            if ( isset($this->stack[0]) and $this->stack[0] == $el )
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }
            array_shift($this->stack);
        }
        else {
            array_shift($this->stack);
        }

        $this->current_namespace = false;
    }

    /**
     *	To concat 2 string with no warning if an operand is not defined
     *
     *	@param	string	&$str1		String completed in place (created if not defined)
     *	@param	string	$str2		String to add at the end of $str1
     *	@return	void
     */
    function concat(&$str1, $str2="")
    {
        if (!isset($str1) ) {
            $str1="";
        }
        $str1 .= $str2;
    }

    /**
     *	Add text to the Atom content construct being read (named by $this->incontent),
     *	in the current item if any, else in the channel
     *
     *	@param	string	$text		Text
     *	@return	void
     */
    function append_content($text)
    {
        if ( $this->initem ) {
            $this->concat($this->current_item[ $this->incontent ], $text);
        }
        elseif ( $this->inchannel ) {
            $this->concat($this->channel[ $this->incontent ], $text);
        }
    }

    /**
     *	smart append - field and namespace aware
     *
     *	@param	string	$el		Name of field
     *	@param	string	$text	Text to add to the field
     *	@return	void
     */
    function append($el, $text)
    {
        if (!$el) {
            return;
        }

        if ( $this->current_namespace )
        {
            if ( $this->initem ) {
                $this->concat($this->current_item[ $this->current_namespace ][ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat($this->channel[ $this->current_namespace][ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat($this->textinput[ $this->current_namespace][ $el ], $text);
            }
            elseif ($this->inimage) {
                $this->concat($this->image[ $this->current_namespace ][ $el ], $text);
            }
        }
        else {
            if ( $this->initem ) {
                $this->concat($this->current_item[ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat($this->textinput[ $el ], $text);
            }
            elseif ($this->inimage) {
                $this->concat($this->image[ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat($this->channel[ $el ], $text);
            }
        }
    }
}
/**
 * Function to convert an XML object into an array
 *
 * Each child element becomes an entry indexed by its tag name. Attributes of a child are added
 * as entries of the child's array; a child that has attributes but neither children nor text is
 * returned as the array of its attributes; a child holding text keeps its text (attributes are
 * then ignored, as a string can't carry them).
 *
 * Known limit kept for compatibility with existing callers: as soon as one tag name is found
 * twice, its values are grouped into a list, and every child read AFTER that point is stored
 * in a list too, even if its tag name appears only once.
 *
 * @param	SimpleXMLElement	$xml		Xml
 * @return	array|string					Array of children, or text of element if it has no child
 */
function xml2php($xml)
{
    $fils = 0;
    $tab = false;
    $array = array();
    foreach($xml->children() as $key => $value)
    {
        $child = xml2php($value);

        //To deal with the attributes
        $attributes = array();
        foreach($value->attributes() as $ak=>$av)
        {
            $attributes[$ak] = (string) $av;
        }
        if (count($attributes) > 0)
        {
            if (is_array($child))
            {
                foreach($attributes as $ak=>$av) $child[$ak] = $av;
            }
            elseif (trim($child) === '')
            {
                // Element with attributes only (like <link href="..."/>)
                $child = $attributes;
            }
            // else $child is the text of the element: writing $child[$ak] would alter the string
        }

        //Let see if the new child is not in the array
        if ($tab == false && array_key_exists($key, $array))
        {
            //If this element is already in the array we will create an indexed array
            $array[$key] = array($array[$key], $child);
            $tab = true;
        }
        elseif ($tab == true)
        {
            //Add an element in an existing array (a text stored before becomes first element of the array)
            if (isset($array[$key]) && ! is_array($array[$key])) $array[$key] = array($array[$key]);
            $array[$key][] = $child;
        }
        else
        {
            //Add a simple element
            $array[$key] = $child;
        }

        $fils++;
    }

    if ($fils == 0)
    {
        return (string) $xml;
    }

    return $array;
}
|