diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..6af7edb --- /dev/null +++ b/README @@ -0,0 +1,48 @@ +NAME + + MagpieRSS - a simple RSS integration tool + +SYNOPSIS + + require_once('rss_fetch.inc'); + $url = $_GET['url']; + $rss = fetch_rss( $url ); + + echo "Channel Title: " . $rss->channel['title'] . "

"; + echo "

"; + +DESCRIPTION + + MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", + and simple to use. + + Some features include: + + * supports RSS 0.9 - 1.0, with limited RSS 2.0 support + * supports namespaces, and modules, including mod_content and mod_event + * open minded [1] + * simple, functional interface, to object oriented backend parser + * automatic caching of parsed RSS objects makes it easy to integrate + * supports conditional GET with Last-Modified, and ETag + * uses constants for easy override of default behaviour + * heavily commented + + +1. By open minded I mean Magpie will accept any tag it finds in good faith that + it was supposed to be here. For strict validation, look elsewhere. + + +GETTING STARTED + + + +COPYRIGHT: + Copyright(c) 2002 kellan@protest.net. All rights reserved. + This software is released under the GNU General Public License. + Please read the disclaimer at the top of the Snoopy.class.inc file. diff --git a/extlib/Snoopy.class.inc b/extlib/Snoopy.class.inc index 95a63bb..94cf36b 100644 --- a/extlib/Snoopy.class.inc +++ b/extlib/Snoopy.class.inc @@ -38,7 +38,7 @@ http://snoopy.sourceforge.com class Snoopy { /**** Public variables ****/ - + /* user definable vars */ var $host = "www.php.net"; // host name we are connecting to @@ -62,15 +62,15 @@ class Snoopy var $passcookies = true; // pass set cookies back through redirects // NOTE: this currently does not respect // dates, domains or paths. 
- + var $user = ""; // user for http authentication var $pass = ""; // password for http authentication - + // http accept types var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; - + var $results = ""; // where the content is put - + var $error = ""; // error messages sent here var $response_code = ""; // response code returned from server var $headers = array(); // headers returned from server sent here @@ -80,7 +80,7 @@ class Snoopy // set to 0 to disallow timeouts var $timed_out = false; // if a read operation timed out var $status = 0; // http request status - + var $curl_path = "/usr/bin/curl"; // Snoopy will use cURL for fetching // SSL content if a full system path to @@ -92,14 +92,14 @@ class Snoopy // library functions built into php, // as these functions are not stable // as of this Snoopy release. - + // send Accept-encoding: gzip? - var $use_gzip = true; - - /**** Private variables ****/ - + var $use_gzip = true; + + /**** Private variables ****/ + var $_maxlinelen = 4096; // max line length (headers) - + var $_httpmethod = "GET"; // default http request method var $_httpversion = "HTTP/1.0"; // default http request version var $_submit_method = "POST"; // default submit method @@ -109,7 +109,7 @@ class Snoopy var $_redirectdepth = 0; // increments on an http redirect var $_frameurls = array(); // frame src urls var $_framedepth = 0; // increments on frame depth - + var $_isproxy = false; // set if using a proxy server var $_fp_timeout = 30; // timeout for socket connection @@ -124,14 +124,14 @@ class Snoopy function fetch($URI) { - + //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS); $URI_PARTS = parse_url($URI); if (!empty($URI_PARTS["user"])) $this->user = $URI_PARTS["user"]; if (!empty($URI_PARTS["pass"])) $this->pass = $URI_PARTS["pass"]; - + switch($URI_PARTS["scheme"]) { case "http": @@ -151,7 +151,7 @@ class Snoopy // no proxy, send only the path $this->_httprequest($path, $fp, $URI, $this->_httpmethod); 
} - + $this->_disconnect($fp); if($this->_redirectaddr) @@ -174,7 +174,7 @@ class Snoopy { $frameurls = $this->_frameurls; $this->_frameurls = array(); - + while(list(,$frameurl) = each($frameurls)) { if($this->_framedepth < $this->maxframes) @@ -185,13 +185,13 @@ class Snoopy else break; } - } + } } else { return false; } - return true; + return true; break; case "https": if(!$this->curl_path || (!is_executable($this->curl_path))) { @@ -244,15 +244,15 @@ class Snoopy else break; } - } - return true; + } + return true; break; default: // not a valid protocol $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; return false; break; - } + } return true; } @@ -261,8 +261,8 @@ class Snoopy /*======================================================================*\ Private functions \*======================================================================*/ - - + + /*======================================================================*\ Function: _striplinks Purpose: strip the hyperlinks from an html document @@ -271,13 +271,13 @@ class Snoopy \*======================================================================*/ function _striplinks($document) - { + { preg_match_all("'<\s*a\s+.*href\s*=\s* # find ]+)) # if quote found, match up to next matching # quote, otherwise match up to next space 'isx",$document,$links); - + // catenate the non-empty matches from the conditional subpattern @@ -285,14 +285,14 @@ class Snoopy { if(!empty($val)) $match[] = $val; - } - + } + while(list($key,$val) = each($links[3])) { if(!empty($val)) $match[] = $val; - } - + } + // return the links return $match; } @@ -305,18 +305,18 @@ class Snoopy \*======================================================================*/ function _stripform($document) - { + { preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements); - + // catenate the matches $match = 
implode("\r\n",$elements[0]); - + // return the links return $match; } - - + + /*======================================================================*\ Function: _striptext Purpose: strip the text from an html document @@ -326,11 +326,11 @@ class Snoopy function _striptext($document) { - + // I didn't use preg eval (//e) since that is only available in PHP 4.0. // so, list your entities one by one here. I included some of the // more common ones. - + $search = array("']*?>.*?'si", // strip out javascript "'<[\/\!]*?[^<>]*?>'si", // strip out html tags "'([\r\n])[\s]+'", // strip out white space @@ -343,7 +343,7 @@ class Snoopy "'&(cent|#162);'i", "'&(pound|#163);'i", "'&(copy|#169);'i" - ); + ); $replace = array( "", "", "\\1", @@ -356,9 +356,9 @@ class Snoopy chr(162), chr(163), chr(169)); - + $text = preg_replace($search,$replace,$document); - + return $text; } @@ -372,23 +372,23 @@ class Snoopy function _expandlinks($links,$URI) { - + preg_match("/^[^\?]+/",$URI,$match); $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]); - + $search = array( "|^http://".preg_quote($this->host)."|i", "|^(?!http://)(\/)?(?!mailto:)|i", "|/\./|", "|/[^\/]+/\.\./|" ); - + $replace = array( "", $match."/", "/", "/" - ); - + ); + $expandedLinks = preg_replace($search,$replace,$links); return $expandedLinks; @@ -401,25 +401,25 @@ class Snoopy $fp the current open file pointer $URI the full URI $body body contents to send if any (POST) - Output: + Output: \*======================================================================*/ - + function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") { if($this->passcookies && $this->_redirectaddr) $this->setcookies(); - + $URI_PARTS = parse_url($URI); if(empty($url)) $url = "/"; - $headers = $http_method." ".$url." ".$this->_httpversion."\r\n"; + $headers = $http_method." ".$url." 
".$this->_httpversion."\r\n"; if(!empty($this->agent)) $headers .= "User-Agent: ".$this->agent."\r\n"; if(!empty($this->host) && !isset($this->rawheaders['Host'])) $headers .= "Host: ".$this->host."\r\n"; if(!empty($this->accept)) $headers .= "Accept: ".$this->accept."\r\n"; - + if($this->use_gzip) { // make sure PHP was built with --with-zlib // and we can handle gzipp'ed data @@ -429,18 +429,18 @@ class Snoopy else { trigger_error( "use_gzip is on, but PHP was built without zlib support.". - " Requesting file(s) without gzip encoding.", + " Requesting file(s) without gzip encoding.", E_USER_NOTICE); } } - + if(!empty($this->referer)) $headers .= "Referer: ".$this->referer."\r\n"; if(!empty($this->cookies)) - { + { if(!is_array($this->cookies)) $this->cookies = (array)$this->cookies; - + reset($this->cookies); if ( count($this->cookies) > 0 ) { $cookie_headers .= 'Cookie: '; @@ -448,7 +448,7 @@ class Snoopy $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; "; } $headers .= substr($cookie_headers,0,-2) . "\r\n"; - } + } } if(!empty($this->rawheaders)) { @@ -463,26 +463,26 @@ class Snoopy $headers .= "; boundary=".$this->_mime_boundary; $headers .= "\r\n"; } - if(!empty($body)) + if(!empty($body)) $headers .= "Content-length: ".strlen($body)."\r\n"; - if(!empty($this->user) || !empty($this->pass)) + if(!empty($this->user) || !empty($this->pass)) $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n"; $headers .= "\r\n"; - + // set the read timeout if needed if ($this->read_timeout > 0) socket_set_timeout($fp, $this->read_timeout); $this->timed_out = false; - + fwrite($fp,$headers.$body,strlen($headers.$body)); - + $this->_redirectaddr = false; unset($this->headers); - + // content was returned gzip encoded? 
$is_gzipped = false; - + while($currentHeader = fgets($fp,$this->_maxlinelen)) { if ($this->read_timeout > 0 && $this->_check_timeout($fp)) @@ -490,11 +490,11 @@ class Snoopy $this->status=-100; return false; } - + // if($currentHeader == "\r\n") if(preg_match("/^\r?\n$/", $currentHeader) ) break; - + // if a header begins with Location: or URI:, set the redirect if(preg_match("/^(Location:|URI:)/i",$currentHeader)) { @@ -514,20 +514,20 @@ class Snoopy else $this->_redirectaddr = $matches[2]; } - + if(preg_match("|^HTTP/|",$currentHeader)) { if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status)) { $this->status= $status[1]; - } + } $this->response_code = $currentHeader; } - + if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) { $is_gzipped = true; } - + $this->headers[] = $currentHeader; } @@ -540,25 +540,25 @@ class Snoopy break; } } - + // gunzip if ( $is_gzipped ) { // per http://www.php.net/manual/en/function.gzencode.php $results = substr($results, 10); $results = gzinflate($results); } - + if ($this->read_timeout > 0 && $this->_check_timeout($fp)) { $this->status=-100; return false; } - + // check if there is a a redirect meta tag - + if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) { - $this->_redirectaddr = $this->_expandlinks($match[1],$URI); + $this->_redirectaddr = $this->_expandlinks($match[1],$URI); } // have we hit our frame depth and is there frame src to fetch? 
@@ -574,7 +574,7 @@ class Snoopy // no framed content else $this->results = $results; - + return true; } @@ -584,21 +584,21 @@ class Snoopy Input: $url the url to fetch $URI the full URI $body body contents to send if any (POST) - Output: + Output: \*======================================================================*/ - + function _httpsrequest($url,$URI,$http_method,$content_type="",$body="") { if($this->passcookies && $this->_redirectaddr) $this->setcookies(); - $headers = array(); - + $headers = array(); + $URI_PARTS = parse_url($URI); if(empty($url)) $url = "/"; // GET ... header not needed for curl - //$headers[] = $http_method." ".$url." ".$this->_httpversion; + //$headers[] = $http_method." ".$url." ".$this->_httpversion; if(!empty($this->agent)) $headers[] = "User-Agent: ".$this->agent; if(!empty($this->host)) @@ -608,10 +608,10 @@ class Snoopy if(!empty($this->referer)) $headers[] = "Referer: ".$this->referer; if(!empty($this->cookies)) - { + { if(!is_array($this->cookies)) $this->cookies = (array)$this->cookies; - + reset($this->cookies); if ( count($this->cookies) > 0 ) { $cookie_str = 'Cookie: '; @@ -634,48 +634,48 @@ class Snoopy else $headers[] = "Content-type: $content_type"; } - if(!empty($body)) + if(!empty($body)) $headers[] = "Content-length: ".strlen($body); -# if(!empty($this->user) || !empty($this->pass)) +# if(!empty($this->user) || !empty($this->pass)) # $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass); - + if ($this->user and $this->pass) { $cmdline_params .= " -u '$this->user:$this->pass' "; } - + for($curr_header = 0; $curr_header < count($headers); $curr_header++) { $cmdline_params .= " -H \"".$headers[$curr_header]."\""; } - + if(!empty($body)) $cmdline_params .= " -d \"$body\""; - + if($this->read_timeout > 0) $cmdline_params .= " -m ".$this->read_timeout; - + $headerfile = uniqid(time()); - + # accept self-signed certs - $cmdline_params .= " -k"; + $cmdline_params .= " -k"; exec($this->curl_path." 
-D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return); - + if($return) { $this->error = "Error: cURL could not retrieve the document, error $return."; return false; } - - + + $results = implode("\r\n",$results); - + $result_headers = file("/tmp/$headerfile"); - + $this->_redirectaddr = false; unset($this->headers); - + for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++) { - + // if a header begins with Location: or URI:, set the redirect if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader])) { @@ -695,7 +695,7 @@ class Snoopy else $this->_redirectaddr = $matches[2]; } - + if(preg_match("|^HTTP/|",$result_headers[$currentHeader])) { $this->response_code = $result_headers[$currentHeader]; @@ -708,10 +708,10 @@ class Snoopy } // check if there is a a redirect meta tag - + if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) { - $this->_redirectaddr = $this->_expandlinks($match[1],$URI); + $this->_redirectaddr = $this->_expandlinks($match[1],$URI); } // have we hit our frame depth and is there frame src to fetch? 
@@ -729,7 +729,7 @@ class Snoopy $this->results = $results; unlink("/tmp/$headerfile"); - + return true; } @@ -737,7 +737,7 @@ class Snoopy Function: setcookies() Purpose: set cookies for a redirection \*======================================================================*/ - + function setcookies() { for($x=0; $xheaders); $x++) @@ -747,7 +747,7 @@ class Snoopy } } - + /*======================================================================*\ Function: _check_timeout Purpose: checks whether timeout has occurred @@ -771,7 +771,7 @@ class Snoopy Purpose: make a socket connection Input: $fp file pointer \*======================================================================*/ - + function _connect(&$fp) { if(!empty($this->proxy_host) && !empty($this->proxy_port)) @@ -785,9 +785,9 @@ class Snoopy $host = $this->host; $port = $this->port; } - + $this->status = 0; - + if($fp = fsockopen( $host, $port, @@ -823,13 +823,13 @@ class Snoopy Purpose: disconnect a socket connection Input: $fp file pointer \*======================================================================*/ - + function _disconnect($fp) { return(fclose($fp)); } - + /*======================================================================*\ Function: _prepare_post_body Purpose: Prepare post body according to encoding type @@ -837,7 +837,7 @@ class Snoopy $formfiles - form upload files Output: post body \*======================================================================*/ - + function _prepare_post_body($formvars, $formfiles) { settype($formvars, "array"); @@ -845,7 +845,7 @@ class Snoopy if (count($formvars) == 0 && count($formfiles) == 0) return; - + switch ($this->_submit_type) { case "application/x-www-form-urlencoded": reset($formvars); @@ -861,7 +861,7 @@ class Snoopy case "multipart/form-data": $this->_mime_boundary = "Snoopy".md5(uniqid(microtime())); - + reset($formvars); while(list($key,$val) = each($formvars)) { if (is_array($val) || is_object($val)) { @@ -876,7 +876,7 @@ class Snoopy 
$postdata .= "$val\r\n"; } } - + reset($formfiles); while (list($field_name, $file_names) = each($formfiles)) { settype($file_names, "array"); diff --git a/rss_fetch.inc b/rss_fetch.inc index 84095e9..b5ea928 100644 --- a/rss_fetch.inc +++ b/rss_fetch.inc @@ -15,7 +15,7 @@ * magpierss-general@lists.sourceforge.net * */ - + // Setup MAGPIE_DIR for use on hosts that don't include // the current path in include_path. // with thanks to rajiv and smarty @@ -35,18 +35,18 @@ define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); -/* +/* * CONSTANTS - redefine these in your script to change the * behaviour of fetch_rss() currently, most options effect the cache * - * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? - * For me a built in cache was essential to creating a "PHP-like" + * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? + * For me a built in cache was essential to creating a "PHP-like" * feel to Magpie, see rss_cache.inc for rationale * * * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? - * This should be a location that the webserver can write to. If this - * directory does not already exist Mapie will try to be smart and create + * This should be a location that the webserver can write to. If this + * directory does not already exist Mapie will try to be smart and create * it. This will often fail for permissions reasons. * * @@ -62,20 +62,20 @@ require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); /*=======================================================================*\ - Function: fetch_rss: + Function: fetch_rss: Purpose: return RSS object for the give url maintain the cache Input: url of RSS file Output: parsed RSS object (see rss_parse.inc) - NOTES ON CACHEING: + NOTES ON CACHEING: If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. 
- + NOTES ON RETRIEVING REMOTE FILES: If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will return a cached object, and touch the cache object upon recieving a 304. - + NOTES ON FAILED REQUESTS: If there is an HTTP error while fetching an RSS object, the cached version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) @@ -85,15 +85,15 @@ define('MAGPIE_VERSION', '0.80a'); $MAGPIE_ERROR = ""; -function fetch_rss ($url) { +function fetch_rss ($url, $cache_age=null) { // initialize constants init(); - + if ( !isset($url) ) { error("fetch_rss called without a url"); return false; } - + // if cache is disabled if ( !MAGPIE_CACHE_ON ) { // fetch file, and parse it @@ -105,7 +105,7 @@ function fetch_rss ($url) { error("Failed to fetch $url and cache is off"); return false; } - } + } // else cache is ON else { // Flow @@ -113,28 +113,29 @@ function fetch_rss ($url) { // 2. if there is a hit, make sure its fresh // 3. if cached obj fails freshness check, fetch remote // 4. if remote fails, return stale object, or error - - $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); - + + $cache_age = $cache_age !== null ? $cache_age : MAGPIE_CACHE_AGE; + $cache = new RSSCache( MAGPIE_CACHE_DIR, $cache_age ); + if (MAGPIE_DEBUG and $cache->ERROR) { debug($cache->ERROR, E_USER_WARNING); } - - + + $cache_status = 0; // response of check_cache $request_headers = array(); // HTTP headers to send with fetch $rss = 0; // parsed RSS object $errormsg = 0; // errors, if any - + // store parsed XML by desired output encoding // as character munging happens at parse time $cache_key = $url . 
MAGPIE_OUTPUT_ENCODING; - + if (!$cache->ERROR) { // return cache HIT, MISS, or STALE $cache_status = $cache->check_cache( $cache_key); } - + // if object cached, and cache is fresh, return cached obj if ( $cache_status == 'HIT' ) { $rss = $cache->get( $cache_key ); @@ -147,9 +148,9 @@ function fetch_rss ($url) { return $rss; } } - + // else attempt a conditional get - + // setup headers if ( $cache_status == 'STALE' ) { $rss = $cache->get( $cache_key ); @@ -158,9 +159,9 @@ function fetch_rss ($url) { $request_headers['If-Last-Modified'] = $rss->last_modified; } } - + $resp = _fetch_remote_file( $url, $request_headers ); - + if (isset($resp) and $resp) { if ($resp->status == '304' ) { // we have the most current copy @@ -190,7 +191,7 @@ function fetch_rss ($url) { elseif ( $resp->error ) { # compensate for Snoopy's annoying habbit to tacking # on '\n' - $http_error = substr($resp->error, 0, -2); + $http_error = substr($resp->error, 0, -2); $errormsg .= "(HTTP Error: $http_error)"; } else { @@ -201,9 +202,9 @@ function fetch_rss ($url) { else { $errormsg = "Unable to retrieve RSS file for unknown reasons."; } - + // else fetch failed - + // attempt to return cached object if ($rss) { if ( MAGPIE_DEBUG ) { @@ -211,12 +212,12 @@ function fetch_rss ($url) { } return $rss; } - + // else we totally failed - error( $errormsg ); - + error( $errormsg ); + return false; - + } // end if ( !MAGPIE_CACHE_ON ) { } // end fetch_rss() @@ -227,34 +228,34 @@ function fetch_rss ($url) { function error ($errormsg, $lvl=E_USER_WARNING) { global $MAGPIE_ERROR; - + // append PHP's error message if track_errors enabled - if ( isset($php_errormsg) ) { + if ( isset($php_errormsg) ) { $errormsg .= " ($php_errormsg)"; } if ( $errormsg ) { $errormsg = "MagpieRSS: $errormsg"; $MAGPIE_ERROR = $errormsg; - trigger_error( $errormsg, $lvl); + trigger_error( $errormsg, $lvl); } } function debug ($debugmsg, $lvl=E_USER_NOTICE) { trigger_error("MagpieRSS [debug] $debugmsg", $lvl); } - + 
/*=======================================================================*\ Function: magpie_error Purpose: accessor for the magpie error variable \*=======================================================================*/ function magpie_error ($errormsg="") { global $MAGPIE_ERROR; - - if ( isset($errormsg) and $errormsg ) { + + if ( isset($errormsg) and $errormsg ) { $MAGPIE_ERROR = $errormsg; } - - return $MAGPIE_ERROR; + + return $MAGPIE_ERROR; } /*=======================================================================*\ @@ -262,7 +263,7 @@ function magpie_error ($errormsg="") { Purpose: retrieve an arbitrary remote file Input: url of the remote file headers to send along with the request (optional) - Output: an HTTP response object (see Snoopy.class.inc) + Output: an HTTP response object (see Snoopy.class.inc) \*=======================================================================*/ function _fetch_remote_file ($url, $headers = "" ) { // Snoopy is an HTTP client in PHP @@ -273,7 +274,7 @@ function _fetch_remote_file ($url, $headers = "" ) { if (is_array($headers) ) { $client->rawheaders = $headers; } - + @$client->fetch($url); return $client; @@ -287,10 +288,10 @@ function _fetch_remote_file ($url, $headers = "" ) { \*=======================================================================*/ function _response_to_rss ($resp) { $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING ); - - // if RSS parsed successfully + + // if RSS parsed successfully if ( $rss and !$rss->ERROR) { - + // find Etag, and Last-Modified foreach($resp->headers as $h) { // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1" @@ -301,26 +302,26 @@ function _response_to_rss ($resp) { $field = $h; $val = ""; } - + if ( $field == 'ETag' ) { $rss->etag = $val; } - + if ( $field == 'Last-Modified' ) { $rss->last_modified = $val; } } - - return $rss; + + return $rss; } // else construct error message else { 
$errormsg = "Failed to parse RSS file."; - + if ($rss) { $errormsg .= " (" . $rss->ERROR . ")"; } error($errormsg); - + return false; } // end if ($rss and !$rss->error) } @@ -337,7 +338,7 @@ function init () { else { define('MAGPIE_INITALIZED', true); } - + if ( !defined('MAGPIE_CACHE_ON') ) { define('MAGPIE_CACHE_ON', true); } @@ -357,39 +358,39 @@ function init () { if ( !defined('MAGPIE_OUTPUT_ENCODING') ) { define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1'); } - + if ( !defined('MAGPIE_INPUT_ENCODING') ) { define('MAGPIE_INPUT_ENCODING', null); } - + if ( !defined('MAGPIE_DETECT_ENCODING') ) { define('MAGPIE_DETECT_ENCODING', true); } - + if ( !defined('MAGPIE_DEBUG') ) { define('MAGPIE_DEBUG', 0); } - + if ( !defined('MAGPIE_USER_AGENT') ) { $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net'; - + if ( MAGPIE_CACHE_ON ) { $ua = $ua . ')'; } else { $ua = $ua . '; No cache)'; } - + define('MAGPIE_USER_AGENT', $ua); } - + if ( !defined('MAGPIE_FETCH_TIME_OUT') ) { define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout } - + // use gzip encoding to fetch rss files if supported? if ( !defined('MAGPIE_USE_GZIP') ) { - define('MAGPIE_USE_GZIP', true); + define('MAGPIE_USE_GZIP', true); } } @@ -400,7 +401,7 @@ function init () { HTTP STATUS CODE PREDICATES These functions attempt to classify an HTTP status code based on RFC 2616 and RFC 2518. - + All of them take an HTTP status code as input, and return true or false All this code is adapted from LWP's HTTP::Status. 
@@ -411,48 +412,48 @@ function init () { Function: is_info Purpose: return true if Informational status code \*=======================================================================*/ -function is_info ($sc) { - return $sc >= 100 && $sc < 200; +function is_info ($sc) { + return $sc >= 100 && $sc < 200; } /*=======================================================================*\ Function: is_success Purpose: return true if Successful status code \*=======================================================================*/ -function is_success ($sc) { - return $sc >= 200 && $sc < 300; +function is_success ($sc) { + return $sc >= 200 && $sc < 300; } /*=======================================================================*\ Function: is_redirect Purpose: return true if Redirection status code \*=======================================================================*/ -function is_redirect ($sc) { - return $sc >= 300 && $sc < 400; +function is_redirect ($sc) { + return $sc >= 300 && $sc < 400; } /*=======================================================================*\ Function: is_error Purpose: return true if Error status code \*=======================================================================*/ -function is_error ($sc) { - return $sc >= 400 && $sc < 600; +function is_error ($sc) { + return $sc >= 400 && $sc < 600; } /*=======================================================================*\ Function: is_client_error Purpose: return true if Error status code, and its a client error \*=======================================================================*/ -function is_client_error ($sc) { - return $sc >= 400 && $sc < 500; +function is_client_error ($sc) { + return $sc >= 400 && $sc < 500; } /*=======================================================================*\ Function: is_client_error Purpose: return true if Error status code, and its a server error \*=======================================================================*/ -function is_server_error 
($sc) { - return $sc >= 500 && $sc < 600; +function is_server_error ($sc) { + return $sc >= 500 && $sc < 600; } ?> diff --git a/rss_parse.inc b/rss_parse.inc index 666afab..5060880 100644 --- a/rss_parse.inc +++ b/rss_parse.inc @@ -37,7 +37,7 @@ require_once (MAGPIE_DIR . 'rss_utils.inc'); */ class MagpieRSS { var $parser; - + var $current_item = array(); // item currently being parsed var $items = array(); // collection of parsed items var $channel = array(); // hash of channel fields @@ -46,81 +46,81 @@ class MagpieRSS { var $feed_type; var $feed_version; var $encoding = ''; // output encoding of parsed rss - + var $_source_encoding = ''; // only set if we have to parse xml prolog - + var $ERROR = ""; var $WARNING = ""; - + # define some constants - + var $_ATOM_CONTENT_CONSTRUCTS = array( 'content', 'summary', 'title', /* common */ 'info', 'tagline', 'copyright', /* Atom 0.3 */ 'rights', 'subtitle', /* Atom 1.0 */ ); - + var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div'); var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); # parser variables, useless if you're not a parser, treat as private - + var $stack = array(); # parser stack var $inchannel = false; var $initem = false; - + var $incontent = array(); # non-empty if in namespaced XML content field var $exclude_top = false; # true when Atom 1.0 type="xhtml" var $intextinput = false; var $inimage = false; var $current_namespace = false; - + /** * Set up XML parser, parse source, and return populated RSS object.. - * + * * @param string $source string containing the RSS to be parsed * * NOTE: Probably a good idea to leave the encoding options alone unless * you know what you're doing as PHP's character set support is * a little weird. 
* - * NOTE: A lot of this is unnecessary but harmless with PHP5 + * NOTE: A lot of this is unnecessary but harmless with PHP5 * * - * @param string $output_encoding output the parsed RSS in this character + * @param string $output_encoding output the parsed RSS in this character * set defaults to ISO-8859-1 as this is PHP's * default. * * NOTE: might be changed to UTF-8 in future * versions. - * - * @param string $input_encoding the character set of the incoming RSS source. + * + * @param string $input_encoding the character set of the incoming RSS source. * Leave blank and Magpie will try to figure it * out. - * - * + * + * * @param bool $detect_encoding if false Magpie won't attempt to detect * source encoding. (caveat emptor) * */ - function MagpieRSS ($source, $output_encoding='ISO-8859-1', - $input_encoding=null, $detect_encoding=true) - { + function MagpieRSS ($source, $output_encoding='ISO-8859-1', + $input_encoding=null, $detect_encoding=true) + { # # if PHP xml isn't compiled in, die # - + if (!function_exists('xml_parser_create')) { - $this->error( "Failed to load PHP's XML Extension. " . + $this->error( "Failed to load PHP's XML Extension. " . "http://www.php.net/manual/en/ref.xml.php", E_USER_ERROR ); } - - list($parser, $source) = $this->create_parser($source, + + list($parser, $source) = $this->create_parser($source, $output_encoding, $input_encoding, $detect_encoding); - - + + if (!is_resource($parser)) { $this->error( "Failed to create an instance of PHP's XML parser. " . 
"http://www.php.net/manual/en/ref.xml.php", @@ -128,20 +128,20 @@ class MagpieRSS { } $this->parser = $parser; - + # # pass in parser, and a reference to this object # setup handlers # - + xml_set_object( $this->parser, $this ); - xml_set_element_handler($this->parser, + xml_set_element_handler($this->parser, 'feed_start_element', 'feed_end_element' ); - - xml_set_character_data_handler( $this->parser, 'feed_cdata' ); - + + xml_set_character_data_handler( $this->parser, 'feed_cdata' ); + $status = xml_parse( $this->parser, $source ); # parse the feed - + if (! $status ) { $errorcode = xml_get_error_code( $this->parser ); if ( $errorcode != XML_ERROR_NONE ) { @@ -153,36 +153,36 @@ class MagpieRSS { $this->error( $errormsg ); } } - + xml_parser_free( $this->parser ); $this->normalize(); } - + function feed_start_element($p, $element, &$attrs) { $el = $element = strtolower($element); $attrs = array_change_key_case($attrs, CASE_LOWER); - - # check for a namespace, and split if found + + # check for a namespace, and explode if found # only if we're not inside a content tag - - if ( empty($this->incontent) ) { + + if ( empty($this->incontent) ) { $ns = false; - + if ( strpos( $element, ':' ) ) { - list($ns, $el) = split( ':', $element, 2); + list($ns, $el) = explode( ':', $element, 2); } - + if ( $ns and $ns != 'rdf' ) { $this->current_namespace = $ns; } } - # + # # if feed type isn't set, then this is first element of feed # identify feed from root element # - + if (!isset($this->feed_type) ) { if ( $el == 'rdf' ) { $this->feed_type = RSS; @@ -194,93 +194,93 @@ class MagpieRSS { } elseif ( $el == 'feed' ) { $this->feed_type = ATOM; - + if ($attrs['xmlns'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0 $this->feed_version = '1.0'; } - else { + else { # Atom 0.3, probably. 
$this->feed_version = $attrs['version']; } - + $this->inchannel = true; } return; } - + # # if we're inside a namespaced content construct, treat tags as text # - + if ( !empty($this->incontent) ) { if ((count($this->incontent) > 1) or !$this->exclude_top) { - + # if tags are inlined, then flatten - - $attrs_str = join(' ', - array_map('map_attrs', - array_keys($attrs), + + $attrs_str = join(' ', + array_map('map_attrs', + array_keys($attrs), array_values($attrs)) ); - - if (strlen($attrs_str)) { - $attrs_str = ' '.$attrs_str; + + if (strlen($attrs_str)) { + $attrs_str = ' '.$attrs_str; } - + $this->append_content( "<{$element}{$attrs_str}>" ); } array_push($this->incontent, $el); # stack for parsing content XML - } - + } + elseif ( $el == 'channel' ) { $this->inchannel = true; } - - elseif ($el == 'item' or $el == 'entry' ) + + elseif ($el == 'item' or $el == 'entry' ) { $this->initem = true; if ( isset($attrs['rdf:about']) ) { - $this->current_item['about'] = $attrs['rdf:about']; + $this->current_item['about'] = $attrs['rdf:about']; } } - elseif ( - $this->feed_type == RSS and - $this->current_namespace == '' and - $el == 'textinput' ) + elseif ( + $this->feed_type == RSS and + $this->current_namespace == '' and + $el == 'textinput' ) { # else we're in the default namespace of an RSS feed, # record textinput or image fields - + $this->intextinput = true; } elseif ( - $this->feed_type == RSS and - $this->current_namespace == '' and - $el == 'image' ) + $this->feed_type == RSS and + $this->current_namespace == '' and + $el == 'image' ) { $this->inimage = true; } else { // set stack[0] to current element - + # Atom support many links per containing element. # Magpie treats link elements of type rel='alternate' # as being equivalent to RSS's simple link element. $atom_link = false; - + if ($this->feed_type == ATOM and $el == 'link') { $atom_link = true; if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') { $el = $el . "_" . 
$attrs['rel']; // pseudo-element names for Atom link elements } } - elseif ( $this->feed_type == ATOM and - in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) ) + elseif ( $this->feed_type == ATOM and + in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) ) { # handle atom content constructs - + # avoid clashing w/ RSS mod_content if ($el == 'content' ) { $el = 'atom_content'; @@ -291,7 +291,7 @@ class MagpieRSS { # (that will pass through some non-validating feeds; # but so what? this isn't a validating parser) # - + $this->incontent = array(); array_push($this->incontent, $el); // start a stack @@ -301,12 +301,12 @@ class MagpieRSS { $this->exclude_top = false; } } - elseif (($this->current_namespace=='xhtml' or + elseif (($this->current_namespace=='xhtml' or (isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/1999/xhtml')) and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS) ) { # Handle inline XHTML body elements --CWJ - + $this->current_namespace = 'xhtml'; $this->incontent = array(); array_push($this->incontent, $el); // start a stack @@ -328,11 +328,11 @@ class MagpieRSS { # this makes the baby Jesus cry, but we can't do it in normalize() # because we've made the element name for Atom links unpredictable # by tacking on the relation to the end. 
-CWJ - + if ($atom_link and isset($attrs['href'])) { $this->append($elpath, $attrs['href']); } - + # add attributes if (count($attrs) > 0) { $this->append($elpath.'@', join(',', array_keys($attrs))); @@ -346,7 +346,7 @@ class MagpieRSS { function feed_cdata ($p, $text) { - + if ($this->incontent) { $this->append_content( $text ); } @@ -355,7 +355,7 @@ class MagpieRSS { $this->append($current_el, $text); } } - + function feed_end_element ($p, $el) { $el = strtolower($el); @@ -363,11 +363,11 @@ class MagpieRSS { $opener = array_pop($this->incontent); # Don't get bamboozled by namespace voodoo - if (strpos($el, ':')) { - list($ns, $closer) = split(':', $el); + if (strpos($el, ':')) { + list($ns, $closer) = explode(':', $el); } - else { - $ns = false; $closer = $el; + else { + $ns = false; $closer = $el; } # Don't get bamboozled by our munging of , either @@ -382,14 +382,14 @@ class MagpieRSS { array_push($this->incontent, $opener); $this->append_content("<$el />"); } - elseif ($this->incontent) { + elseif ($this->incontent) { # in the content construct - + if ((count($this->incontent) > 1) or !$this->exclude_top) { $this->append_content(""); } } - else { + else { # shift the opening of the content construct off the normal stack array_shift( $this->stack ); } @@ -401,11 +401,11 @@ class MagpieRSS { $this->current_category = 0; } - elseif ($this->feed_type == RSS and + elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) { $this->intextinput = false; } - elseif ($this->feed_type == RSS and + elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) { $this->inimage = false; } @@ -415,20 +415,20 @@ class MagpieRSS { else { array_shift( $this->stack ); } - - if ( !$this->incontent ) { + + if ( !$this->incontent ) { # Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ $this->current_namespace = false; } } - + function concat (&$str1, $str2="") { if (!isset($str1) 
) { $str1=""; } $str1 .= $str2; } - + function append_content($text) { if ( $this->initem ) { if ($this->current_namespace) { @@ -446,13 +446,13 @@ class MagpieRSS { } } } - + # smart append - field and namespace aware function append($el, $text) { if (!$el) { return; } - + if ( $this->current_namespace ) { if ( $this->initem ) { $this->concat( @@ -484,7 +484,7 @@ class MagpieRSS { elseif ($this->inchannel) { $this->concat( $this->channel[ $el ], $text ); } - + } } @@ -493,31 +493,31 @@ class MagpieRSS { if (!$el) { return; } - + if ( $this->current_namespace ) { if ( $this->initem ) { - if (!is_null($set)) { - $this->current_item[ $this->current_namespace ][ $el.'#' ] = $set; + if (!is_null($set)) { + $this->current_item[ $this->current_namespace ][ $el.'#' ] = $set; } - + $ret = (isset($this->current_item[ $this->current_namespace ][ $el.'#' ]) ? $this->current_item[ $this->current_namespace ][ $el.'#' ] : 0); } elseif ($this->inchannel) { - if (!is_null($set)) { - $this->channel[ $this->current_namespace ][ $el.'#' ] = $set; + if (!is_null($set)) { + $this->channel[ $this->current_namespace ][ $el.'#' ] = $set; } - + $ret = (isset($this->channel[ $this->current_namespace][ $el.'#' ]) ? $this->channel[ $this->current_namespace][ $el.'#' ] : 0); } } else { if ( $this->initem ) { - if (!is_null($set)) { - $this->current_item[ $el.'#' ] = $set; + if (!is_null($set)) { + $this->current_item[ $el.'#' ] = $set; } - + $ret = (isset($this->current_item[ $el.'#' ]) ? $this->current_item[ $el.'#' ] : 0); } @@ -525,12 +525,12 @@ class MagpieRSS { if (!is_null($set)) { $this->channel[ $el.'#' ] = $set; } - + $ret = (isset($this->channel[ $el.'#' ]) ? 
$this->channel[ $el.'#' ] : 0); } } - + return $ret; } @@ -540,12 +540,12 @@ class MagpieRSS { if (isset($source["{$id_from}@"])) { foreach (explode(',', $source["{$id_from}@"]) as $attr) { - if ($from=='link_enclosure' and $attr=='href') { + if ($from=='link_enclosure' and $attr=='href') { # from Atom $dest["{$id_to}@url"] = $source["{$id_from}@{$attr}"]; $dest["{$id_to}"] = $source["{$id_from}@{$attr}"]; } - elseif ($from=='enclosure' and $attr=='url') { + elseif ($from=='enclosure' and $attr=='url') { # from RSS $dest["{$id_to}@href"] = $source["{$id_from}@{$attr}"]; $dest["{$id_to}"] = $source["{$id_from}@{$attr}"]; @@ -562,8 +562,8 @@ class MagpieRSS { $id_to = $this->element_id($to, $i); # Atom 0.3 <=> Atom 1.0 - if ($this->feed_version >= 1.0) { - $used = 'uri'; $norm = 'url'; + if ($this->feed_version >= 1.0) { + $used = 'uri'; $norm = 'url'; } else { $used = 'url'; $norm = 'uri'; @@ -578,11 +578,11 @@ class MagpieRSS { if (isset($source["{$id}_email"])) { $rss_author = $source["{$id}_email"]; } - + if (isset($source["{$id}_name"])) { $rss_author = $source["{$id}_name"] . (isset($rss_author) ? " <$rss_author>" : ''); } - + if (isset($rss_author)) { $source[$id] = $rss_author; # goes to top-level author or contributor $dest[$id_to] = $rss_author; # goes to dc:creator or dc:contributor @@ -595,14 +595,14 @@ class MagpieRSS { $dc_id = $this->element_id($to, $i); # first normalize category elements: Atom 1.0 <=> RSS 2.0 - if ( isset($source["{$cat_id}@term"]) ) { + if ( isset($source["{$cat_id}@term"]) ) { # category identifier $source[$cat_id] = $source["{$cat_id}@term"]; } elseif ( $this->feed_type == RSS ) { $source["{$cat_id}@term"] = $source[$cat_id]; } - + if ( isset($source["{$cat_id}@scheme"]) ) { // URI to taxonomy $source["{$cat_id}@domain"] = $source["{$cat_id}@scheme"]; } elseif ( isset($source["{$cat_id}@domain"]) ) { @@ -612,7 +612,7 @@ class MagpieRSS { // Now put the identifier into dc:subject $dest[$dc_id] = $source[$cat_id]; } - + # ... 
or vice versa function normalize_dc_subject (&$source, $from, &$dest, $to, $i) { $dc_id = $this->element_id($from, $i); @@ -626,14 +626,14 @@ class MagpieRSS { # each of multiple elements is normalized properly. If you need to mess # with things like attributes or change formats or the like, pass it a # callback to handle each element. - + function normalize_element (&$source, $from, &$dest, $to, $via = NULL) { if (isset($source[$from]) or isset($source["{$from}#"])) { if (isset($source["{$from}#"])) { $n = $source["{$from}#"]; $dest["{$to}#"] = $source["{$from}#"]; } - else { + else { $n = 1; } @@ -652,7 +652,7 @@ class MagpieRSS { function normalize () { // if atom populate rss fields and normalize 0.3 and 1.0 feeds - + if ( $this->is_atom() ) { // Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!) if ($this->feed_version < 1.0) { @@ -665,13 +665,13 @@ class MagpieRSS { $this->normalize_element($this->channel, 'rights', $this->channel, 'copyright'); $this->normalize_element($this->channel, 'updated', $this->channel, 'modified'); } - + $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person'); $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person'); // Atom elements to RSS elements $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description'); - + if ( isset($this->channel['logo']) ) { $this->normalize_element($this->channel, 'logo', $this->image, 'url'); $this->normalize_element($this->channel, 'link', $this->image, 'link'); @@ -708,13 +708,13 @@ class MagpieRSS { } $item["author#"] = $source["{$author}#"]; - + for ($au = 1; $au <= $item["author#"]; $au++) { $id_to = $this->element_id('author', $au); $id_from = $this->element_id($author, $au); - + $item[$id_to] = $source[$id_from]; - + foreach (array('name', 'email', 'uri', 'url') as $what) { if (isset($source["{$id_from}_{$what}"])) 
{ $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"]; @@ -731,21 +731,21 @@ class MagpieRSS { $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure'); // Categories - if ( isset($item['category#']) ) { + if ( isset($item['category#']) ) { # Atom 1.0 categories to dc:subject and RSS 2.0 categories $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); } - elseif ( isset($item['dc']['subject#']) ) { + elseif ( isset($item['dc']['subject#']) ) { # dc:subject to Atom 1.0 and RSS 2.0 categories $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); } // Normalized item timestamp $atom_date = (isset($item['published']) ) ? $item['published'] : $item['updated']; - + if ( $atom_date ) { $epoch = @parse_w3cdtf($atom_date); - + if ($epoch and $epoch > 0) { $item['date_timestamp'] = $epoch; } @@ -762,18 +762,18 @@ class MagpieRSS { for ( $i = 0; $i < count($this->items); $i++) { $item = $this->items[$i]; - + // RSS elements to Atom elements $this->normalize_element($item, 'description', $item, 'summary'); $this->normalize_element($item['content'], 'encoded', $item, 'atom_content'); $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure'); // Categories - if ( isset($item['category#']) ) { + if ( isset($item['category#']) ) { # RSS 2.0 categories to dc:subject and Atom 1.0 categories $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category'); } - elseif ( isset($item['dc']['subject#']) ) { + elseif ( isset($item['dc']['subject#']) ) { # dc:subject to Atom 1.0 and RSS 2.0 categories $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject'); } @@ -781,7 +781,7 @@ class MagpieRSS { // Normalized item timestamp if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) { $epoch = @parse_w3cdtf($item['dc']['date']); - + if ($epoch and $epoch > 0) { 
$item['date_timestamp'] = $epoch; } @@ -797,17 +797,17 @@ class MagpieRSS { } } } - - + + function is_rss () { if ( $this->feed_type == RSS ) { - return $this->feed_version; + return $this->feed_version; } else { return false; } } - + function is_atom() { if ( $this->feed_type == ATOM ) { return $this->feed_version; @@ -820,7 +820,7 @@ class MagpieRSS { # # return XML parser, and possibly re-encoded source # - + function create_parser($source, $out_enc, $in_enc, $detect) { if ( substr(phpversion(),0,1) == 5) { $parser = $this->php5_create_parser($in_enc, $detect); @@ -832,21 +832,21 @@ class MagpieRSS { $this->encoding = $out_enc; xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc); } - + return array($parser, $source); } - + # # Instantiate an XML parser under PHP5 # # PHP5 will do a fine job of detecting input encoding - # if passed an empty string as the encoding. + # if passed an empty string as the encoding. # # All hail libxml2! # - + function php5_create_parser($in_enc, $detect) { - + # by default php5 does a fine job of detecting input encodings if(!$detect && $in_enc) { return xml_parser_create($in_enc); @@ -855,7 +855,7 @@ class MagpieRSS { return xml_parser_create(''); } } - + # # Instaniate an XML parser under PHP4 # @@ -870,12 +870,12 @@ class MagpieRSS { # The following code is based on SJM's work with FoF # @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss # - + function php4_create_parser($source, $in_enc, $detect) { if ( !$detect ) { return array(xml_parser_create($in_enc), $source); } - + if (!$in_enc) { if (preg_match('//m', $source, $m)) { $in_enc = strtoupper($m[1]); @@ -885,45 +885,45 @@ class MagpieRSS { $in_enc = 'UTF-8'; } } - + if ($this->known_encoding($in_enc)) { return array(xml_parser_create($in_enc), $source); } - + # the dectected encoding is not one of the simple encodings PHP knows - + # attempt to use the iconv extension to # cast the XML to a 
known encoding # @see http://php.net/iconv - + if (function_exists('iconv')) { $encoded_source = iconv($in_enc,'UTF-8', $source); if ($encoded_source) { return array(xml_parser_create('UTF-8'), $encoded_source); } } - + # iconv didn't work, try mb_convert_encoding # @see http://php.net/mbstring - + if( function_exists('mb_convert_encoding')) { $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc ); if ($encoded_source) { return array(xml_parser_create('UTF-8'), $encoded_source); } } - + # - # else + # else # - + $this->error("Feed is in an unsupported character encoding. ($in_enc) " . "You may see strange artifacts, and mangled characters.", E_USER_NOTICE); - + return array(xml_parser_create(), $source); } - + function known_encoding($enc) { $enc = strtoupper($enc); if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) { @@ -936,16 +936,16 @@ class MagpieRSS { function error ($errormsg, $lvl=E_USER_WARNING) { // append PHP's error message if track_errors enabled - if ( isset($php_errormsg) ) { + if ( isset($php_errormsg) ) { $errormsg .= " ($php_errormsg)"; } if ( MAGPIE_DEBUG ) { - trigger_error( $errormsg, $lvl); + trigger_error( $errormsg, $lvl); } else { error_log( $errormsg, 0); } - + $notices = E_USER_NOTICE|E_NOTICE; if ( $lvl&$notices ) { $this->WARNING = $errormsg; @@ -965,7 +965,7 @@ function map_attrs($k, $v) { return "$k=\"$v\""; } -// patch to support medieval versions of PHP4.1.x, +// patch to support medieval versions of PHP4.1.x, // courtesy, Ryan Currie, ryan@digibliss.com if (!function_exists('array_change_key_case')) { diff --git a/scripts/magpie_debug.php b/scripts/magpie_debug.php index 28eec76..85aaad0 100755 --- a/scripts/magpie_debug.php +++ b/scripts/magpie_debug.php @@ -24,14 +24,14 @@ test_library_support(); $rss = fetch_rss( $url ); - + if ($rss) { echo "

Example Output

"; echo "Channel: " . $rss->channel['title'] . "

"; echo "

"; @@ -61,16 +61,16 @@ function test_library_support() { else { echo "OK: Found an XML parser.
\n"; } - + if ( ! function_exists('gzinflate') ) { echo "Warning: PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.
\n"; } else { echo "OK: Support for GZIP encoding.
\n"; } - + if ( ! (function_exists('iconv') and function_exists('mb_convert_encoding') ) ) { - echo "Warning: No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." . + echo "Warning: No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." . "No support character set munging.
\n"; } else {