diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/README b/README
new file mode 100644
index 0000000..6af7edb
--- /dev/null
+++ b/README
@@ -0,0 +1,48 @@
+NAME
+
+ MagpieRSS - a simple RSS integration tool
+
+SYNOPSIS
+
+ require_once('rss_fetch.inc');
+ $url = $_GET['url'];
+ $rss = fetch_rss( $url );
+
+ echo "Channel Title: " . $rss->channel['title'] . "\n";
+ echo "\n";
+ foreach ($rss->items as $item) {
+ $href = $item['link'];
+ $title = $item['title'];
+ echo "- $title\n";
+ }
+ echo "\n";
+
+DESCRIPTION
+
+ MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like",
+ and simple to use.
+
+ Some features include:
+
+ * supports RSS 0.9 - 1.0, with limited RSS 2.0 support
+ * supports namespaces, and modules, including mod_content and mod_event
+ * open minded [1]
+ * simple, functional interface, to object oriented backend parser
+ * automatic caching of parsed RSS objects makes it easy to integrate
+ * supports conditional GET with Last-Modified, and ETag
+ * uses constants for easy override of default behaviour
+ * heavily commented
+
+
+1. By open minded I mean Magpie will accept any tag it finds in good faith that
+ it was supposed to be here. For strict validation, look elsewhere.
+
+
+GETTING STARTED
+
+
+
+COPYRIGHT:
+ Copyright(c) 2002 kellan@protest.net. All rights reserved.
+ This software is released under the GNU General Public License.
+ Please read the disclaimer at the top of the Snoopy.class.inc file.
diff --git a/extlib/Snoopy.class.inc b/extlib/Snoopy.class.inc
index 95a63bb..94cf36b 100644
--- a/extlib/Snoopy.class.inc
+++ b/extlib/Snoopy.class.inc
@@ -38,7 +38,7 @@ http://snoopy.sourceforge.com
class Snoopy
{
/**** Public variables ****/
-
+
/* user definable vars */
var $host = "www.php.net"; // host name we are connecting to
@@ -62,15 +62,15 @@ class Snoopy
var $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.
-
+
var $user = ""; // user for http authentication
var $pass = ""; // password for http authentication
-
+
// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
-
+
var $results = ""; // where the content is put
-
+
var $error = ""; // error messages sent here
var $response_code = ""; // response code returned from server
var $headers = array(); // headers returned from server sent here
@@ -80,7 +80,7 @@ class Snoopy
// set to 0 to disallow timeouts
var $timed_out = false; // if a read operation timed out
var $status = 0; // http request status
-
+
var $curl_path = "/usr/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
@@ -92,14 +92,14 @@ class Snoopy
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.
-
+
// send Accept-encoding: gzip?
- var $use_gzip = true;
-
- /**** Private variables ****/
-
+ var $use_gzip = true;
+
+ /**** Private variables ****/
+
var $_maxlinelen = 4096; // max line length (headers)
-
+
var $_httpmethod = "GET"; // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method
@@ -109,7 +109,7 @@ class Snoopy
var $_redirectdepth = 0; // increments on an http redirect
var $_frameurls = array(); // frame src urls
var $_framedepth = 0; // increments on frame depth
-
+
var $_isproxy = false; // set if using a proxy server
var $_fp_timeout = 30; // timeout for socket connection
@@ -124,14 +124,14 @@ class Snoopy
function fetch($URI)
{
-
+
//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
$URI_PARTS = parse_url($URI);
if (!empty($URI_PARTS["user"]))
$this->user = $URI_PARTS["user"];
if (!empty($URI_PARTS["pass"]))
$this->pass = $URI_PARTS["pass"];
-
+
switch($URI_PARTS["scheme"])
{
case "http":
@@ -151,7 +151,7 @@ class Snoopy
// no proxy, send only the path
$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
}
-
+
$this->_disconnect($fp);
if($this->_redirectaddr)
@@ -174,7 +174,7 @@ class Snoopy
{
$frameurls = $this->_frameurls;
$this->_frameurls = array();
-
+
while(list(,$frameurl) = each($frameurls))
{
if($this->_framedepth < $this->maxframes)
@@ -185,13 +185,13 @@ class Snoopy
else
break;
}
- }
+ }
}
else
{
return false;
}
- return true;
+ return true;
break;
case "https":
if(!$this->curl_path || (!is_executable($this->curl_path))) {
@@ -244,15 +244,15 @@ class Snoopy
else
break;
}
- }
- return true;
+ }
+ return true;
break;
default:
// not a valid protocol
$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
return false;
break;
- }
+ }
return true;
}
@@ -261,8 +261,8 @@ class Snoopy
/*======================================================================*\
Private functions
\*======================================================================*/
-
-
+
+
/*======================================================================*\
Function: _striplinks
Purpose: strip the hyperlinks from an html document
@@ -271,13 +271,13 @@ class Snoopy
\*======================================================================*/
function _striplinks($document)
- {
+ {
preg_match_all("'<\s*a\s+.*href\s*=\s* # find ]+)) # if quote found, match up to next matching
# quote, otherwise match up to next space
'isx",$document,$links);
-
+
// catenate the non-empty matches from the conditional subpattern
@@ -285,14 +285,14 @@ class Snoopy
{
if(!empty($val))
$match[] = $val;
- }
-
+ }
+
while(list($key,$val) = each($links[3]))
{
if(!empty($val))
$match[] = $val;
- }
-
+ }
+
// return the links
return $match;
}
@@ -305,18 +305,18 @@ class Snoopy
\*======================================================================*/
function _stripform($document)
- {
+ {
preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
-
+
// catenate the matches
$match = implode("\r\n",$elements[0]);
-
+
// return the links
return $match;
}
-
-
+
+
/*======================================================================*\
Function: _striptext
Purpose: strip the text from an html document
@@ -326,11 +326,11 @@ class Snoopy
function _striptext($document)
{
-
+
// I didn't use preg eval (//e) since that is only available in PHP 4.0.
// so, list your entities one by one here. I included some of the
// more common ones.
-
+
$search = array("''si", // strip out javascript
"'<[\/\!]*?[^<>]*?>'si", // strip out html tags
"'([\r\n])[\s]+'", // strip out white space
@@ -343,7 +343,7 @@ class Snoopy
"'&(cent|#162);'i",
"'&(pound|#163);'i",
"'&(copy|#169);'i"
- );
+ );
$replace = array( "",
"",
"\\1",
@@ -356,9 +356,9 @@ class Snoopy
chr(162),
chr(163),
chr(169));
-
+
$text = preg_replace($search,$replace,$document);
-
+
return $text;
}
@@ -372,23 +372,23 @@ class Snoopy
function _expandlinks($links,$URI)
{
-
+
preg_match("/^[^\?]+/",$URI,$match);
$match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
-
+
$search = array( "|^http://".preg_quote($this->host)."|i",
"|^(?!http://)(\/)?(?!mailto:)|i",
"|/\./|",
"|/[^\/]+/\.\./|"
);
-
+
$replace = array( "",
$match."/",
"/",
"/"
- );
-
+ );
+
$expandedLinks = preg_replace($search,$replace,$links);
return $expandedLinks;
@@ -401,25 +401,25 @@ class Snoopy
$fp the current open file pointer
$URI the full URI
$body body contents to send if any (POST)
- Output:
+ Output:
\*======================================================================*/
-
+
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
if($this->passcookies && $this->_redirectaddr)
$this->setcookies();
-
+
$URI_PARTS = parse_url($URI);
if(empty($url))
$url = "/";
- $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
+ $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
if(!empty($this->agent))
$headers .= "User-Agent: ".$this->agent."\r\n";
if(!empty($this->host) && !isset($this->rawheaders['Host']))
$headers .= "Host: ".$this->host."\r\n";
if(!empty($this->accept))
$headers .= "Accept: ".$this->accept."\r\n";
-
+
if($this->use_gzip) {
// make sure PHP was built with --with-zlib
// and we can handle gzipp'ed data
@@ -429,18 +429,18 @@ class Snoopy
else {
trigger_error(
"use_gzip is on, but PHP was built without zlib support.".
- " Requesting file(s) without gzip encoding.",
+ " Requesting file(s) without gzip encoding.",
E_USER_NOTICE);
}
}
-
+
if(!empty($this->referer))
$headers .= "Referer: ".$this->referer."\r\n";
if(!empty($this->cookies))
- {
+ {
if(!is_array($this->cookies))
$this->cookies = (array)$this->cookies;
-
+
reset($this->cookies);
if ( count($this->cookies) > 0 ) {
$cookie_headers .= 'Cookie: ';
@@ -448,7 +448,7 @@ class Snoopy
$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
}
$headers .= substr($cookie_headers,0,-2) . "\r\n";
- }
+ }
}
if(!empty($this->rawheaders))
{
@@ -463,26 +463,26 @@ class Snoopy
$headers .= "; boundary=".$this->_mime_boundary;
$headers .= "\r\n";
}
- if(!empty($body))
+ if(!empty($body))
$headers .= "Content-length: ".strlen($body)."\r\n";
- if(!empty($this->user) || !empty($this->pass))
+ if(!empty($this->user) || !empty($this->pass))
$headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";
$headers .= "\r\n";
-
+
// set the read timeout if needed
if ($this->read_timeout > 0)
socket_set_timeout($fp, $this->read_timeout);
$this->timed_out = false;
-
+
fwrite($fp,$headers.$body,strlen($headers.$body));
-
+
$this->_redirectaddr = false;
unset($this->headers);
-
+
// content was returned gzip encoded?
$is_gzipped = false;
-
+
while($currentHeader = fgets($fp,$this->_maxlinelen))
{
if ($this->read_timeout > 0 && $this->_check_timeout($fp))
@@ -490,11 +490,11 @@ class Snoopy
$this->status=-100;
return false;
}
-
+
// if($currentHeader == "\r\n")
if(preg_match("/^\r?\n$/", $currentHeader) )
break;
-
+
// if a header begins with Location: or URI:, set the redirect
if(preg_match("/^(Location:|URI:)/i",$currentHeader))
{
@@ -514,20 +514,20 @@ class Snoopy
else
$this->_redirectaddr = $matches[2];
}
-
+
if(preg_match("|^HTTP/|",$currentHeader))
{
if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
{
$this->status= $status[1];
- }
+ }
$this->response_code = $currentHeader;
}
-
+
if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) {
$is_gzipped = true;
}
-
+
$this->headers[] = $currentHeader;
}
@@ -540,25 +540,25 @@ class Snoopy
break;
}
}
-
+
// gunzip
if ( $is_gzipped ) {
// per http://www.php.net/manual/en/function.gzencode.php
$results = substr($results, 10);
$results = gzinflate($results);
}
-
+
if ($this->read_timeout > 0 && $this->_check_timeout($fp))
{
$this->status=-100;
return false;
}
-
+
// check if there is a a redirect meta tag
-
+
if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
{
- $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
+ $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
}
// have we hit our frame depth and is there frame src to fetch?
@@ -574,7 +574,7 @@ class Snoopy
// no framed content
else
$this->results = $results;
-
+
return true;
}
@@ -584,21 +584,21 @@ class Snoopy
Input: $url the url to fetch
$URI the full URI
$body body contents to send if any (POST)
- Output:
+ Output:
\*======================================================================*/
-
+
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
if($this->passcookies && $this->_redirectaddr)
$this->setcookies();
- $headers = array();
-
+ $headers = array();
+
$URI_PARTS = parse_url($URI);
if(empty($url))
$url = "/";
// GET ... header not needed for curl
- //$headers[] = $http_method." ".$url." ".$this->_httpversion;
+ //$headers[] = $http_method." ".$url." ".$this->_httpversion;
if(!empty($this->agent))
$headers[] = "User-Agent: ".$this->agent;
if(!empty($this->host))
@@ -608,10 +608,10 @@ class Snoopy
if(!empty($this->referer))
$headers[] = "Referer: ".$this->referer;
if(!empty($this->cookies))
- {
+ {
if(!is_array($this->cookies))
$this->cookies = (array)$this->cookies;
-
+
reset($this->cookies);
if ( count($this->cookies) > 0 ) {
$cookie_str = 'Cookie: ';
@@ -634,48 +634,48 @@ class Snoopy
else
$headers[] = "Content-type: $content_type";
}
- if(!empty($body))
+ if(!empty($body))
$headers[] = "Content-length: ".strlen($body);
-# if(!empty($this->user) || !empty($this->pass))
+# if(!empty($this->user) || !empty($this->pass))
# $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
-
+
if ($this->user and $this->pass) {
$cmdline_params .= " -u '$this->user:$this->pass' ";
}
-
+
for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
$cmdline_params .= " -H \"".$headers[$curr_header]."\"";
}
-
+
if(!empty($body))
$cmdline_params .= " -d \"$body\"";
-
+
if($this->read_timeout > 0)
$cmdline_params .= " -m ".$this->read_timeout;
-
+
$headerfile = uniqid(time());
-
+
# accept self-signed certs
- $cmdline_params .= " -k";
+ $cmdline_params .= " -k";
exec($this->curl_path." -D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return);
-
+
if($return)
{
$this->error = "Error: cURL could not retrieve the document, error $return.";
return false;
}
-
-
+
+
$results = implode("\r\n",$results);
-
+
$result_headers = file("/tmp/$headerfile");
-
+
$this->_redirectaddr = false;
unset($this->headers);
-
+
for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
{
-
+
// if a header begins with Location: or URI:, set the redirect
if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
{
@@ -695,7 +695,7 @@ class Snoopy
else
$this->_redirectaddr = $matches[2];
}
-
+
if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
{
$this->response_code = $result_headers[$currentHeader];
@@ -708,10 +708,10 @@ class Snoopy
}
// check if there is a a redirect meta tag
-
+
if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
{
- $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
+ $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
}
// have we hit our frame depth and is there frame src to fetch?
@@ -729,7 +729,7 @@ class Snoopy
$this->results = $results;
unlink("/tmp/$headerfile");
-
+
return true;
}
@@ -737,7 +737,7 @@ class Snoopy
Function: setcookies()
Purpose: set cookies for a redirection
\*======================================================================*/
-
+
function setcookies()
{
for($x=0; $xheaders); $x++)
@@ -747,7 +747,7 @@ class Snoopy
}
}
-
+
/*======================================================================*\
Function: _check_timeout
Purpose: checks whether timeout has occurred
@@ -771,7 +771,7 @@ class Snoopy
Purpose: make a socket connection
Input: $fp file pointer
\*======================================================================*/
-
+
function _connect(&$fp)
{
if(!empty($this->proxy_host) && !empty($this->proxy_port))
@@ -785,9 +785,9 @@ class Snoopy
$host = $this->host;
$port = $this->port;
}
-
+
$this->status = 0;
-
+
if($fp = fsockopen(
$host,
$port,
@@ -823,13 +823,13 @@ class Snoopy
Purpose: disconnect a socket connection
Input: $fp file pointer
\*======================================================================*/
-
+
function _disconnect($fp)
{
return(fclose($fp));
}
-
+
/*======================================================================*\
Function: _prepare_post_body
Purpose: Prepare post body according to encoding type
@@ -837,7 +837,7 @@ class Snoopy
$formfiles - form upload files
Output: post body
\*======================================================================*/
-
+
function _prepare_post_body($formvars, $formfiles)
{
settype($formvars, "array");
@@ -845,7 +845,7 @@ class Snoopy
if (count($formvars) == 0 && count($formfiles) == 0)
return;
-
+
switch ($this->_submit_type) {
case "application/x-www-form-urlencoded":
reset($formvars);
@@ -861,7 +861,7 @@ class Snoopy
case "multipart/form-data":
$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
-
+
reset($formvars);
while(list($key,$val) = each($formvars)) {
if (is_array($val) || is_object($val)) {
@@ -876,7 +876,7 @@ class Snoopy
$postdata .= "$val\r\n";
}
}
-
+
reset($formfiles);
while (list($field_name, $file_names) = each($formfiles)) {
settype($file_names, "array");
diff --git a/rss_fetch.inc b/rss_fetch.inc
index 84095e9..b5ea928 100644
--- a/rss_fetch.inc
+++ b/rss_fetch.inc
@@ -15,7 +15,7 @@
* magpierss-general@lists.sourceforge.net
*
*/
-
+
// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
@@ -35,18 +35,18 @@ define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
-/*
+/*
* CONSTANTS - redefine these in your script to change the
* behaviour of fetch_rss() currently, most options effect the cache
*
- * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
- * For me a built in cache was essential to creating a "PHP-like"
+ * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
+ * For me a built in cache was essential to creating a "PHP-like"
* feel to Magpie, see rss_cache.inc for rationale
*
*
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
- * This should be a location that the webserver can write to. If this
- * directory does not already exist Mapie will try to be smart and create
+ * This should be a location that the webserver can write to. If this
+ * directory does not already exist Magpie will try to be smart and create
* it. This will often fail for permissions reasons.
*
*
@@ -62,20 +62,20 @@ require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
/*=======================================================================*\
- Function: fetch_rss:
+ Function: fetch_rss:
Purpose: return RSS object for the give url
maintain the cache
Input: url of RSS file
Output: parsed RSS object (see rss_parse.inc)
- NOTES ON CACHEING:
+ NOTES ON CACHEING:
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
-
+
NOTES ON RETRIEVING REMOTE FILES:
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
return a cached object, and touch the cache object upon recieving a
304.
-
+
NOTES ON FAILED REQUESTS:
If there is an HTTP error while fetching an RSS object, the cached
version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
@@ -85,15 +85,15 @@ define('MAGPIE_VERSION', '0.80a');
$MAGPIE_ERROR = "";
-function fetch_rss ($url) {
+function fetch_rss ($url, $cache_age=null) {
// initialize constants
init();
-
+
if ( !isset($url) ) {
error("fetch_rss called without a url");
return false;
}
-
+
// if cache is disabled
if ( !MAGPIE_CACHE_ON ) {
// fetch file, and parse it
@@ -105,7 +105,7 @@ function fetch_rss ($url) {
error("Failed to fetch $url and cache is off");
return false;
}
- }
+ }
// else cache is ON
else {
// Flow
@@ -113,28 +113,29 @@ function fetch_rss ($url) {
// 2. if there is a hit, make sure its fresh
// 3. if cached obj fails freshness check, fetch remote
// 4. if remote fails, return stale object, or error
-
- $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );
-
+
+ $cache_age = $cache_age !== null ? $cache_age : MAGPIE_CACHE_AGE;
+ $cache = new RSSCache( MAGPIE_CACHE_DIR, $cache_age );
+
if (MAGPIE_DEBUG and $cache->ERROR) {
debug($cache->ERROR, E_USER_WARNING);
}
-
-
+
+
$cache_status = 0; // response of check_cache
$request_headers = array(); // HTTP headers to send with fetch
$rss = 0; // parsed RSS object
$errormsg = 0; // errors, if any
-
+
// store parsed XML by desired output encoding
// as character munging happens at parse time
$cache_key = $url . MAGPIE_OUTPUT_ENCODING;
-
+
if (!$cache->ERROR) {
// return cache HIT, MISS, or STALE
$cache_status = $cache->check_cache( $cache_key);
}
-
+
// if object cached, and cache is fresh, return cached obj
if ( $cache_status == 'HIT' ) {
$rss = $cache->get( $cache_key );
@@ -147,9 +148,9 @@ function fetch_rss ($url) {
return $rss;
}
}
-
+
// else attempt a conditional get
-
+
// setup headers
if ( $cache_status == 'STALE' ) {
$rss = $cache->get( $cache_key );
@@ -158,9 +159,9 @@ function fetch_rss ($url) {
$request_headers['If-Last-Modified'] = $rss->last_modified;
}
}
-
+
$resp = _fetch_remote_file( $url, $request_headers );
-
+
if (isset($resp) and $resp) {
if ($resp->status == '304' ) {
// we have the most current copy
@@ -190,7 +191,7 @@ function fetch_rss ($url) {
elseif ( $resp->error ) {
# compensate for Snoopy's annoying habbit to tacking
# on '\n'
- $http_error = substr($resp->error, 0, -2);
+ $http_error = substr($resp->error, 0, -2);
$errormsg .= "(HTTP Error: $http_error)";
}
else {
@@ -201,9 +202,9 @@ function fetch_rss ($url) {
else {
$errormsg = "Unable to retrieve RSS file for unknown reasons.";
}
-
+
// else fetch failed
-
+
// attempt to return cached object
if ($rss) {
if ( MAGPIE_DEBUG ) {
@@ -211,12 +212,12 @@ function fetch_rss ($url) {
}
return $rss;
}
-
+
// else we totally failed
- error( $errormsg );
-
+ error( $errormsg );
+
return false;
-
+
} // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()
@@ -227,34 +228,34 @@ function fetch_rss ($url) {
function error ($errormsg, $lvl=E_USER_WARNING) {
global $MAGPIE_ERROR;
-
+
// append PHP's error message if track_errors enabled
- if ( isset($php_errormsg) ) {
+ if ( isset($php_errormsg) ) {
$errormsg .= " ($php_errormsg)";
}
if ( $errormsg ) {
$errormsg = "MagpieRSS: $errormsg";
$MAGPIE_ERROR = $errormsg;
- trigger_error( $errormsg, $lvl);
+ trigger_error( $errormsg, $lvl);
}
}
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
trigger_error("MagpieRSS [debug] $debugmsg", $lvl);
}
-
+
/*=======================================================================*\
Function: magpie_error
Purpose: accessor for the magpie error variable
\*=======================================================================*/
function magpie_error ($errormsg="") {
global $MAGPIE_ERROR;
-
- if ( isset($errormsg) and $errormsg ) {
+
+ if ( isset($errormsg) and $errormsg ) {
$MAGPIE_ERROR = $errormsg;
}
-
- return $MAGPIE_ERROR;
+
+ return $MAGPIE_ERROR;
}
/*=======================================================================*\
@@ -262,7 +263,7 @@ function magpie_error ($errormsg="") {
Purpose: retrieve an arbitrary remote file
Input: url of the remote file
headers to send along with the request (optional)
- Output: an HTTP response object (see Snoopy.class.inc)
+ Output: an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
// Snoopy is an HTTP client in PHP
@@ -273,7 +274,7 @@ function _fetch_remote_file ($url, $headers = "" ) {
if (is_array($headers) ) {
$client->rawheaders = $headers;
}
-
+
@$client->fetch($url);
return $client;
@@ -287,10 +288,10 @@ function _fetch_remote_file ($url, $headers = "" ) {
\*=======================================================================*/
function _response_to_rss ($resp) {
$rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );
-
- // if RSS parsed successfully
+
+ // if RSS parsed successfully
if ( $rss and !$rss->ERROR) {
-
+
// find Etag, and Last-Modified
foreach($resp->headers as $h) {
// 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
@@ -301,26 +302,26 @@ function _response_to_rss ($resp) {
$field = $h;
$val = "";
}
-
+
if ( $field == 'ETag' ) {
$rss->etag = $val;
}
-
+
if ( $field == 'Last-Modified' ) {
$rss->last_modified = $val;
}
}
-
- return $rss;
+
+ return $rss;
} // else construct error message
else {
$errormsg = "Failed to parse RSS file.";
-
+
if ($rss) {
$errormsg .= " (" . $rss->ERROR . ")";
}
error($errormsg);
-
+
return false;
} // end if ($rss and !$rss->error)
}
@@ -337,7 +338,7 @@ function init () {
else {
define('MAGPIE_INITALIZED', true);
}
-
+
if ( !defined('MAGPIE_CACHE_ON') ) {
define('MAGPIE_CACHE_ON', true);
}
@@ -357,39 +358,39 @@ function init () {
if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
}
-
+
if ( !defined('MAGPIE_INPUT_ENCODING') ) {
define('MAGPIE_INPUT_ENCODING', null);
}
-
+
if ( !defined('MAGPIE_DETECT_ENCODING') ) {
define('MAGPIE_DETECT_ENCODING', true);
}
-
+
if ( !defined('MAGPIE_DEBUG') ) {
define('MAGPIE_DEBUG', 0);
}
-
+
if ( !defined('MAGPIE_USER_AGENT') ) {
$ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';
-
+
if ( MAGPIE_CACHE_ON ) {
$ua = $ua . ')';
}
else {
$ua = $ua . '; No cache)';
}
-
+
define('MAGPIE_USER_AGENT', $ua);
}
-
+
if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
}
-
+
// use gzip encoding to fetch rss files if supported?
if ( !defined('MAGPIE_USE_GZIP') ) {
- define('MAGPIE_USE_GZIP', true);
+ define('MAGPIE_USE_GZIP', true);
}
}
@@ -400,7 +401,7 @@ function init () {
HTTP STATUS CODE PREDICATES
These functions attempt to classify an HTTP status code
based on RFC 2616 and RFC 2518.
-
+
All of them take an HTTP status code as input, and return true or false
All this code is adapted from LWP's HTTP::Status.
@@ -411,48 +412,48 @@ function init () {
Function: is_info
Purpose: return true if Informational status code
\*=======================================================================*/
-function is_info ($sc) {
- return $sc >= 100 && $sc < 200;
+function is_info ($sc) {
+ return $sc >= 100 && $sc < 200;
}
/*=======================================================================*\
Function: is_success
Purpose: return true if Successful status code
\*=======================================================================*/
-function is_success ($sc) {
- return $sc >= 200 && $sc < 300;
+function is_success ($sc) {
+ return $sc >= 200 && $sc < 300;
}
/*=======================================================================*\
Function: is_redirect
Purpose: return true if Redirection status code
\*=======================================================================*/
-function is_redirect ($sc) {
- return $sc >= 300 && $sc < 400;
+function is_redirect ($sc) {
+ return $sc >= 300 && $sc < 400;
}
/*=======================================================================*\
Function: is_error
Purpose: return true if Error status code
\*=======================================================================*/
-function is_error ($sc) {
- return $sc >= 400 && $sc < 600;
+function is_error ($sc) {
+ return $sc >= 400 && $sc < 600;
}
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a client error
\*=======================================================================*/
-function is_client_error ($sc) {
- return $sc >= 400 && $sc < 500;
+function is_client_error ($sc) {
+ return $sc >= 400 && $sc < 500;
}
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a server error
\*=======================================================================*/
-function is_server_error ($sc) {
- return $sc >= 500 && $sc < 600;
+function is_server_error ($sc) {
+ return $sc >= 500 && $sc < 600;
}
?>
diff --git a/rss_parse.inc b/rss_parse.inc
index 666afab..5060880 100644
--- a/rss_parse.inc
+++ b/rss_parse.inc
@@ -37,7 +37,7 @@ require_once (MAGPIE_DIR . 'rss_utils.inc');
*/
class MagpieRSS {
var $parser;
-
+
var $current_item = array(); // item currently being parsed
var $items = array(); // collection of parsed items
var $channel = array(); // hash of channel fields
@@ -46,81 +46,81 @@ class MagpieRSS {
var $feed_type;
var $feed_version;
var $encoding = ''; // output encoding of parsed rss
-
+
var $_source_encoding = ''; // only set if we have to parse xml prolog
-
+
var $ERROR = "";
var $WARNING = "";
-
+
# define some constants
-
+
var $_ATOM_CONTENT_CONSTRUCTS = array(
'content', 'summary', 'title', /* common */
'info', 'tagline', 'copyright', /* Atom 0.3 */
'rights', 'subtitle', /* Atom 1.0 */
);
-
+
var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
# parser variables, useless if you're not a parser, treat as private
-
+
var $stack = array(); # parser stack
var $inchannel = false;
var $initem = false;
-
+
var $incontent = array(); # non-empty if in namespaced XML content field
var $exclude_top = false; # true when Atom 1.0 type="xhtml"
var $intextinput = false;
var $inimage = false;
var $current_namespace = false;
-
+
/**
* Set up XML parser, parse source, and return populated RSS object..
- *
+ *
* @param string $source string containing the RSS to be parsed
*
* NOTE: Probably a good idea to leave the encoding options alone unless
* you know what you're doing as PHP's character set support is
* a little weird.
*
- * NOTE: A lot of this is unnecessary but harmless with PHP5
+ * NOTE: A lot of this is unnecessary but harmless with PHP5
*
*
- * @param string $output_encoding output the parsed RSS in this character
+ * @param string $output_encoding output the parsed RSS in this character
* set defaults to ISO-8859-1 as this is PHP's
* default.
*
* NOTE: might be changed to UTF-8 in future
* versions.
- *
- * @param string $input_encoding the character set of the incoming RSS source.
+ *
+ * @param string $input_encoding the character set of the incoming RSS source.
* Leave blank and Magpie will try to figure it
* out.
- *
- *
+ *
+ *
* @param bool $detect_encoding if false Magpie won't attempt to detect
* source encoding. (caveat emptor)
*
*/
- function MagpieRSS ($source, $output_encoding='ISO-8859-1',
- $input_encoding=null, $detect_encoding=true)
- {
+ function MagpieRSS ($source, $output_encoding='ISO-8859-1',
+ $input_encoding=null, $detect_encoding=true)
+ {
#
# if PHP xml isn't compiled in, die
#
-
+
if (!function_exists('xml_parser_create')) {
- $this->error( "Failed to load PHP's XML Extension. " .
+ $this->error( "Failed to load PHP's XML Extension. " .
"http://www.php.net/manual/en/ref.xml.php",
E_USER_ERROR );
}
-
- list($parser, $source) = $this->create_parser($source,
+
+ list($parser, $source) = $this->create_parser($source,
$output_encoding, $input_encoding, $detect_encoding);
-
-
+
+
if (!is_resource($parser)) {
$this->error( "Failed to create an instance of PHP's XML parser. " .
"http://www.php.net/manual/en/ref.xml.php",
@@ -128,20 +128,20 @@ class MagpieRSS {
}
$this->parser = $parser;
-
+
#
# pass in parser, and a reference to this object
# setup handlers
#
-
+
xml_set_object( $this->parser, $this );
- xml_set_element_handler($this->parser,
+ xml_set_element_handler($this->parser,
'feed_start_element', 'feed_end_element' );
-
- xml_set_character_data_handler( $this->parser, 'feed_cdata' );
-
+
+ xml_set_character_data_handler( $this->parser, 'feed_cdata' );
+
$status = xml_parse( $this->parser, $source ); # parse the feed
-
+
if (! $status ) {
$errorcode = xml_get_error_code( $this->parser );
if ( $errorcode != XML_ERROR_NONE ) {
@@ -153,36 +153,36 @@ class MagpieRSS {
$this->error( $errormsg );
}
}
-
+
xml_parser_free( $this->parser );
$this->normalize();
}
-
+
function feed_start_element($p, $element, &$attrs) {
$el = $element = strtolower($element);
$attrs = array_change_key_case($attrs, CASE_LOWER);
-
- # check for a namespace, and split if found
+
+ # check for a namespace, and split on ':' if found
# only if we're not inside a content tag
-
- if ( empty($this->incontent) ) {
+
+ if ( empty($this->incontent) ) {
$ns = false;
-
+
if ( strpos( $element, ':' ) ) {
- list($ns, $el) = split( ':', $element, 2);
+ list($ns, $el) = explode( ':', $element, 2);
}
-
+
if ( $ns and $ns != 'rdf' ) {
$this->current_namespace = $ns;
}
}
- #
+ #
# if feed type isn't set, then this is first element of feed
# identify feed from root element
#
-
+
if (!isset($this->feed_type) ) {
if ( $el == 'rdf' ) {
$this->feed_type = RSS;
@@ -194,93 +194,93 @@ class MagpieRSS {
}
elseif ( $el == 'feed' ) {
$this->feed_type = ATOM;
-
+
if ($attrs['xmlns'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
$this->feed_version = '1.0';
}
- else {
+ else {
# Atom 0.3, probably.
$this->feed_version = $attrs['version'];
}
-
+
$this->inchannel = true;
}
return;
}
-
+
#
# if we're inside a namespaced content construct, treat tags as text
#
-
+
if ( !empty($this->incontent) ) {
if ((count($this->incontent) > 1) or !$this->exclude_top) {
-
+
# if tags are inlined, then flatten
-
- $attrs_str = join(' ',
- array_map('map_attrs',
- array_keys($attrs),
+
+ $attrs_str = join(' ',
+ array_map('map_attrs',
+ array_keys($attrs),
array_values($attrs))
);
-
- if (strlen($attrs_str)) {
- $attrs_str = ' '.$attrs_str;
+
+ if (strlen($attrs_str)) {
+ $attrs_str = ' '.$attrs_str;
}
-
+
$this->append_content( "<{$element}{$attrs_str}>" );
}
array_push($this->incontent, $el); # stack for parsing content XML
- }
-
+ }
+
elseif ( $el == 'channel' ) {
$this->inchannel = true;
}
-
- elseif ($el == 'item' or $el == 'entry' )
+
+ elseif ($el == 'item' or $el == 'entry' )
{
$this->initem = true;
if ( isset($attrs['rdf:about']) ) {
- $this->current_item['about'] = $attrs['rdf:about'];
+ $this->current_item['about'] = $attrs['rdf:about'];
}
}
- elseif (
- $this->feed_type == RSS and
- $this->current_namespace == '' and
- $el == 'textinput' )
+ elseif (
+ $this->feed_type == RSS and
+ $this->current_namespace == '' and
+ $el == 'textinput' )
{
# else we're in the default namespace of an RSS feed,
# record textinput or image fields
-
+
$this->intextinput = true;
}
elseif (
- $this->feed_type == RSS and
- $this->current_namespace == '' and
- $el == 'image' )
+ $this->feed_type == RSS and
+ $this->current_namespace == '' and
+ $el == 'image' )
{
$this->inimage = true;
}
else {
// set stack[0] to current element
-
+
# Atom support many links per containing element.
# Magpie treats link elements of type rel='alternate'
# as being equivalent to RSS's simple link element.
$atom_link = false;
-
+
if ($this->feed_type == ATOM and $el == 'link') {
$atom_link = true;
if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
$el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
}
}
- elseif ( $this->feed_type == ATOM and
- in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
+ elseif ( $this->feed_type == ATOM and
+ in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
{
# handle atom content constructs
-
+
# avoid clashing w/ RSS mod_content
if ($el == 'content' ) {
$el = 'atom_content';
@@ -291,7 +291,7 @@ class MagpieRSS {
# (that will pass through some non-validating feeds;
# but so what? this isn't a validating parser)
#
-
+
$this->incontent = array();
array_push($this->incontent, $el); // start a stack
@@ -301,12 +301,12 @@ class MagpieRSS {
$this->exclude_top = false;
}
}
- elseif (($this->current_namespace=='xhtml' or
+ elseif (($this->current_namespace=='xhtml' or
(isset($attrs['xmlns']) and $attrs['xmlns'] == 'http://www.w3.org/1999/xhtml'))
and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS) )
{
# Handle inline XHTML body elements --CWJ
-
+
$this->current_namespace = 'xhtml';
$this->incontent = array();
array_push($this->incontent, $el); // start a stack
@@ -328,11 +328,11 @@ class MagpieRSS {
# this makes the baby Jesus cry, but we can't do it in normalize()
# because we've made the element name for Atom links unpredictable
# by tacking on the relation to the end. -CWJ
-
+
if ($atom_link and isset($attrs['href'])) {
$this->append($elpath, $attrs['href']);
}
-
+
# add attributes
if (count($attrs) > 0) {
$this->append($elpath.'@', join(',', array_keys($attrs)));
@@ -346,7 +346,7 @@ class MagpieRSS {
function feed_cdata ($p, $text) {
-
+
if ($this->incontent) {
$this->append_content( $text );
}
@@ -355,7 +355,7 @@ class MagpieRSS {
$this->append($current_el, $text);
}
}
-
+
function feed_end_element ($p, $el) {
$el = strtolower($el);
@@ -363,11 +363,11 @@ class MagpieRSS {
$opener = array_pop($this->incontent);
# Don't get bamboozled by namespace voodoo
- if (strpos($el, ':')) {
- list($ns, $closer) = split(':', $el);
+ if (strpos($el, ':')) {
+ list($ns, $closer) = explode(':', $el);
}
- else {
- $ns = false; $closer = $el;
+ else {
+ $ns = false; $closer = $el;
}
# Don't get bamboozled by our munging of , either
@@ -382,14 +382,14 @@ class MagpieRSS {
array_push($this->incontent, $opener);
$this->append_content("<$el />");
}
- elseif ($this->incontent) {
+ elseif ($this->incontent) {
# in the content construct
-
+
if ((count($this->incontent) > 1) or !$this->exclude_top) {
$this->append_content("$el>");
}
}
- else {
+ else {
# shift the opening of the content construct off the normal stack
array_shift( $this->stack );
}
@@ -401,11 +401,11 @@ class MagpieRSS {
$this->current_category = 0;
}
- elseif ($this->feed_type == RSS and
+ elseif ($this->feed_type == RSS and
$this->current_namespace == '' and $el == 'textinput' ) {
$this->intextinput = false;
}
- elseif ($this->feed_type == RSS and
+ elseif ($this->feed_type == RSS and
$this->current_namespace == '' and $el == 'image' ) {
$this->inimage = false;
}
@@ -415,20 +415,20 @@ class MagpieRSS {
else {
array_shift( $this->stack );
}
-
- if ( !$this->incontent ) {
+
+ if ( !$this->incontent ) {
# Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
$this->current_namespace = false;
}
}
-
+
function concat (&$str1, $str2="") {
if (!isset($str1) ) {
$str1="";
}
$str1 .= $str2;
}
-
+
function append_content($text) {
if ( $this->initem ) {
if ($this->current_namespace) {
@@ -446,13 +446,13 @@ class MagpieRSS {
}
}
}
-
+
# smart append - field and namespace aware
function append($el, $text) {
if (!$el) {
return;
}
-
+
if ( $this->current_namespace ) {
if ( $this->initem ) {
$this->concat(
@@ -484,7 +484,7 @@ class MagpieRSS {
elseif ($this->inchannel) {
$this->concat( $this->channel[ $el ], $text );
}
-
+
}
}
@@ -493,31 +493,31 @@ class MagpieRSS {
if (!$el) {
return;
}
-
+
if ( $this->current_namespace ) {
if ( $this->initem ) {
- if (!is_null($set)) {
- $this->current_item[ $this->current_namespace ][ $el.'#' ] = $set;
+ if (!is_null($set)) {
+ $this->current_item[ $this->current_namespace ][ $el.'#' ] = $set;
}
-
+
$ret = (isset($this->current_item[ $this->current_namespace ][ $el.'#' ]) ?
$this->current_item[ $this->current_namespace ][ $el.'#' ] : 0);
}
elseif ($this->inchannel) {
- if (!is_null($set)) {
- $this->channel[ $this->current_namespace ][ $el.'#' ] = $set;
+ if (!is_null($set)) {
+ $this->channel[ $this->current_namespace ][ $el.'#' ] = $set;
}
-
+
$ret = (isset($this->channel[ $this->current_namespace][ $el.'#' ]) ?
$this->channel[ $this->current_namespace][ $el.'#' ] : 0);
}
}
else {
if ( $this->initem ) {
- if (!is_null($set)) {
- $this->current_item[ $el.'#' ] = $set;
+ if (!is_null($set)) {
+ $this->current_item[ $el.'#' ] = $set;
}
-
+
$ret = (isset($this->current_item[ $el.'#' ]) ?
$this->current_item[ $el.'#' ] : 0);
}
@@ -525,12 +525,12 @@ class MagpieRSS {
if (!is_null($set)) {
$this->channel[ $el.'#' ] = $set;
}
-
+
$ret = (isset($this->channel[ $el.'#' ]) ?
$this->channel[ $el.'#' ] : 0);
}
}
-
+
return $ret;
}
@@ -540,12 +540,12 @@ class MagpieRSS {
if (isset($source["{$id_from}@"])) {
foreach (explode(',', $source["{$id_from}@"]) as $attr) {
- if ($from=='link_enclosure' and $attr=='href') {
+ if ($from=='link_enclosure' and $attr=='href') {
# from Atom
$dest["{$id_to}@url"] = $source["{$id_from}@{$attr}"];
$dest["{$id_to}"] = $source["{$id_from}@{$attr}"];
}
- elseif ($from=='enclosure' and $attr=='url') {
+ elseif ($from=='enclosure' and $attr=='url') {
# from RSS
$dest["{$id_to}@href"] = $source["{$id_from}@{$attr}"];
$dest["{$id_to}"] = $source["{$id_from}@{$attr}"];
@@ -562,8 +562,8 @@ class MagpieRSS {
$id_to = $this->element_id($to, $i);
# Atom 0.3 <=> Atom 1.0
- if ($this->feed_version >= 1.0) {
- $used = 'uri'; $norm = 'url';
+ if ($this->feed_version >= 1.0) {
+ $used = 'uri'; $norm = 'url';
}
else {
$used = 'url'; $norm = 'uri';
@@ -578,11 +578,11 @@ class MagpieRSS {
if (isset($source["{$id}_email"])) {
$rss_author = $source["{$id}_email"];
}
-
+
if (isset($source["{$id}_name"])) {
$rss_author = $source["{$id}_name"] . (isset($rss_author) ? " <$rss_author>" : '');
}
-
+
if (isset($rss_author)) {
$source[$id] = $rss_author; # goes to top-level author or contributor
$dest[$id_to] = $rss_author; # goes to dc:creator or dc:contributor
@@ -595,14 +595,14 @@ class MagpieRSS {
$dc_id = $this->element_id($to, $i);
# first normalize category elements: Atom 1.0 <=> RSS 2.0
- if ( isset($source["{$cat_id}@term"]) ) {
+ if ( isset($source["{$cat_id}@term"]) ) {
# category identifier
$source[$cat_id] = $source["{$cat_id}@term"];
}
elseif ( $this->feed_type == RSS ) {
$source["{$cat_id}@term"] = $source[$cat_id];
}
-
+
if ( isset($source["{$cat_id}@scheme"]) ) { // URI to taxonomy
$source["{$cat_id}@domain"] = $source["{$cat_id}@scheme"];
} elseif ( isset($source["{$cat_id}@domain"]) ) {
@@ -612,7 +612,7 @@ class MagpieRSS {
// Now put the identifier into dc:subject
$dest[$dc_id] = $source[$cat_id];
}
-
+
# ... or vice versa
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
$dc_id = $this->element_id($from, $i);
@@ -626,14 +626,14 @@ class MagpieRSS {
# each of multiple elements is normalized properly. If you need to mess
# with things like attributes or change formats or the like, pass it a
# callback to handle each element.
-
+
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
if (isset($source[$from]) or isset($source["{$from}#"])) {
if (isset($source["{$from}#"])) {
$n = $source["{$from}#"];
$dest["{$to}#"] = $source["{$from}#"];
}
- else {
+ else {
$n = 1;
}
@@ -652,7 +652,7 @@ class MagpieRSS {
function normalize () {
// if atom populate rss fields and normalize 0.3 and 1.0 feeds
-
+
if ( $this->is_atom() ) {
// Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!)
if ($this->feed_version < 1.0) {
@@ -665,13 +665,13 @@ class MagpieRSS {
$this->normalize_element($this->channel, 'rights', $this->channel, 'copyright');
$this->normalize_element($this->channel, 'updated', $this->channel, 'modified');
}
-
+
$this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
$this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');
// Atom elements to RSS elements
$this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');
-
+
if ( isset($this->channel['logo']) ) {
$this->normalize_element($this->channel, 'logo', $this->image, 'url');
$this->normalize_element($this->channel, 'link', $this->image, 'link');
@@ -708,13 +708,13 @@ class MagpieRSS {
}
$item["author#"] = $source["{$author}#"];
-
+
for ($au = 1; $au <= $item["author#"]; $au++) {
$id_to = $this->element_id('author', $au);
$id_from = $this->element_id($author, $au);
-
+
$item[$id_to] = $source[$id_from];
-
+
foreach (array('name', 'email', 'uri', 'url') as $what) {
if (isset($source["{$id_from}_{$what}"])) {
$item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
@@ -731,21 +731,21 @@ class MagpieRSS {
$this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');
// Categories
- if ( isset($item['category#']) ) {
+ if ( isset($item['category#']) ) {
# Atom 1.0 categories to dc:subject and RSS 2.0 categories
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
}
- elseif ( isset($item['dc']['subject#']) ) {
+ elseif ( isset($item['dc']['subject#']) ) {
# dc:subject to Atom 1.0 and RSS 2.0 categories
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
}
// Normalized item timestamp
$atom_date = (isset($item['published']) ) ? $item['published'] : $item['updated'];
-
+
if ( $atom_date ) {
$epoch = @parse_w3cdtf($atom_date);
-
+
if ($epoch and $epoch > 0) {
$item['date_timestamp'] = $epoch;
}
@@ -762,18 +762,18 @@ class MagpieRSS {
for ( $i = 0; $i < count($this->items); $i++) {
$item = $this->items[$i];
-
+
// RSS elements to Atom elements
$this->normalize_element($item, 'description', $item, 'summary');
$this->normalize_element($item['content'], 'encoded', $item, 'atom_content');
$this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');
// Categories
- if ( isset($item['category#']) ) {
+ if ( isset($item['category#']) ) {
# RSS 2.0 categories to dc:subject and Atom 1.0 categories
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
}
- elseif ( isset($item['dc']['subject#']) ) {
+ elseif ( isset($item['dc']['subject#']) ) {
# dc:subject to Atom 1.0 and RSS 2.0 categories
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
}
@@ -781,7 +781,7 @@ class MagpieRSS {
// Normalized item timestamp
if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
$epoch = @parse_w3cdtf($item['dc']['date']);
-
+
if ($epoch and $epoch > 0) {
$item['date_timestamp'] = $epoch;
}
@@ -797,17 +797,17 @@ class MagpieRSS {
}
}
}
-
-
+
+
function is_rss () {
if ( $this->feed_type == RSS ) {
- return $this->feed_version;
+ return $this->feed_version;
}
else {
return false;
}
}
-
+
function is_atom() {
if ( $this->feed_type == ATOM ) {
return $this->feed_version;
@@ -820,7 +820,7 @@ class MagpieRSS {
#
# return XML parser, and possibly re-encoded source
#
-
+
function create_parser($source, $out_enc, $in_enc, $detect) {
if ( substr(phpversion(),0,1) == 5) {
$parser = $this->php5_create_parser($in_enc, $detect);
@@ -832,21 +832,21 @@ class MagpieRSS {
$this->encoding = $out_enc;
xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
}
-
+
return array($parser, $source);
}
-
+
#
# Instantiate an XML parser under PHP5
#
# PHP5 will do a fine job of detecting input encoding
- # if passed an empty string as the encoding.
+ # if passed an empty string as the encoding.
#
# All hail libxml2!
#
-
+
function php5_create_parser($in_enc, $detect) {
-
+
# by default php5 does a fine job of detecting input encodings
if(!$detect && $in_enc) {
return xml_parser_create($in_enc);
@@ -855,7 +855,7 @@ class MagpieRSS {
return xml_parser_create('');
}
}
-
+
#
# Instaniate an XML parser under PHP4
#
@@ -870,12 +870,12 @@ class MagpieRSS {
# The following code is based on SJM's work with FoF
# @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
#
-
+
function php4_create_parser($source, $in_enc, $detect) {
if ( !$detect ) {
return array(xml_parser_create($in_enc), $source);
}
-
+
if (!$in_enc) {
if (preg_match('//m', $source, $m)) {
$in_enc = strtoupper($m[1]);
@@ -885,45 +885,45 @@ class MagpieRSS {
$in_enc = 'UTF-8';
}
}
-
+
if ($this->known_encoding($in_enc)) {
return array(xml_parser_create($in_enc), $source);
}
-
+
# the dectected encoding is not one of the simple encodings PHP knows
-
+
# attempt to use the iconv extension to
# cast the XML to a known encoding
# @see http://php.net/iconv
-
+
if (function_exists('iconv')) {
$encoded_source = iconv($in_enc,'UTF-8', $source);
if ($encoded_source) {
return array(xml_parser_create('UTF-8'), $encoded_source);
}
}
-
+
# iconv didn't work, try mb_convert_encoding
# @see http://php.net/mbstring
-
+
if( function_exists('mb_convert_encoding')) {
$encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
if ($encoded_source) {
return array(xml_parser_create('UTF-8'), $encoded_source);
}
}
-
+
#
- # else
+ # else
#
-
+
$this->error("Feed is in an unsupported character encoding. ($in_enc) " .
"You may see strange artifacts, and mangled characters.",
E_USER_NOTICE);
-
+
return array(xml_parser_create(), $source);
}
-
+
function known_encoding($enc) {
$enc = strtoupper($enc);
if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
@@ -936,16 +936,16 @@ class MagpieRSS {
function error ($errormsg, $lvl=E_USER_WARNING) {
// append PHP's error message if track_errors enabled
- if ( isset($php_errormsg) ) {
+ if ( isset($php_errormsg) ) {
$errormsg .= " ($php_errormsg)";
}
if ( MAGPIE_DEBUG ) {
- trigger_error( $errormsg, $lvl);
+ trigger_error( $errormsg, $lvl);
}
else {
error_log( $errormsg, 0);
}
-
+
$notices = E_USER_NOTICE|E_NOTICE;
if ( $lvl&$notices ) {
$this->WARNING = $errormsg;
@@ -965,7 +965,7 @@ function map_attrs($k, $v) {
return "$k=\"$v\"";
}
-// patch to support medieval versions of PHP4.1.x,
+// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com
if (!function_exists('array_change_key_case')) {
diff --git a/scripts/magpie_debug.php b/scripts/magpie_debug.php
index 28eec76..85aaad0 100755
--- a/scripts/magpie_debug.php
+++ b/scripts/magpie_debug.php
@@ -24,14 +24,14 @@
test_library_support();
$rss = fetch_rss( $url );
-
+
if ($rss) {
echo "Example Output
";
echo "Channel: " . $rss->channel['title'] . "";
echo "
";
foreach ($rss->items as $item) {
$href = $item['link'];
- $title = $item['title'];
+ $title = $item['title'];
echo "- $title
";
}
echo "
";
@@ -61,16 +61,16 @@ function test_library_support() {
else {
echo "OK: Found an XML parser.
\n";
}
-
+
if ( ! function_exists('gzinflate') ) {
echo "Warning: PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.
\n";
}
else {
echo "OK: Support for GZIP encoding.
\n";
}
-
+
if ( ! (function_exists('iconv') and function_exists('mb_convert_encoding') ) ) {
- echo "Warning: No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." .
+ echo "Warning: No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." .
"No support character set munging.
\n";
}
else {