Magpie with feedparser

Summit - September 19, 2007 - 07:42
Project:Feedparser
Version:5.x-1.x-dev
Component:feedaggregator_node
Category:feature request
Priority:normal
Assigned:Unassigned
Status:active
Description

Hi,

I would like to use Magpie instead of SimplePie, because I want to process product feeds that are not RSS 2.0, Atom, or any other news-feed format.

I found this code, which might help:

<?php
/*
* Project:     XML for MagpieRSS: a simple RSS integration tool
* File:        xml_fetch.inc, a simple functional interface
                to fetching and parsing XML
* Author:      Bram Brambring  <bram@brambring.nl>
* License:     GPL
* Script requires an installation of MagpieRSS
* Tested with magpie 1.72
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
*/


require_once( MAGPIE_DIR . 'rss_fetch.inc' );
require_once(
MAGPIE_DIR . 'rss_cache.inc' );



/*=======================================================================*\
    Function: xml_fetch
    Purpose:  fetch a remote XML document, parse it with the given parser
              class and return the parsed object, maintaining the cache
    Input:    url of the XML/RSS file
              name of the parser class to instantiate (default 'MagpieRSS')
              cache age handed to RSSCache; when omitted (null) the
              MAGPIE_CACHE_AGE constant is used, if it is defined
    Output:   parsed object of the given class (see rss_parse.inc),
              or false on failure
    NOTES:
     see rss_fetch.inc
\*=======================================================================*/


function xml_fetch ($url, $class='MagpieRSS', $cache_age=null) {
    // initialize constants
    init();

    // The default cache age is resolved here rather than in the signature:
    // a default of MAGPIE_CACHE_AGE would be evaluated before init() has
    // run, i.e. possibly before the constant exists.
    // NOTE(review): assumes init() defines MAGPIE_CACHE_AGE -- confirm.
    if ( $cache_age === null and defined('MAGPIE_CACHE_AGE') ) {
        $cache_age = MAGPIE_CACHE_AGE;
    }

    if ( !isset($url) ) {
        error("xml_fetch called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response( $resp, $class );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // else cache is ON
    //
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, $cache_age );

    if ( MAGPIE_DEBUG and $cache->ERROR ) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = 0;       // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // parsed object
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if ( !$cache->ERROR ) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status == 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1 ) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get: send whichever validators the
    // stale cached object carries
    if ( $cache_status == 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            // values are trimmed because a stored header value may still
            // carry the line ending it was received with
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = trim($rss->etag);
            }
            if ( !empty($rss->last_modified) ) {
                // the HTTP request header that pairs with Last-Modified
                // is If-Modified-Since
                $request_headers['If-Modified-Since'] = trim($rss->last_modified);
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if ( isset($resp) and $resp ) {
        if ( $resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1 ) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set( $cache_key, $rss );
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $rss = _response( $resp, $class );
            if ( $rss ) {
                if ( MAGPIE_DEBUG > 1 ) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $rss );
                return $rss;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code . ')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object
    if ( $rss ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end xml_fetch()


/*=======================================================================*\
    Function:   _response
    Purpose:    parse an HTTP response object into an object of the
                requested parser class and copy the response's ETag and
                Last-Modified header values onto it
    Input:      an HTTP response object (see Snoopy)
                name of the parser class to instantiate
    Output:     parsed object (see rss_parse), or false if parsing failed
\*=======================================================================*/
function _response ($resp, $class) {
    $rss = new $class( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parsing failed: report the parser's error (if any) and give up
    if ( !$rss or !empty($rss->ERROR) ) {
        $errormsg = "Failed to parse RSS file.";

        if ( $rss ) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }

        error($errormsg);

        return false;
    }

    // find ETag and Last-Modified
    $headers = ( isset($resp->headers) and is_array($resp->headers) )
        ? $resp->headers
        : array();

    foreach ( $headers as $h ) {
        // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
        if ( strpos($h, ": ") ) {
            list($field, $val) = explode(": ", $h, 2);
        }
        else {
            $field = $h;
            $val = "";
        }

        // HTTP field names are case-insensitive, so compare in lower case;
        // trim the value so no surrounding whitespace or line ending is stored
        $field = strtolower(trim($field));
        $val   = trim($val);

        if ( $field == 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field == 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}


#########################
# Affiliates .nl parser


/*
* Project:     MagpieRSS: a simple RSS integration tool
* File:        tradetracker.inc, a simple functional interface
                to fetching and parsing the XML from tradetracker
* Author:      Bram Brambring  <bram@brambring.nl>
* License:     GPL
* Script requires an installation of MagpieRSS
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*
*/


/**
 * MagpieRSS subclass for XML feeds whose records are <item> elements
 * wrapped in an <items> container.
 *
 * Element handlers are overridden here; character data is handled by the
 * 'feed_cdata' method, which this class does not define itself.
 */
class AffiliateRSS extends MagpieRSS {

    /**
     * Parse $source and collect every <item> into $this->items.
     *
     * The arguments are handed unchanged to create_parser().
     */
    function __construct ($source, $output_encoding='ISO-8859-1',
                          $input_encoding=null, $detect_encoding=true)
    {
        # if PHP xml isn't compiled in, die
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            // nothing below can work without the extension
            return;
        }

        list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

        // older PHP versions hand back a resource, PHP 8 an object
        if (!is_resource($parser) and !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );

        xml_set_character_data_handler( $this->parser, 'feed_cdata' );

        $status = xml_parse( $this->parser, $source );

        if (!$status) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error  = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col  = xml_get_current_column_number($this->parser);
                $errormsg   = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );
    }

    /**
     * Legacy class-named constructor, kept so the class still constructs
     * on PHP versions that do not know __construct and so existing
     * explicit calls keep working.
     */
    function AffiliateRSS ($source, $output_encoding='ISO-8859-1',
                           $input_encoding=null, $detect_encoding=true)
    {
        $this->__construct($source, $output_encoding,
                $input_encoding, $detect_encoding);
    }

    /**
     * Start-tag handler: <items> is ignored, <item> marks the start of a
     * record, any other element name is pushed onto the element stack.
     */
    function feed_start_element($p, $element, &$attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() instead of split(): split() was removed in PHP 7
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        if ( $el == 'items' ) {
            // container element: nothing to record
        }
        elseif ( $el == 'item' ) {
            $this->initem = true;
        }
        else {
            // set stack[0] to current element
            array_unshift($this->stack, $el);
        }
    }

    /**
     * End-tag handler: </item> stores the collected record and resets the
     * per-item state, </items> is ignored, any other element is popped
     * off the element stack.
     */
    function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'item' ) {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ( $el == 'items' ) {
            // container element: nothing to do
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

}
// end class AffiliateRSS


/**
 * MagpieRSS subclass for XML feeds whose records are <product> elements
 * wrapped in a <products> container, with nested <categories>/<category>
 * and <additional>/<field> elements.
 *
 * Element handlers are overridden here; character data is handled by the
 * 'feed_cdata' method, which this class does not define itself.
 */
class TradetrackerRSS extends MagpieRSS {
    var $current_categories = array();  // categories of the product being parsed, keyed by name
    var $current_fields     = array();  // additional fields of the product being parsed, keyed by name
    var $incategories       = false;    // true while inside <categories>
    var $incategory         = false;    // true while inside <category>
    var $inadditional       = false;    // true while inside <additional>
    var $inadditonal        = false;    // misspelled legacy name, never written; kept for backward compatibility
    var $infield            = false;    // true while inside <field>

    /**
     * Parse $source and collect every <product> into $this->items.
     *
     * The arguments are handed unchanged to create_parser().
     */
    function __construct ($source, $output_encoding='ISO-8859-1',
                          $input_encoding=null, $detect_encoding=true)
    {
        # if PHP xml isn't compiled in, die
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            // nothing below can work without the extension
            return;
        }

        list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

        // older PHP versions hand back a resource, PHP 8 an object
        if (!is_resource($parser) and !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );

        xml_set_character_data_handler( $this->parser, 'feed_cdata' );

        $status = xml_parse( $this->parser, $source );

        if (!$status) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error  = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col  = xml_get_current_column_number($this->parser);
                $errormsg   = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );
    }

    /**
     * Legacy class-named constructor, kept so the class still constructs
     * on PHP versions that do not know __construct and so existing
     * explicit calls keep working.
     */
    function TradetrackerRSS ($source, $output_encoding='ISO-8859-1',
                              $input_encoding=null, $detect_encoding=true)
    {
        $this->__construct($source, $output_encoding,
                $input_encoding, $detect_encoding);
    }

    /**
     * Start-tag handler.
     *
     * <products> is ignored and <product> marks the start of a record.
     * <categories>, <category>, <additional> and <field> raise their flag;
     * <category> and <field> also record their attributes. Any other
     * element name is pushed onto the element stack.
     */
    function feed_start_element($p, $element, &$attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() instead of split(): split() was removed in PHP 7
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        if ( $el == 'products' ) {
            // container element: nothing to record
        }
        elseif ( $el == 'product' ) {
            $this->initem = true;
        }
        elseif ( $el == 'categories' ) {
            $this->incategories = true;
        }
        elseif ( $el == 'category' ) {
            $this->incategory = true;

            // a missing attribute is treated as an empty string instead of
            // being read as an undefined array index
            $path = isset($attrs['path']) ? $attrs['path'] : '';
            $name = isset($attrs['name']) ? $attrs['name'] : '';

            $this->concat( $this->current_item['_category'], $path );
            $this->concat( $this->current_categories[$name], $path );
        }
        elseif ( $el == 'additional' ) {
            $this->inadditional = true;
        }
        elseif ( $el == 'field' ) {
            $this->infield = true;
            if ( isset($attrs['name']) && isset($attrs['value']) ) {
                $this->concat( $this->current_fields[$attrs['name']], $attrs['value'] );
            }
        }
        else {
            // set stack[0] to current element
            array_unshift($this->stack, $el);
        }
    }

    /**
     * End-tag handler.
     *
     * </product> attaches the collected categories and fields to the
     * current item, stores it and resets the per-product state. The
     * category/field related elements clear their flag, </products> is
     * ignored, and any other element is popped off the element stack.
     */
    function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'product' ) {
            $this->current_item['categories'] = $this->current_categories;
            $this->current_item['fields']     = $this->current_fields;
            $this->items[] = $this->current_item;

            $this->current_item       = array();
            $this->current_categories = array();
            $this->current_fields     = array();
            $this->initem = false;
        }
        elseif ( $el == 'additional' ) {
            $this->inadditional = false;
        }
        elseif ( $el == 'field' ) {
            $this->infield = false;
        }
        elseif ( $el == 'categories' ) {
            $this->incategories = false;
        }
        elseif ( $el == 'category' ) {
            $this->incategory = false;
        }
        elseif ( $el == 'products' ) {
            // container element: nothing to do
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

}
// end class TradetrackerRSS

?>

Is it possible to integrate this code in feedparser?
How can this be done?

Greetings,
Martijn

 
 

Drupal is a registered trademark of Dries Buytaert.