--- aggregator.module	2011-01-11 20:31:48.000000000 +0100
+++ aggregator.module.new	2011-01-11 20:14:43.000000000 +0100
@@ -466,8 +466,8 @@
  * Call-back function used by the XML parser.
  */
 function aggregator_element_start($parser, $name, $attributes) {
-  global $item, $element, $tag, $items, $channel;
+  global $item, $element, $tag, $items, $channel, $insideElement;
 
   switch ($name) {
     case 'IMAGE':
     case 'TEXTINPUT':
@@ -484,6 +484,13 @@
         $element = $name;
       }
     case 'LINK':
+      // 'ID' falls through to this case, and <image>/<textInput> have a <link>
+      // child of their own, so only a direct channel-level <link> qualifies.
+      $is_channel_link = ($name == 'LINK' && !$insideElement && $element != 'IMAGE' && $element != 'TEXTINPUT');
+      if ($is_channel_link) {
+        // Route the link's character data to the LINK case of the data handler.
+        $element = 'LINK';
+      }
       if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate') {
         if ($element == 'ITEM') {
           $items[$item]['LINK'] = $attributes['HREF'];
@@ -492,12 +499,19 @@
           $channel['LINK'] = $attributes['HREF'];
         }
       }
+      // A channel-level link without REL, or with REL "alternate", is the base
+      // against which relative item links are resolved.
+      if ($is_channel_link && !empty($attributes['HREF']) && (empty($attributes['REL']) || $attributes['REL'] == 'alternate')) {
+        $channel['BASELINK'] = $attributes['HREF'];
+      }
       break;
     case 'ITEM':
+      $insideElement = TRUE;
       $element = $name;
       $item += 1;
       break;
     case 'ENTRY':
+      $insideElement = TRUE;
       $element = 'ITEM';
       $item += 1;
       break;
@@ -510,14 +524,32 @@
  * Call-back function used by the XML parser.
  */
 function aggregator_element_end($parser, $name) {
-  global $element;
+  global $element, $insideElement, $channel, $items, $item;
 
   switch ($name) {
     case 'IMAGE':
     case 'TEXTINPUT':
+      $element = '';
+      break;
+    case 'LINK':
+      // Only undo the 'LINK' state set for a channel-level <link>; a <link>
+      // inside an item or an image must leave the enclosing state alone.
+      if ($element == 'LINK') {
+        $element = '';
+      }
+      break;
     case 'ITEM':
     case 'ENTRY':
+      $insideElement = FALSE;
+      // Resolve a relative item link against the channel's base link.
+      if (!empty($items[$item]['LINK']) && !empty($channel['BASELINK'])) {
+        $link = trim($items[$item]['LINK']);
+        if ($link != '' && strpos($link, '://') === FALSE) {
+          $items[$item]['LINK'] = rtrim(trim($channel['BASELINK']), '/') . '/' . ltrim($link, '/');
+        }
+      }
+      // Fall through to reset $element.
     case 'CONTENT':
     case 'INFO':
       $element = '';
       break;
@@ -532,7 +564,7 @@
  * Call-back function used by the XML parser.
  */
 function aggregator_element_data($parser, $data) {
-  global $channel, $element, $items, $item, $image, $tag;
+  global $channel, $element, $items, $item, $image, $tag, $insideElement;
   $items += array($item => array());
   switch ($element) {
     case 'ITEM':
@@ -544,11 +576,18 @@
       $image += array($tag => '');
       $image[$tag] .= $data;
       break;
     case 'LINK':
-      if ($data) {
+      if ($data && $insideElement) {
         $items[$item] += array($tag => '');
         $items[$item][$tag] .= $data;
       }
+      elseif (!$insideElement) {
+        // Text of a channel-level <link>: keep filling $channel[$tag] as before
+        // and mirror it into the base link used for relative item links.
+        $channel += array($tag => '', 'BASELINK' => '');
+        $channel[$tag] .= $data;
+        $channel['BASELINK'] .= $data;
+      }
       break;
     case 'CONTENT':
       $items[$item] += array('CONTENT' => '');
@@ -570,8 +609,15 @@
       // it or its contents will end up in the item array.
       break;
     default:
-      $channel += array($tag => '');
-      $channel[$tag] .= $data;
+      if ($insideElement) {
+        // $element was reset (e.g. by </content>) while still inside an item.
+        $items[$item] += array($tag => '');
+        $items[$item][$tag] .= $data;
+      }
+      else {
+        $channel += array($tag => '');
+        $channel[$tag] .= $data;
+      }
   }
 }
 
@@ -701,7 +747,7 @@
   global $items, $image, $channel;
 
   // Unset the global variables before we use them:
-  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
+  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag'], $GLOBALS['insideElement']);
   $items = array();
   $image = array();
   $channel = array();