diff -Naur planet-6.x-1.3/planet.info 6.x/planet.info --- planet-6.x-1.3/planet.info 2009-03-12 10:25:55.000000000 -0400 +++ 6.x/planet.info 2009-03-19 11:10:42.000000000 -0400 @@ -1,13 +1,6 @@ -; $Id: planet.info,v 1.4 2009/03/11 23:15:34 swe3tdave Exp $ +; $Id: planet.info,v 1.2 2007/05/30 03:22:01 daryl Exp $ name = Planet description = Planet blog aggregator package = Community - optional version = 6.x core = 6.x - -; Information added by drupal.org packaging script on 2009-03-12 -version = "6.x-1.3" -core = "6.x" -project = "planet" -datestamp = "1236867955" - diff -Naur planet-6.x-1.3/planet.install 6.x/planet.install --- planet-6.x-1.3/planet.install 2009-03-11 19:15:34.000000000 -0400 +++ 6.x/planet.install 2009-03-19 11:37:10.000000000 -0400 @@ -1,5 +1,5 @@ TRUE, 'default' => 0, ), + 'hash' => array( + 'type' => 'varchar', + 'length' => 32, + 'description' => t("A hash of the feed's headers."), + ), + 'error' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'length' => 1, + 'description' => t("Whether the feed is throwing errors or not."), + ), ), 'primary key' => array('fid'), + 'indexes' => array( + 'error' => array('error'), + ), ); $schema['planet_items'] = array( @@ -67,6 +82,12 @@ 'unsigned' => 1, 'not null' => FALSE, ), + 'iid' => array( + 'type' => 'varchar', + 'length' => 32, + 'not null' => TRUE, + 'description' => t("md5 of the feed item's title and body."), + ), 'guid' => array( 'type' => 'varchar', 'length' => 120, @@ -120,3 +141,41 @@ db_query("DELETE FROM {node} WHERE type = '%s'", planet); } + +function planet_update_1() { + $ret = array(); + db_add_field($ret, 'planet_feeds', 'hash', array( + 'type' => 'varchar', + 'length' => 32, + 'description' => t("A hash of the feed's headers."), + ) + ); + return $ret; +} + +function planet_update_2() { + $ret = array(); + db_add_field($ret, 'planet_items', 'iid', array( + 'type' => 'varchar', + 'length' => 32, + 'not null' => TRUE, + 'description' => t("md5 of the feed item's 
title and body."), + ) + ); + return $ret; +} + +function planet_update_3() { + $ret = array(); + db_add_field($ret, 'planet_feeds', 'error', array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'length' => 1, + 'description' => t("Whether the feed is throwing errors or not."), + ) + ); + db_add_index($ret, 'planet_feeds', 'error', array('error')); + + return $ret; +} \ Pas de fin de ligne à la fin du fichier. diff -Naur planet-6.x-1.3/planet.module 6.x/planet.module --- planet-6.x-1.3/planet.module 2009-03-11 19:15:34.000000000 -0400 +++ 6.x/planet.module 2009-03-19 11:11:19.000000000 -0400 @@ -1,5 +1,5 @@ uid; - } - - if ($op == 'update' || $op == 'delete') { - if (user_access('edit own blog', $account) && ($account->uid == $node->uid) || user_access('administer nodes', $account)) { - return TRUE; - } - } -} - function planet_menu() { $items['admin/settings/planet'] = array( @@ -486,508 +472,58 @@ if (!$fid) { $fid = intval(arg(4)); } - - $feed = db_fetch_object(db_query('SELECT * FROM {planet_feeds} WHERE fid = %d', $fid)); - - $headers = array(); - $result = planet_http_request($feed->link, $headers, 15); - - switch ($result->code) { - case 304: - drupal_set_message(t('No new content syndicated from %site.', array('%site' => ''. $feed->title .''))); - break; - - case 301: - if ($result->redirect_url) { - $feed->link = $result->redirect_url; - watchdog('planet', 'Updated URL for feed %title to %url.', array('%title' => ''. $feed->title .'', '%url' => ''. $feed->url .''), WATCHDOG_NOTICE, l(t('view'), 'planet/'.$feed->fid)); - db_query("UPDATE {planet_feeds} SET link = '%s' WHERE fid = %d", $feed->link, $feed->fid); - } - break; - - case 200: - case 302: - case 307: - $xml_tree = planet_parse_xml($result->data); - - if ($xml_tree['parser_error']) { - watchdog('planet', 'Failed to parse RSS feed %site: %error at line %line.', array('%site' => ''. 
$feed->title .'', '%error' => $xml_tree['parser_error'], '%line' => $xml_tree['parser_line']), WATCHDOG_ERROR); - drupal_set_message(t('Failed to parse RSS feed %site: %error at line %line.', array('%site' => ''. $feed->title .'', '%error' => $xml_tree['parser_error'], '%line' => $xml_tree['parser_line'])), 'error'); - break; - } - else { - drupal_set_message('Parsing feed '. $feed->title .' took '. $xml_tree['parser_time'] .' seconds.'); - } - - if (planet_parse_items($xml_tree, $feed) !== false) { - if ($result->headers['Last-Modified']) { - $modified = strtotime($result->headers['Last-Modified']); - } - - /* - ** Prepare data: - */ - if ($xml_tree['RSS']) { // RSS 0.91, 0.92, 2.0 - $root = &$xml_tree['RSS'][0]; - $channel = &$root['CHANNEL'][0]; - $image = &$channel['IMAGE'][0]; - $description = &$channel['DESCRIPTION'][0]['VALUE']; - $link = &$channel['LINK'][0]['VALUE']; - } - else if ($xml_tree['RDF:RDF']) { - $root = &$xml_tree['RDF:RDF'][0]; - $channel = &$root['CHANNEL'][0]; - $image = &$root['IMAGE'][0]; - $description = &$channel['DESCRIPTION'][0]['VALUE']; - $link = &$channel['LINK'][0]['VALUE']; - } - else if ($xml_tree['FEED']) { // Atom 0.3, 1.0 - $root = &$xml_tree['FEED'][0]; - $channel = &$root; - $image = &$channel['LOGO'][0]['VALUE']; - $description = ($channel['TAGLINE'][0]['VALUE'] ? 
$channel['TAGLINE'][0]['VALUE'] : ''); - // TODO: remove this Atom hack when we have field mapping or at least specialized parsers in place - if (count($channel['LINK']) > 1) { - $link = $feed->link; - foreach ($channel['LINK'] as $l) { - if ($l['REL'] == 'alternate') { - $link = $l['HREF']; - } - } - } - else { - $link = $channel['LINK'][0]['HREF']; - } - } - else if ($xml_tree['CHANNEL']) { // RSS 1.1 - $root = &$xml_tree['CHANNEL'][0]; - $channel = &$root; - $image = &$channel['IMAGE'][0]; - $description = &$channel['DESCRIPTION'][0]['VALUE']; - $link = &$channel['LINK'][0]['VALUE']; - } - else if ($xml_tree['OPML']) { - $root = &$xml_tree['OPML'][0]; - $channel = &$root; - $image = NULL; - $description = NULL; - $link = NULL; - } - else { - // unsupported format - break; - } - - if (!$feed->uid) { - if ($channel['AUTHOR'][0]['VALUE']) { - $feed->uid = $channel['AUTHOR'][0]['VALUE']; - } - if ($channel['AUTHOR'][0]['NAME'][0]['VALUE']) { - $feed->uid = $channel['AUTHOR'][0]['NAME'][0]['VALUE']; - } - else if ($channel['DC:CREATOR']) { - $feed->uid = $channel['DC:CREATOR'][0]['VALUE']; - } - else { - $feed->uid = ''; - } - } - - /* - ** Generate image link - */ - if (!$feed->image && $image['LINK'] && $image['URL'] && $image['TITLE']) { - if (strlen($image['TITLE'][0]['VALUE']) > 250) { - $image['TITLE'][0]['VALUE'] = trim(substr($image['TITLE'][0]['VALUE'], 0, 250)).'...'; - } - $feed->image = ''; - } - - /* - ** Update the feed data: - */ - $feed->checked = time(); - $feed->link = $link; - $feed->etag = $result->headers['ETag']; - $feed->modified = $modified; - if ($feed->body == '' && $description/* && valid_input_data($description)*/) { - $feed->body = $feed->teaser = $description; - } - $feed->rss_data = &$xml_tree; - - /* - ** Taxonomy module doesn't add taxonomy terms at load time... 
so we have to do it by hand :(( - */ - $terms = module_invoke('taxonomy', 'node_get_terms', $feed->nid, 'tid'); - $feed->taxonomy = array(); - foreach ($terms as $tid => $term) { - if ($term->tid) { - $feed->taxonomy[] = $term->tid; - } - } - } - default: - } - - - db_query('UPDATE {planet_feeds} SET checked = %d WHERE fid = %d', time(), $fid); - return $feed->title; - //print theme('page', 'refreshing '. $fid .'.');// and got '. print_r($feed, 1)); -} - -/** - * Private function; Parse HTTP headers from data retreived with cURL - * from: http://pl2.php.net/manual/en/function.curl-setopt.php#42009 - */ -function planet_parse_response($response) { - /* - ***original code extracted from examples at - ***http://www.webreference.com/programming - /php/cookbook/chap11/1/3.html - - ***returns an array in the following format which varies depending on headers returned - - [0] => the HTTP error or response code such as 404 - [1] => Array - ( - [Server] => Microsoft-IIS/5.0 - [Date] => Wed, 28 Apr 2004 23:29:20 GMT - [X-Powered-By] => ASP.NET - [Connection] => close - [Set-Cookie] => COOKIESTUFF - [Expires] => Thu, 01 Dec 1994 16:00:00 GMT - [Content-Type] => text/html - [Content-Length] => 4040 - ) - [2] => Response body (string) - */ - - do { - list($response_headers, $response) = explode("\r\n\r\n", $response, 2); - $response_header_lines = explode("\r\n", $response_headers); - - // first line of headers is the HTTP response code - $http_response_line = array_shift($response_header_lines); - if (preg_match('@^HTTP/[0-9]\.[0-9] ([0-9]{3})@', $http_response_line, $matches)) { - $response_code = $matches[1]; - } - else { - $response_code = "Error"; - } - } - while (substr($response_code, 0, 1) == "1"); - - $response_body = $response; - - // put the rest of the headers in an array - $response_header_array = array(); - foreach ($response_header_lines as $header_line) { - list($header, $value) = explode(':', $header_line, 2); - $response_header_array[$header] = trim($value); - 
} - - return array($response_code, $response_header_array, $response_body, $http_response_line); -} - -/** - * Private function; Gets data from given URL :) - */ -function planet_http_request($url, $headers = array(), $timeout = 15, $method = 'GET', $data = NULL, $follow = 3) { - if (!function_exists('curl_init')) { - return drupal_http_request($url, $headers, $method, $data, $follow); - } - - // convert headers array to format used by cURL - $temp = array(); - foreach ($headers as $header => $value) { - $temp[] = $header .': '. $value; - } - $headers = $temp; - - $result = new StdClass(); - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HEADER, 1); - curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0); - curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); - - $data = curl_exec($ch); - $info = curl_getinfo($ch); - - curl_close($ch); - unset($ch); - - $response = planet_parse_response($data); - $result->code = $response[0]; - $result->headers = $response[1]; - $result->data = $response[2]; - $error = $response[3]; - switch ($code) { - case 200: // OK - case 304: // Not modified - break; - case 301: // Moved permanently - case 302: // Moved temporarily - case 307: // Moved temporarily - $location = $result->headers['Location']; - - if ($follow) { - $result = planet_http_request($result->headers['Location'], $headers, $timeout, $method, $data, --$follow); - $result->redirect_code = $result->code; - } - $result->redirect_url = $location; - break; - default: - $result->error = $error; - break; - } - - $result->code = $response[0]; - return $result; -} - -/** - * Private function; Checks a news feed for new items. - */ - - -/** - * Private function; - * Parse the W3C date/time format, a subset of ISO 8601. PHP date parsing - * functions do not handle this format. - * See http://www.w3.org/TR/NOTE-datetime for more information. 
- * Origionally from MagpieRSS (http://magpierss.sourceforge.net/). - * - * @param $date_str A string with a potentially W3C DTF date. - * @return A timestamp if parsed successfully or -1 if not. - */ -function planet_parse_w3cdtf($date_str) { - if (preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) { - list($year, $month, $day, $hours, $minutes, $seconds) = array($match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); - // calc epoch for current date assuming GMT - $epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year); - if ($match[10] != 'Z') { // Z is zulu time, aka GMT - list($tz_mod, $tz_hour, $tz_min) = array($match[8], $match[9], $match[10]); - // zero out the variables - if (!$tz_hour) { - $tz_hour = 0; - } - if (!$tz_min) { - $tz_min = 0; - } - $offset_secs = (($tz_hour * 60) + $tz_min) * 60; - // is timezone ahead of GMT? then subtract offset - if ($tz_mod == '+') { - $offset_secs *= -1; - } - $epoch += $offset_secs; - } - return $epoch; - } - else { - return -1; - } -} - -/** - * Private function; - * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51055 - * Used as callback function for preg_replace_all() to decode numeric entities to UTF-8 chars - * - * @param $ord Number - * @return UTF-8 string - */ -function planet_replace_num_entity($ord) { - $ord = $ord[1]; - if (preg_match('/^x([0-9a-f]+)$/i', $ord, $match)) { - $ord = hexdec($match[1]); - } - else { - $ord = intval($ord); - } - - $no_bytes = 0; - $byte = array(); - - if ($ord == 128) { - return chr(226) . chr(130) . chr(172); - } - else if ($ord == 129) { - return chr(239) . chr(191) . chr(189); - } - else if ($ord == 130) { - return chr(226) . chr(128) . chr(154); - } - else if ($ord == 131) { - return chr(198) . chr(146); - } - else if ($ord == 132) { - return chr(226) . chr(128) . chr(158); - } - else if ($ord == 133) { - return chr(226) . chr(128) . 
chr(166); - } - else if ($ord == 134) { - return chr(226) . chr(128) . chr(160); - } - else if ($ord == 135) { - return chr(226) . chr(128) . chr(161); - } - else if ($ord == 136) { - return chr(203) . chr(134); - } - else if ($ord == 137) { - return chr(226) . chr(128) . chr(176); - } - else if ($ord == 138) { - return chr(197) . chr(160); - } - else if ($ord == 139) { - return chr(226) . chr(128) . chr(185); - } - else if ($ord == 140) { - return chr(197) . chr(146); - } - else if ($ord == 141) { - return chr(239) . chr(191) . chr(189); - } - else if ($ord == 142) { - return chr(197) . chr(189); - } - else if ($ord == 143) { - return chr(239) . chr(191) . chr(189); - } - else if ($ord == 144) { - return chr(239) . chr(191) . chr(189); - } - else if ($ord == 145) { - return chr(226) . chr(128) . chr(152); - } - else if ($ord == 146) { - return chr(226) . chr(128) . chr(153); - } - else if ($ord == 147) { - return chr(226) . chr(128) . chr(156); - } - else if ($ord == 148) { - return chr(226) . chr(128) . chr(157); - } - else if ($ord == 149) { - return chr(226) . chr(128) . chr(162); - } - else if ($ord == 150) { - return chr(226) . chr(128) . chr(147); - } - else if ($ord == 151) { - return chr(226) . chr(128) . chr(148); - } - else if ($ord == 152) { - return chr(203) . chr(156); - } - else if ($ord == 153) { - return chr(226) . chr(132) . chr(162); - } - else if ($ord == 154) { - return chr(197) . chr(161); - } - else if ($ord == 155) { - return chr(226) . chr(128) . chr(186); - } - else if ($ord == 156) { - return chr(197) . chr(147); - } - else if ($ord == 157) { - return chr(239) . chr(191) . chr(189); - } - else if ($ord == 158) { - return chr(197) . chr(190); - } - else if ($ord == 159) { - return chr(197) . chr(184); - } - else if ($ord == 160) { - return chr(194) . 
chr(160); - } - - if ($ord < 128) { - return chr($ord); - } - else if ($ord < 2048) { - $no_bytes = 2; - } - else if ($ord < 65536) { - $no_bytes = 3; - } - else if ($ord < 1114112) { - $no_bytes = 4; + // initialize simplepie + // we want to do this only once and not each time per feed, which would be slower + include_once './'. drupal_get_path('module', 'planet') .'/simplepie.inc'; + + $process_feed = db_fetch_object(db_query('SELECT * FROM {planet_feeds} WHERE fid = %d', $fid)); + + $feed = new SimplePie(); + $feed->enable_cache(FALSE); + $feed->set_timeout(15); + // prevent SimplePie from using all of it's data santization since we use Drupal's input formats to handle this + $feed->set_stupidly_fast(TRUE); + $feed->set_feed_url($process_feed->link); + // FeedBurner requires this check otherwise it won't work well with SimplePie + // also performance improvement + header('If-Modified-Since:'. $process_feed->checked); + $success = $feed->init(); + + if ($success && $feed->data) { + // get a unique hash of the headers in the feed, fast and easy way to compare if this feed is updated or not + $hash = md5(serialize($feed->data)); + + // hashes don't match so likely the feed is updated + if ($process_feed->hash != $hash) { + // above we define hook_view() which then performs check_url() on the $url in the feed node + // the problem is check_url() calls filter_xss_bad_protocol() which does it thing to prevent XSS + // but it returns the string through check_plain() which calls htmlspecialchars() + // this converts & in a url to & and then causes SimplePie not to be able to parse it + // because of this, we decode this URL since we are passing it directly to SimplePie + // it is still encoded everywhere else it is output to prevent XSS + $process_feed->link = htmlspecialchars_decode($process_feed->link, ENT_QUOTES); + + // turn each feed item into a node + planet_item_feed_parse($process_feed, $feed); + } + + // finished processing this feed so we can mark it checked + 
db_query("UPDATE {planet_feeds} SET checked = %d, hash = '%s', error = 0 WHERE fid = %d", time(), $hash, $process_feed->fid); + } + else if (isset($feed->error)) { + db_query("UPDATE {planet_feeds} SET error = 1 WHERE fid = %d", $process_feed->fid); + watchdog('planet', 'The feed %feed could not be processed due to the following error: %error', array('%feed' => $process_feed->title, '%error' => $feed->error), WATCHDOG_ERROR, l('view', $process_feed->link)); } else { - return; - } - - switch ($no_bytes) { - case 2: - $prefix = array(31, 192); - break; - - case 3: - $prefix = array(15, 224); - break; - - case 4: - $prefix = array(7, 240); - break; + watchdog('planet', 'You shouldn\'t be here. Something has gone terribly wrong.'); } - - for ($i = 0; $i < $no_bytes; $i++) { - $byte[$no_bytes - $i - 1] = (($ord & (63 * pow(2, 6 * $i))) / pow(2, 6 * $i)) & 63 | 128; - } - - $byte[0] = ($byte[0] & $prefix[0]) | $prefix[1]; - - $ret = ''; - for ($i = 0; $i < $no_bytes; $i++) { - $ret .= chr($byte[$i]); - } - - return $ret; -} - -/** - * Private function; Convert named entities to UTF-8 characters - * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51722 - */ -function planet_replace_name_entities(&$text) { - static $ttr; - if (!$ttr) { - $trans_tbl = get_html_translation_table(HTML_ENTITIES); - foreach ($trans_tbl as $k => $v) { - $ttr[$v] = utf8_encode($k); - } - $ttr['''] = "'"; - } - return strtr($text, $ttr); -} - -/** - * Private function; Convert all entities to UTF-8 characters - */ -function planet_replace_entities(&$text) { - $result = planet_replace_name_entities($text); - return preg_replace_callback('/&#([0-9a-fx]+);/mi', 'planet_replace_num_entity', $result); + + return $process_feed->title; } /** - * Private function; Clone object function to stay compatible with both php4 and php5 - * from: Drupal 4.7CVS - * TODO: remove after moving to Drupal 4.7 + * Private function; Checks a news feed for new items. 
*/ -function planet_clone($object) { - return version_compare(phpversion(), '5.0') < 0 ? $object : clone($object); -} /** * Private function; Convert relative URLs @@ -998,341 +534,6 @@ return preg_replace($src, $dst, $data); } -/** - * Private function; Creates nodes from data found in given xml_tree - */ -function planet_parse_items(&$xml_tree, &$feed) { - - if ($xml_tree['RSS']) { // RSS 0.91, 0.92, 2.0 - $items = &$xml_tree['RSS'][0]['CHANNEL'][0]['ITEM']; - $link_field = 'VALUE'; - } - else if ($xml_tree['RDF:RDF']) { - $items = &$xml_tree['RDF:RDF'][0]['ITEM']; - $link_field = 'VALUE'; - } - else if ($xml_tree['FEED']) { // Atom 0.3, 1.0 - $items = &$xml_tree['FEED'][0]['ENTRY']; - $link_field = 'HREF'; - } - else if ($xml_tree['CHANNEL']) { // RSS 1.1 - $items = &$xml_tree['CHANNEL'][0]['ITEMS'][0]['ITEM']; - $link_field = 'VALUE'; - } - else { - // unsupported format - $items = array(); - return false; - } - - /* - ** We reverse the array such that we store the first item last, - ** and the last item first. In the database, the newest item - ** should be at the top. - */ - $items_added = 0; - - - for ($index = count($items) - 1; $index >= 0; $index--) { - $item = &$items[$index]; - //print '
'. print_r($item, 1) .'
'; - $teaser = NULL; - $body = NULL; - - // Description field is needed early for case when no title is specified - if ($item['DESCRIPTION']) { // RSS 0.91, 0.92, 1.0, 1.1, 2.0 - $body = &$item['DESCRIPTION'][0]['VALUE']; - } - else if ($item['SUMMARY']) { // Atom 0.3, 1.0 - $body = &$item['SUMMARY'][0]['VALUE']; - } - - if ($item['CONTENT']) { // Atom 0.3, 1.0 - if (strlen($body) < strlen($item['CONTENT'][0]['VALUE'])) { - if ($body) { - $teaser = $body; - } - $body = &$item['CONTENT'][0]['VALUE']; - } - } - else if ($item['CONTENT:ENCODED']) { // Don't know where it came from but it can be found in RSS 2.0 feeds - if (strlen($body) < strlen($item['CONTENT:ENCODED'][0]['VALUE'])) { - if ($body) { - $teaser = $body; - } - $body = &$item['CONTENT:ENCODED'][0]['VALUE']; - } - } - - /* - ** Resolve the item's title. If no title is found, we use - ** up to 40 characters of the description ending at a word - ** boundary but not splitting potential entities. - */ - if (!($title = $item['TITLE'][0]['VALUE'])) { - $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($body, 40)); - } - - // If title was "escaped" then it may still contain entities, becuase each & from entity was also escabet to & before - // TODO: the same for content? - if ($item['TITLE'][0]['MODE'] == 'escaped') { - $title = planet_replace_entities($title); - } - $title = strip_tags($title); - - /* - ** Resolve the items link. 
- */ - if ($item['LINK']) { - // TODO: remove this Atom hack when we have field mapping or at least specialized parsers in place - if (count($item['LINK']) > 1) { - $link = $feed->link; - foreach ($item['LINK'] as $temp) { - if ($temp['REL'] == 'alternate') { - $link = $temp[$link_field]; - } - } - } - else { - $link = $item['LINK'][0][$link_field]; - } - } - elseif ($item['GUID'] && (strncmp($item['GUID'][0][$link_field], 'http://', 7) == 0) && $item['GUID'][0]['ISPERMALINK'] != 'false') { - $link = $item['GUID'][0][$link_field]; - } - else { - $link = $feed->link; - } - - /* - ** Resolve the items source. - */ - if ($item['SOURCE'][0]['VALUE'] && $item['SOURCE'][0]['URL']) { // RSS 2.0 - $source_title = &$item['SOURCE'][0]['VALUE']; - $source_link = &$item['SOURCE'][0]['URL']; - } - else if ($item['SOURCE'] || $item['ATOM:SOURCE']) { // ATOM 1.0 - if ($item['SOURCE'][0]['TITLE']) $source_title = &$item['SOURCE'][0]['TITLE'][0]['VALUE']; - else if ($item['SOURCE'][0]['ATOM:TITLE']) $source_title = &$item['SOURCE'][0]['ATOM:TITLE'][0]['VALUE']; - if ($item['SOURCE'][0]['LINK']) $source_link = &$item['SOURCE'][0]['LINK'][0]['VALUE']; - else if ($item['SOURCE'][0]['ATOM:LINK']) $source_link = &$item['SOURCE'][0]['ATOM:LINK'][0]['VALUE']; - } - else { - $source_title = ''; - $source_link = ''; - } - - /* - ** Try to resolve and parse the item's publication date. If no - ** date is found, we use the current date instead. 
- */ - // TODO: find nicer way for handling namespaces ;) - if ($item['PUBDATE']) $date = $item['PUBDATE'][0]['VALUE']; // RSS 2.0 - else if ($item['DC:DATE']) $date = $item['DC:DATE'][0]['VALUE']; // Dublin core - else if ($item['DATE']) $date = $item['DATE'][0]['VALUE']; // Dublin core - else if ($item['DCTERMS:ISSUED']) $date = $item['DCTERMS:ISSUED'][0]['VALUE']; // Dublin core - else if ($item['ISSUED']) $date = $item['ISSUED'][0]['VALUE']; // Dublin core - else if ($item['DCTERMS:CREATED']) $date = $item['DCTERMS:CREATED'][0]['VALUE']; // Dublin core - else if ($item['CREATED']) $date = $item['CREATED'][0]['VALUE']; // Dublin core - else if ($item['DCTERMS:MODIFIED']) $date = $item['DCTERMS:MODIFIED'][0]['VALUE']; // Dublin core - else if ($item['MODIFIED']) $date = $item['MODIFIED'][0]['VALUE']; // Dublin core - else if ($item['ATOM:UPDATED']) $date = $item['ATOM:UPDATED'][0]['VALUE']; // Atom - else if ($item['UPDATED']) $date = $item['UPDATED'][0]['VALUE']; // Atom - else $date = 'now'; - - if ($feed->item_date_source == FEEDS_ITEM_DATE_SNIFFED && $date) { - $timestamp = strtotime($date); // strtotime() returns -1 on failure - if ($timestamp < 0) { - $timestamp = planet_parse_w3cdtf($date); // also returns -1 on failure - if ($timestamp < 0) { - $timestamp = time(); // better than nothing - } - } - } - else { - $timestamp = time(); - } - - // Ignore items older than allowed for feed - if ($timestamp < $time_horizont) { - continue; - } - - /* - ** Save this item. Try to avoid duplicate entries as much as - ** possible. If we find a duplicate entry, we resolve it and - ** pass along it's ID such that we can update it if needed. 
- */ - // Try to use RSS:GUID/ATOM:ID as unique identifier - $guid = ''; - if ($item['GUID'][0]['VALUE']) { // RSS 2.0 - $guid = $item['GUID'][0]['VALUE']; - } - else if ($item['ATOM:ID'][0]['VALUE']) { // ATOM 0.3, 1.0 - $guid = $item['ATOM:ID'][0]['VALUE']; - } - else if ($item['ID'][0]['VALUE']) { // ATOM 0.3, 1.0 - $guid = $item['ID'][0]['VALUE']; - } - else { - // feed may contain duplicated links for different items, so we try to generate unique ID for each item - $guid = md5("$title - . " . $feed->fid); - } - // TODO: is there anyway to check if DC:IDENTIFIER is unique? - // http://dublincore.org/documents/usageguide/elements.shtml says it can be non-unique so useles for us :( - - $entry = NULL; - if ($guid && strlen($guid) > 0) { - $entry = db_fetch_object(db_query("SELECT nid FROM {planet_items} WHERE guid = '%s' AND fid = %d", $guid, $feed->fid)); - } - else if ($link && $link != $feed->link && $link != $feed->url) { - $entry = db_fetch_object(db_query("SELECT nid FROM {planet_items} WHERE guid = '%s' AND fid = %d", $link, $feed->fid)); - } - else { - $entry = db_fetch_object(db_query("SELECT ai.nid AS nid FROM {node} n, {planet_items} ai WHERE ai.fid = %d AND ai.nid = n.nid AND n.title = '%s'", $feed->fid, $title)); - } - - //print $guid .'
'; - //print $entry->nid .'
'; - // Ignore items already existing in database and not allowed to be updated - - //Fields to update in either case - $entry->changed = strtotime($date); - $entry->title = $title; - $entry->body = $body; - $entry->body = planet_convert_relative_urls($body, $link); - $entry->teaser = node_teaser($entry->body); - $entry->revision = true; - - //Fields to set if it's a new item. - if (!isset($entry->nid)) { - //print "Planet item " . $entry->title . "
"; - $entry->type = 'planet'; - - $options = variable_get('node_options_planet', array()); - - $entry->uid = $feed->uid; - $entry->status = 1; - $entry->moderate = 0; - $entry->promote = in_array('promote', $options) ? 1 : 0; - $entry->sticky = in_array('sticky', $options) ? 1 : 0; - $entry->comment = in_array('comment', $options) ? 2 : 0; - $entry->format = variable_get('planet_filter_formats', 1); - $entry->created = strtotime($date); - $entry->revision = true; - - $terms = module_invoke('taxonomy', 'node_get_terms', $edit->nid, 'tid'); - foreach ($terms as $tid => $term) { - if ($term->tid) { - $edit->taxonomy[] = $term->tid; - } - } - //print '
'. print_r($entry, 1) .'
'; - node_save($entry); - db_query('INSERT INTO {planet_items} (fid, nid, guid, link, created) VALUES(%d, %d, "%s", "%s", UNIX_TIMESTAMP(NOW()))', $feed->fid, $entry->nid, $guid, $link); - watchdog('planet', 'Adding '. $title); - drupal_set_message('Adding '. $title); - } - } - - return $items_added; -} - - -/** - * Private function; parses given XML data and returns array - */ -function planet_parse_xml(&$data) { - global $xml_tree, $xml_paths, $xml_path_cur; - $xml_tree = array(); - $xml_paths[] = &$xml_tree; - $xml_path_cur = 0; - - $_start = microtime(); - - // Some feeds already use CDATA but in "wrong way": http://www.rocketboom.com/vlog/quicktime_daily_enclosures.xml (ie. something - $data = trim(str_replace(array(''), '', $data)); - - // Add CDATA around content which may contain (x)html data, and is not contained in CDATA yet - $src = array( - '%(<(link|content|content:encoded|description|title|summary|info|tagline|copyright|source|itunes:summary|media:text|text)(?>[^<]*(?)(?!)%sUS', - '%24:(\d\d:\d\d)%' // workaround buggy hour format in feeds - /*'%(<(\w+)(?>[^<]*type=")(?:text/html|application/xhtml\+xml|html|xhtml")(?>[^<]*(?)(?!)%sUS'*/ - ); - $dst = array( - '$1$4', - '00:$1' - ); - $data = preg_replace($src, $dst, $data); - - // parse the data: - $xml_parser = drupal_xml_parser_create($data); - if ($xml_parser == NULL) { - return $xml_tree; - } - - xml_set_element_handler($xml_parser, 'planet_element_start', 'planet_element_end'); - xml_set_character_data_handler($xml_parser, 'planet_element_data'); - xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1); - xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 1); - if (!xml_parse($xml_parser, $data, 1)) { - $xml_tree['parser_error'] = xml_error_string(xml_get_error_code($xml_parser)); - $xml_tree['parser_line'] = xml_get_current_line_number($xml_parser); - } - else { - unset($xml_tree['parser_error']); - unset($xml_tree['parser_line']); - } - xml_parser_free($xml_parser); - - $_end = 
microtime(); - - list($sec, $usec) = explode(' ', $_start); - $_start = $sec + $usec; - list($sec, $usec) = explode(' ', $_end); - $xml_tree['parser_time'] = ($sec + $usec) - $_start; - - return $xml_tree; -} - -/** - * Private call-back function used by the XML parser. - */ -function planet_element_start($parser, $name, $attributes) { - global $xml_tree, $xml_paths, $xml_path_cur; - - $temp = &$xml_paths[$xml_path_cur++]; - $temp[$name][] = $attributes; - $xml_paths[$xml_path_cur] = &$temp[$name][count($temp[$name])-1]; -} - -/** - * Private call-back function used by the XML parser. - */ -function planet_element_end($parser, $name) { - global $xml_tree, $xml_paths, $xml_path_cur; - - $temp = &$xml_paths[$xml_path_cur]; - array_pop($xml_paths); - $xml_path_cur--; - if (isset($temp['VALUE'])) { - $temp['VALUE'] = trim(planet_replace_entities($temp['VALUE'])); - } -} - -/** - * Private call-back function used by the XML parser. - */ -function planet_element_data($parser, $data) { - global $xml_tree, $xml_paths, $xml_path_cur; - - $temp = trim($data); - if (strlen($temp) > 0) { - $temp = &$xml_paths[$xml_path_cur]; - $temp['VALUE'] .= $data; - } -} - function planet_page_last() { global $user; @@ -1388,27 +589,6 @@ } } -/** - * Menu callback; displays a Drupal page containing recent planet entries. - */ -function planet_page($a = NULL, $b = NULL) { - - if (is_numeric($a)) { // $a is a user ID - if ($b == 'feed') { - return planet_feed_user($a); - } - else { - return planet_page_user($a); - } - } - else if ($a == 'feed') { - return planet_feed_last(); - } - else { - return planet_page_last(); - } -} - function planet_page_user($uid) { global $user; @@ -1453,3 +633,114 @@ return $form; } +/** + * Turn each feed item into a node. + * + * @param $process_feed + * Feed node object + * @param $feed + * SimplePie feed object instaniated. 
+ */ +function planet_item_feed_parse($process_feed, $feed) { + // loop through all of the items in the feed, faster than foreach + $max = $feed->get_item_quantity(); + $count = 0; + module_load_include('inc', 'node', 'node.pages'); + module_load_include('inc', 'node', 'content_types'); + $node = node_get_types('type', 'feed_item'); + + for ($i = 0; $i < $max; $i++) { + $item = $feed->get_item($i); + + // we don't use $item->get_id(true) from SimplePie because it is slightly buggy + // and requires a lot of overhead to compute each time (since it uses a gigantic array structure) + // instead we opt for a much lighter weight comparison of just the title and body, eliminating the + // possibility of any date changes or other tiny changes causing duplicate nodes that otherwise + // appear to be the same + // that is why the body and title processing appears out here, so we can check for duplicates + // it is fast enough to not make much of a difference otherwise + $body = $item->get_content(); + // this strips out any tags that may appear as in the title, and makes sure " -> " for display + $title = strip_tags(decode_entities($item->get_title())); + + // some feeds don't provide titles so we construct one with the first 72 characters of the body + if (!$title) { + // remove any HTML or line breaks so these don't appear in the title + $title = trim(str_replace(array("\n", "\r"), ' ', strip_tags($body))); + $title = trim(substr($title, 0, 72)); + $lastchar = substr($title, -1, 1); + // check to see if the last character in the title is a non-alphanumeric character, except for ? or ! + // if it is strip it off so you don't get strange looking titles + if (preg_match('/[^0-9A-Za-z\!\?]/', $lastchar)) { + $title = substr($title, 0, -1); + } + // ? and ! are ok to end a title with since they make sense + if ($lastchar != '!' 
and $lastchar != '?') { + $title .= '...'; + } + } + + // unique id for each feed item, try and use item permalink, otherwise use feed permalink + if (!$link = $item->get_permalink()) { + $link = $feed->get_permalink(); + } + // we don't need serialize() since we already have strings + $iid = md5($title . $link); + $guid = md5("$title - . " . $process_feed->fid); + // make sure we don't already have this feed item + $duplicate = db_result(db_query("SELECT COUNT(iid) FROM {planet_items} WHERE iid = '%s'", $iid)); + + if (!$duplicate) { + + + $entry = NULL; + if ($guid && strlen($guid) > 0) { + $entry = db_fetch_object(db_query("SELECT nid FROM {planet_items} WHERE guid = '%s' AND fid = %d", $guid, $process_feed->fid)); + } + else if ($link && $link != $feed->link && $link != $feed->url) { + $entry = db_fetch_object(db_query("SELECT nid FROM {planet_items} WHERE guid = '%s' AND fid = %d", $link, $process_feed->fid)); + } + else { + $entry = db_fetch_object(db_query("SELECT ai.nid AS nid FROM {node} n, {planet_items} ai WHERE ai.fid = %d AND ai.nid = n.nid AND n.title = '%s'", $process_feed->fid, $title)); + } + + $link = $item->get_permalink(); + // this is node created date format for Drupal + $date = $item->get_date('Y-m-d H:i:s O'); + + $entry->changed = $date; + $entry->title = $title; + $entry->body = $body; + $entry->body = planet_convert_relative_urls($body, $link); + $entry->teaser = node_teaser($entry->body); + $entry->revision = true; + + if (!isset($entry->nid)) { + //print "Planet item " . $entry->title . "
"; + $entry->type = 'planet'; + + $options = variable_get('node_options_planet', array()); + + $entry->uid = $process_feed->uid; + $entry->status = 1; + $entry->moderate = 0; + $entry->promote = in_array('promote', $options) ? 1 : 0; + $entry->sticky = in_array('sticky', $options) ? 1 : 0; + $entry->comment = in_array('comment', $options) ? 2 : 0; + $entry->format = variable_get('planet_filter_formats', 1); + $entry->created = $date; + $entry->revision = true; + + } + + node_save($entry); + db_query('INSERT INTO {planet_items} (fid, nid, iid, guid, link, created) VALUES(%d, %d, "%s", "%s", "%s", UNIX_TIMESTAMP(NOW()))', $process_feed->fid, $entry->nid, $iid, $guid, $link); + watchdog('planet', 'Adding '. $title); + drupal_set_message('Adding '. $title); + } + + // we unset $item each time to prevent any pass by reference memory leaks that PHP encounters with objects in foreach loops + unset($item); + } + +} \ Pas de fin de ligne à la fin du fichier. diff -Naur planet-6.x-1.3/README.txt 6.x/README.txt --- planet-6.x-1.3/README.txt 2006-08-25 16:11:43.000000000 -0400 +++ 6.x/README.txt 2009-03-19 11:10:42.000000000 -0400 @@ -12,8 +12,11 @@ 1. Copy the planet directory into your Drupal modules directory. -2. Go to admin/modules and enable the planet module. +2. Download the SimplePie 1.1 library: http://simplepie.org/ + Place simplepie.inc in your planet module directory. -3. To perform further configuration (and to add feeds), go to admin/settings/planet. +3. Go to admin/modules and enable the planet module. + +4. To perform further configuration (and to add feeds), go to admin/settings/planet.