Index: modules/aggregator/aggregator.admin.inc =================================================================== RCS file: /cvs/drupal/drupal/modules/aggregator/aggregator.admin.inc,v retrieving revision 1.21 diff -u -r1.21 aggregator.admin.inc --- modules/aggregator/aggregator.admin.inc 8 Nov 2008 07:12:18 -0000 1.21 +++ modules/aggregator/aggregator.admin.inc 1 Dec 2008 19:03:07 -0000 @@ -52,36 +52,36 @@ * @see aggregator_form_feed_validate() * @see aggregator_form_feed_submit() */ -function aggregator_form_feed(&$form_state, $edit = array('refresh' => 900, 'block' => 5, 'title' => '', 'url' => '', 'fid' => NULL)) { +function aggregator_form_feed(&$form_state, $feed = NULL) { $period = drupal_map_assoc(array(900, 1800, 3600, 7200, 10800, 21600, 32400, 43200, 64800, 86400, 172800, 259200, 604800, 1209600, 2419200), 'format_interval'); - if ($edit['refresh'] == '') { - $edit['refresh'] = 3600; + if (empty($feed->refresh)) { + $feed->refresh = 3600; } $form['title'] = array('#type' => 'textfield', '#title' => t('Title'), - '#default_value' => $edit['title'], + '#default_value' => isset($feed->title) ? $feed->title : '', '#maxlength' => 255, '#description' => t('The name of the feed (or the name of the website providing the feed).'), '#required' => TRUE, ); $form['url'] = array('#type' => 'textfield', '#title' => t('URL'), - '#default_value' => $edit['url'], + '#default_value' => isset($feed->url) ? $feed->url : '', '#maxlength' => 255, '#description' => t('The fully-qualified URL of the feed.'), '#required' => TRUE, ); $form['refresh'] = array('#type' => 'select', '#title' => t('Update interval'), - '#default_value' => $edit['refresh'], + '#default_value' => isset($feed->refresh) ? $feed->refresh : 900, '#options' => $period, '#description' => t('The length of time between feed updates. 
Requires a correctly configured cron maintenance task.', array('@cron' => url('admin/reports/status'))), ); $form['block'] = array('#type' => 'select', '#title' => t('News items in block'), - '#default_value' => $edit['block'], + '#default_value' => isset($feed->block) ? $feed->block : 5, '#options' => drupal_map_assoc(array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)), '#description' => t("Drupal can make a block with the most recent news items of this feed. You can configure blocks to be displayed in the sidebar of your page. This setting lets you configure the number of news items to show in this feed's block. If you choose '0' this feed's block will be disabled.", array('@block-admin' => url('admin/build/block'))), ); @@ -89,11 +89,12 @@ // Handling of categories. $options = array(); $values = array(); - $categories = db_query('SELECT c.cid, c.title, f.fid FROM {aggregator_category} c LEFT JOIN {aggregator_category_feed} f ON c.cid = f.cid AND f.fid = :fid ORDER BY title', array(':fid' => $edit['fid'])); + $categories = db_query('SELECT c.cid, c.title, f.fid FROM {aggregator_category} c LEFT JOIN {aggregator_category_feed} f ON c.cid = f.cid AND f.fid = :fid ORDER BY title', array(':fid' => isset($feed->fid) ? 
$feed->fid : NULL)); foreach ($categories as $category) { $options[$category->cid] = check_plain($category->title); if ($category->fid) $values[] = $category->cid; } + if ($options) { $form['category'] = array( '#type' => 'checkboxes', @@ -108,14 +109,14 @@ '#value' => t('Save'), ); - if ($edit['fid']) { + if (!empty($feed->fid)) { $form['delete'] = array( '#type' => 'submit', '#value' => t('Delete'), ); $form['fid'] = array( '#type' => 'hidden', - '#value' => $edit['fid'], + '#value' => $feed->fid, ); } @@ -200,7 +201,7 @@ '#value' => $feed, ), ), - t('Are you sure you want to remove all items from the feed %feed?', array('%feed' => $feed['title'])), + t('Are you sure you want to remove all items from the feed %feed?', array('%feed' => $feed->title)), 'admin/content/aggregator', t('This action cannot be undone.'), t('Remove items'), @@ -386,37 +387,105 @@ * Form builder; Configure the aggregator system. * * @ingroup forms - * @see system_settings_form() */ -function aggregator_admin_settings() { - $items = array(0 => t('none')) + drupal_map_assoc(array(3, 5, 10, 15, 20, 25), '_aggregator_items'); - $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800), 'format_interval'); - - $form['aggregator_allowed_html_tags'] = array( - '#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#size' => 80, '#maxlength' => 255, - '#default_value' => variable_get('aggregator_allowed_html_tags', '
      • '), - '#description' => t('A space-separated list of HTML tags allowed in the content of feed items. (Tags in this list are not removed by Drupal.)'), - ); - - $form['aggregator_summary_items'] = array( - '#type' => 'select', '#title' => t('Items shown in sources and categories pages') , - '#default_value' => variable_get('aggregator_summary_items', 3), '#options' => $items, - '#description' => t('Number of feed items displayed in feed and category summary pages.'), - ); +function aggregator_admin_form($form_state) { + + // Make sure configuration is sane. + aggregator_sanitize_configuration(); + + // Get all available fetchers. + $fetchers = module_implements('aggregator_fetch'); + foreach ($fetchers as $k => $module) { + if ($info = module_invoke($module, 'aggregator_fetch_info')) { + $label = $info['title'] . ' ' . $info['description'] . ''; + } + else { + $label = $module; + } + unset($fetchers[$k]); + $fetchers[$module] = $label; + } + + // Get all available parsers. + $parsers = module_implements('aggregator_parse'); + foreach ($parsers as $k => $module) { + if ($info = module_invoke($module, 'aggregator_parse_info')) { + $label = $info['title'] . ' ' . $info['description'] . ''; + } + else { + $label = $module; + } + unset($parsers[$k]); + $parsers[$module] = $label; + } + + // Get all available processors. + $processors = module_implements('aggregator_process'); + foreach ($processors as $k => $module) { + if ($info = module_invoke($module, 'aggregator_process_info')) { + $label = $info['title'] . ' ' . $info['description'] . ''; + } + else { + $label = $module; + } + unset($processors[$k]); + $processors[$module] = $label; + } + + // Only show basic configuration if there are actually options. + $basic_conf = array(); + if (count($fetchers) > 1) { + $basic_conf['aggregator_fetcher'] = array( + '#type' => 'radios', + '#title' => t('Fetcher'), + '#description' => t('Fetchers download data from an external source. 
Choose a fetcher suitable for the external source you would like to download from.'), + '#options' => $fetchers, + '#default_value' => variable_get('aggregator_fetcher', 'aggregator'), + ); + } + if (count($parsers) > 1) { + $basic_conf['aggregator_parser'] = array( + '#type' => 'radios', + '#title' => t('Parser'), + '#description' => t('Parsers transform downloaded data into standard structures. Choose a parser suitable for the type of feeds you would like to aggregate.'), + '#options' => $parsers, + '#default_value' => variable_get('aggregator_parser', 'aggregator'), + ); + } + if (count($processors) > 1) { + $basic_conf['aggregator_processors'] = array( + '#type' => 'checkboxes', + '#title' => t('Processors'), + '#description' => t('Processors act on parsed feed data, for example they store feed items. Choose the processors suitable for your task.'), + '#options' => $processors, + '#default_value' => variable_get('aggregator_processors', array('aggregator')), + ); + } + if (count($basic_conf)) { + $form['basic_conf'] = array( + '#type' => 'fieldset', + '#title' => t('Basic configuration'), + '#description' => t('For most aggregation tasks, the default settings are fine.'), + '#collapsible' => TRUE, + '#collapsed' => FALSE, + ); + $form['basic_conf'] += $basic_conf; + } + + // Implementing modules will expect an array at $form['modules']. + $form['modules'] = array(); - $form['aggregator_clear'] = array( - '#type' => 'select', '#title' => t('Discard items older than'), - '#default_value' => variable_get('aggregator_clear', 9676800), '#options' => $period, - '#description' => t('The length of time to retain feed items before discarding. 
(Requires a correctly configured cron maintenance task.)', array('@cron' => url('admin/reports/status'))), + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Save configuration'), ); - $form['aggregator_category_selector'] = array( - '#type' => 'radios', '#title' => t('Category selection type'), '#default_value' => variable_get('aggregator_category_selector', 'checkboxes'), - '#options' => array('checkboxes' => t('checkboxes'), 'select' => t('multiple selector')), - '#description' => t('The type of category selection widget displayed on categorization pages. (For a small number of categories, checkboxes are easier to use, while a multiple selector works well with large numbers of categories.)'), - ); + return $form; +} - return system_settings_form($form); +function aggregator_admin_form_submit($form, &$form_state) { + $form_state['values']['aggregator_processors'] = array_filter($form_state['values']['aggregator_processors']); + system_settings_form_submit($form, $form_state); } /** Index: modules/aggregator/aggregator.api.php =================================================================== RCS file: modules/aggregator/aggregator.api.php diff -N modules/aggregator/aggregator.api.php --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ modules/aggregator/aggregator.api.php 1 Dec 2008 19:03:07 -0000 @@ -0,0 +1,194 @@ +url contains the link to the feed. Download the data at the URL + * and expose it to other modules by attaching it to $feed->source_string. + * + * @see hook_aggregator_fetch_info() + * @see hook_aggregator_parse() + * @see hook_aggregator_process() + * + * @ingroup aggregator + */ +function hook_aggregator_fetch($feed) { + $feed->source_string = mymodule_fetch($feed->url); +} + +/** + * Implement this hook to expose the title and a short description of your + * fetcher. + * + * The title and the description provided are shown on + * admin/content/aggregator/settings among other places. 
Use as title the human + * readable name of the fetcher and as description a brief (40 to 80 characters) + * explanation of the fetcher's functionality. + * + * This hook is only called if your module implements hook_aggregator_fetch(). + * If this hook is not implemented aggregator will use your module's file name + * as title and there will be no description. + * + * @return + * An associative array defining a title and a description string. + * + * @see hook_aggregator_fetch() + * + * @ingroup aggregator + */ +function hook_aggregator_fetch_info() { + return array( + 'title' => t('Default fetcher'), + 'description' => t('Default fetcher for resources available by URL.'), + ); +} + +/** + * Implement this hook to create an alternative parser for aggregator module. + * + * A parser converts feed item data to a common format. The parser is called + * at the second of the three aggregation stages: data is downloaded by the + * active fetcher, it is converted to a common format by the active parser and + * finally, it is passed to all active processors which manipulate or store the + * data. + * + * Modules that define this hook can be set as active parser on + * admin/content/aggregator/settings. Only one parser can be active at a time. + * + * @param $feed + * The $feed object that describes the resource to be parsed. + * $feed->source_string contains the raw feed data as a string. Parse data + * from $feed->source_string and expose it to other modules as an array of + * data items on $feed->items. 
+ * + * By convention, the common format for a single feed item is: + * $item[key-name] = value; + * + * Recognized keys: + * TITLE (string) - the title of a feed item + * DESCRIPTION (string) - the description (body text) of a feed item + * TIMESTAMP (UNIX timestamp) - the feed item's published time as UNIX timestamp + * AUTHOR (string) - the feed item's author + * GUID (string) - RSS/Atom global unique identifier + * LINK (string) - the feed item's URL + * + * @see hook_aggregator_parse_info() + * @see hook_aggregator_fetch() + * @see hook_aggregator_process() + * + * @ingroup aggregator + */ +function hook_aggregator_parse($feed) { + $feed->items = mymodule_parse($feed->source_string); +} + +/** + * Implement this hook to expose the title and a short description of your + * parser. + * + * The title and the description provided are shown on + * admin/content/aggregator/settings among other places. Use as title the human + * readable name of the parser and as description a brief (40 to 80 characters) + * explanation of the parser's functionality. + * + * This hook is only called if your module implements hook_aggregator_parse(). + * If this hook is not implemented aggregator will use your module's file name + * as title and there will be no description. + * + * @return + * An associative array defining a title and a description string. + * + * @see hook_aggregator_parse() + * + * @ingroup aggregator + */ +function hook_aggregator_parse_info() { + return array( + 'title' => t('Default parser'), + 'description' => t('Default parser for RSS, Atom and RDF feeds.'), + ); +} + +/** + * Implement this hook to create a processor for aggregator module. + * + * A processor acts on parsed feed data. Active processors are called at the + * third and last of the aggregation stages: data is downloaded by the active + * fetcher, it is converted to a common format by the active parser and + * finally, it is passed to all active processors which manipulate or store the + * data. 
+ * + * Modules that define this hook can be activated as processor on + * admin/content/aggregator/settings. + * + * @param $feed + * The $feed object that describes the resource to be processed. $feed->items + * contains an array of feed items downloaded and parsed at the parsing + * stage. See hook_aggregator_parse() for the basic format of a single item + * in the $feed->items array. For the exact format refer to the particular + * parser in use. + * + * @see hook_aggregator_process_info() + * @see hook_aggregator_fetch() + * @see hook_aggregator_parse() + * + * @ingroup aggregator + */ +function hook_aggregator_process($feed) { + foreach ($feed->items as $item) { + mymodule_save($item); + } +} + +/** + * Implement this hook to expose the title and a short description of your + * processor. + * + * The title and the description provided are shown most importantly on + * admin/content/aggregator/settings . Use as title the natural name of the + * processor and as description a brief (40 to 80 characters) explanation of + * the functionality. + * + * This hook is only called if your module implements + * hook_aggregator_process(). If this hook is not implemented aggregator + * will use your module's file name as title and there will be no description. + * + * @return + * An associative array defining a title and a description string. + * + * @see hook_aggregator_process() + * + * @ingroup aggregator + */ +function hook_aggregator_process_info($feed) { + return array( + 'title' => t('Default processor'), + 'description' => t('Creates lightweight records of feed items.'), + ); +} + +/** + * @} End of "addtogroup hooks". 
+ */ \ No newline at end of file Index: modules/aggregator/aggregator.fetcher.inc =================================================================== RCS file: modules/aggregator/aggregator.fetcher.inc diff -N modules/aggregator/aggregator.fetcher.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ modules/aggregator/aggregator.fetcher.inc 1 Dec 2008 19:03:07 -0000 @@ -0,0 +1,74 @@ + t('Default fetcher'), + 'description' => t('Downloads data from a URL using Drupal\'s HTTP request handler.'), + ); +} + +/** + * Implementation of hook_aggregator_fetch(). + */ +function aggregator_aggregator_fetch($feed) { + $feed->source_string = FALSE; + + // Generate conditional GET headers. + $headers = array(); + if ($feed->etag) { + $headers['If-None-Match'] = $feed->etag; + } + if ($feed->modified) { + $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed->modified) . ' GMT'; + } + + // Request feed. + $result = drupal_http_request($feed->url, $headers); + + // Process HTTP response code. + switch ($result->code) { + case 304: + db_update('aggregator_feed') + ->fields(array('checked' => REQUEST_TIME)) + ->condition('fid', $feed->fid) + ->execute(); + drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed->title))); + break; + case 301: + $feed->url = $result->redirect_url; + $feed->redirected = TRUE; + // Do not break here. + case 200: + case 302: + case 307: + // We store the md5 hash of feed data in the database. When refreshing a + // feed we compare stored hash and new hash calculated from downloaded + // data. If both are equal we say that feed is not updated. 
+ $md5 = md5($result->data); + if ($feed->hash == $md5) { + db_update('aggregator_feed') + ->condition('fid', $feed->fid) + ->fields(array('checked' => REQUEST_TIME)) + ->execute(); + drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed->title))); + break; + } + + $feed->source_string = $result->data; + $feed->http_headers = $result->headers; + break; + default: + watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed->title, '%error' => $result->code . ' ' . $result->error), WATCHDOG_WARNING); + drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed->title, '%error' => $result->code . ' ' . $result->error))); + module_invoke('system', 'check_http_request'); + } +} Index: modules/aggregator/aggregator.info =================================================================== RCS file: /cvs/drupal/drupal/modules/aggregator/aggregator.info,v retrieving revision 1.9 diff -u -r1.9 aggregator.info --- modules/aggregator/aggregator.info 11 Oct 2008 02:32:33 -0000 1.9 +++ modules/aggregator/aggregator.info 1 Dec 2008 19:03:07 -0000 @@ -8,4 +8,7 @@ files[] = aggregator.module files[] = aggregator.admin.inc files[] = aggregator.pages.inc +files[] = aggregator.fetcher.inc +files[] = aggregator.parser.inc +files[] = aggregator.processor.inc files[] = aggregator.install Index: modules/aggregator/aggregator.install =================================================================== RCS file: /cvs/drupal/drupal/modules/aggregator/aggregator.install,v retrieving revision 1.19 diff -u -r1.19 aggregator.install --- modules/aggregator/aggregator.install 15 Nov 2008 13:01:04 -0000 1.19 +++ modules/aggregator/aggregator.install 1 Dec 2008 19:03:07 -0000 @@ -20,6 +20,9 @@ variable_del('aggregator_summary_items'); variable_del('aggregator_clear'); variable_del('aggregator_category_selector'); + variable_del('aggregator_fetcher'); + 
variable_del('aggregator_parser'); + variable_del('aggregator_processors'); } /** Index: modules/aggregator/aggregator.module =================================================================== RCS file: /cvs/drupal/drupal/modules/aggregator/aggregator.module,v retrieving revision 1.401 diff -u -r1.401 aggregator.module --- modules/aggregator/aggregator.module 16 Nov 2008 04:38:15 -0000 1.401 +++ modules/aggregator/aggregator.module 1 Dec 2008 19:03:07 -0000 @@ -133,7 +133,7 @@ $items['admin/content/aggregator/settings'] = array( 'title' => 'Settings', 'page callback' => 'drupal_get_form', - 'page arguments' => array('aggregator_admin_settings'), + 'page arguments' => array('aggregator_admin_form'), 'type' => MENU_LOCAL_TASK, 'weight' => 10, 'access arguments' => array('administer news feeds'), @@ -289,7 +289,7 @@ function aggregator_cron() { $result = db_query('SELECT * FROM {aggregator_feed} WHERE checked + refresh < :time', array(':time' => REQUEST_TIME)); foreach ($result as $feed) { - aggregator_refresh((array)$feed); + aggregator_refresh($feed); } } @@ -326,8 +326,8 @@ elseif ($op == 'save') { list($type, $id) = explode('-', $delta); if ($type == 'category') { - db_merge('aggregator_category') - ->key(array('cid' => $id)) + db_update('aggregator_category') + ->condition('cid', $id) ->fields(array('block' => $edit['block'])) ->execute(); } @@ -406,6 +406,7 @@ ->fields(array( 'title' => $edit['title'], 'description' => $edit['description'], + 'block' => 5, )) ->execute(); $op = 'insert'; @@ -495,7 +496,7 @@ * An associative array describing the feed to be cleared. 
*/ function aggregator_remove($feed) { - $iids = db_query('SELECT iid FROM {aggregator_item} WHERE fid = :fid', array(':fid' => $feed['fid']))->fetchCol(); + $iids = db_query('SELECT iid FROM {aggregator_item} WHERE fid = :fid', array(':fid' => $feed->fid))->fetchCol(); if ($iids) { db_delete('aggregator_category_item') ->condition('iid', $iids, 'IN') @@ -503,132 +504,19 @@ } db_delete('aggregator_item') - ->condition('fid', $feed['fid']) + ->condition('fid', $feed->fid) ->execute(); db_merge('aggregator_feed') - ->key(array('fid' => $feed['fid'])) + ->key(array('fid' => $feed->fid)) ->fields(array( 'checked' => 0, 'hash' => '', 'modified' => 0, - 'description' => $feed['description'], - 'image' => $feed['image'], + 'description' => $feed->description, + 'image' => $feed->image, )) ->execute(); - drupal_set_message(t('The news items from %site have been removed.', array('%site' => $feed['title']))); -} - -/** - * Callback function used by the XML parser. - */ -function aggregator_element_start($parser, $name, $attributes) { - global $item, $element, $tag, $items, $channel; - - switch ($name) { - case 'IMAGE': - case 'TEXTINPUT': - case 'CONTENT': - case 'SUMMARY': - case 'TAGLINE': - case 'SUBTITLE': - case 'LOGO': - case 'INFO': - $element = $name; - break; - case 'ID': - if ($element != 'ITEM') { - $element = $name; - } - case 'LINK': - if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate') { - if ($element == 'ITEM') { - $items[$item]['LINK'] = $attributes['HREF']; - } - else { - $channel['LINK'] = $attributes['HREF']; - } - } - break; - case 'ITEM': - $element = $name; - $item += 1; - break; - case 'ENTRY': - $element = 'ITEM'; - $item += 1; - break; - } - - $tag = $name; -} - -/** - * Call-back function used by the XML parser. 
- */ -function aggregator_element_end($parser, $name) { - global $element; - - switch ($name) { - case 'IMAGE': - case 'TEXTINPUT': - case 'ITEM': - case 'ENTRY': - case 'CONTENT': - case 'INFO': - $element = ''; - break; - case 'ID': - if ($element == 'ID') { - $element = ''; - } - } -} - -/** - * Callback function used by the XML parser. - */ -function aggregator_element_data($parser, $data) { - global $channel, $element, $items, $item, $image, $tag; - $items += array($item => array()); - switch ($element) { - case 'ITEM': - $items[$item] += array($tag => ''); - $items[$item][$tag] .= $data; - break; - case 'IMAGE': - case 'LOGO': - $image += array($tag => ''); - $image[$tag] .= $data; - break; - case 'LINK': - if ($data) { - $items[$item] += array($tag => ''); - $items[$item][$tag] .= $data; - } - break; - case 'CONTENT': - $items[$item] += array('CONTENT' => ''); - $items[$item]['CONTENT'] .= $data; - break; - case 'SUMMARY': - $items[$item] += array('SUMMARY' => ''); - $items[$item]['SUMMARY'] .= $data; - break; - case 'TAGLINE': - case 'SUBTITLE': - $channel += array('DESCRIPTION' => ''); - $channel['DESCRIPTION'] .= $data; - break; - case 'INFO': - case 'ID': - case 'TEXTINPUT': - // The sub-element is not supported. However, we must recognize - // it or its contents will end up in the item array. - break; - default: - $channel += array($tag => ''); - $channel[$tag] .= $data; - } + drupal_set_message(t('The news items from %site have been removed.', array('%site' => $feed->title))); } /** @@ -638,311 +526,26 @@ * An associative array describing the feed to be refreshed. */ function aggregator_refresh($feed) { - global $channel, $image; - - // Generate conditional GET headers. - $headers = array(); - if ($feed['etag']) { - $headers['If-None-Match'] = $feed['etag']; - } - if ($feed['modified']) { - $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed['modified']) . ' GMT'; - } - - // Request feed. 
- $result = drupal_http_request($feed['url'], $headers); - - // Process HTTP response code. - switch ($result->code) { - case 304: - db_update('aggregator_feed') - ->fields(array('checked' => REQUEST_TIME)) - ->condition('fid', $feed['fid']) - ->execute(); - drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title']))); - break; - case 301: - $feed['url'] = $result->redirect_url; - // Do not break here. - case 200: - case 302: - case 307: - // We store the md5 hash of feed data in the database. When refreshing a - // feed we compare stored hash and new hash calculated from downloaded - // data. If both are equal we say that feed is not updated. - $md5 = md5($result->data); - if ($feed['hash'] == $md5) { - db_update('aggregator_feed') - ->condition('fid', $feed['fid']) - ->fields(array('checked' => REQUEST_TIME)) - ->execute(); - drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title']))); - break; - } - - // Filter the input data. - if (aggregator_parse_feed($result->data, $feed)) { - $modified = empty($result->headers['Last-Modified']) ? 0 : strtotime($result->headers['Last-Modified']); - - // Prepare the channel data. - foreach ($channel as $key => $value) { - $channel[$key] = trim($value); - } - - // Prepare the image data (if any). - foreach ($image as $key => $value) { - $image[$key] = trim($value); - } - - if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) { - $image = l(theme('image', $image['URL'], $image['TITLE']), $image['LINK'], array('html' => TRUE)); - } - else { - $image = ''; - } - - $etag = empty($result->headers['ETag']) ? '' : $result->headers['ETag']; - // Update the feed data. 
- db_merge('aggregator_feed') - ->key(array('fid' => $feed['fid'])) - ->fields(array( - 'url' => $feed['url'], - 'checked' => REQUEST_TIME, - 'link' => $channel['LINK'], - 'description' => $channel['DESCRIPTION'], - 'image' => $image, - 'hash' => $md5, - 'etag' => $etag, - 'modified' => $modified, - )) - ->execute(); - - // Clear the cache. - cache_clear_all(); - - if (isset($result->redirect_url)) { - watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed['title'], '%url' => $feed['url'])); - } - - watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed['title'])); - drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed['title']))); - } - break; - default: - watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error), WATCHDOG_WARNING); - drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error))); - module_invoke('system', 'check_http_request'); - } -} - -/** - * Parse the W3C date/time format, a subset of ISO 8601. - * - * PHP date parsing functions do not handle this format. - * See http://www.w3.org/TR/NOTE-datetime for more information. - * Originally from MagpieRSS (http://magpierss.sourceforge.net/). - * - * @param $date_str - * A string with a potentially W3C DTF date. - * @return - * A timestamp if parsed successfully or FALSE if not. - */ -function aggregator_parse_w3cdtf($date_str) { - if (preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) { - list($year, $month, $day, $hours, $minutes, $seconds) = array($match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); - // Calculate the epoch for current date assuming GMT. 
- $epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year); - if ($match[10] != 'Z') { // Z is zulu time, aka GMT - list($tz_mod, $tz_hour, $tz_min) = array($match[8], $match[9], $match[10]); - // Zero out the variables. - if (!$tz_hour) { - $tz_hour = 0; - } - if (!$tz_min) { - $tz_min = 0; - } - $offset_secs = (($tz_hour * 60) + $tz_min) * 60; - // Is timezone ahead of GMT? If yes, subtract offset. - if ($tz_mod == '+') { - $offset_secs *= -1; - } - $epoch += $offset_secs; - } - return $epoch; - } - else { - return FALSE; - } -} - -/** - * Parse a feed and store its items. - * - * @param $data - * The feed data. - * @param $feed - * An associative array describing the feed to be parsed. - * @return - * FALSE on error, TRUE otherwise. - */ -function aggregator_parse_feed(&$data, $feed) { - global $items, $image, $channel; - - // Unset the global variables before we use them. - unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']); - $items = array(); - $image = array(); - $channel = array(); - - // Parse the data. 
- $xml_parser = drupal_xml_parser_create($data); - xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end'); - xml_set_character_data_handler($xml_parser, 'aggregator_element_data'); - - if (!xml_parse($xml_parser, $data, 1)) { - watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser)), WATCHDOG_WARNING); - drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))), 'error'); - return FALSE; - } - xml_parser_free($xml_parser); - - // We reverse the array such that we store the first item last, and the last - // item first. In the database, the newest item should be at the top. - $items = array_reverse($items); - - // Initialize variables. - $title = $link = $author = $description = $guid = NULL; - foreach ($items as $item) { - unset($title, $link, $author, $description, $guid); - - // Prepare the item: - foreach ($item as $key => $value) { - $item[$key] = trim($value); - } - - // Resolve the item's title. If no title is found, we use up to 40 - // characters of the description ending at a word boundary, but not - // splitting potential entities. - if (!empty($item['TITLE'])) { - $title = $item['TITLE']; - } - elseif (!empty($item['DESCRIPTION'])) { - $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40)); - } - else { - $title = ''; - } - - // Resolve the items link. - if (!empty($item['LINK'])) { - $link = $item['LINK']; - } - else { - $link = $feed['link']; - } - $guid = isset($item['GUID']) ? $item['GUID'] : ''; - - // Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag. 
- if (!empty($item['CONTENT:ENCODED'])) { - $item['DESCRIPTION'] = $item['CONTENT:ENCODED']; - } - elseif (!empty($item['SUMMARY'])) { - $item['DESCRIPTION'] = $item['SUMMARY']; - } - elseif (!empty($item['CONTENT'])) { - $item['DESCRIPTION'] = $item['CONTENT']; - } - - // Try to resolve and parse the item's publication date. - $date = ''; - foreach (array('PUBDATE', 'DC:DATE', 'DCTERMS:ISSUED', 'DCTERMS:CREATED', 'DCTERMS:MODIFIED', 'ISSUED', 'CREATED', 'MODIFIED', 'PUBLISHED', 'UPDATED') as $key) { - if (!empty($item[$key])) { - $date = $item[$key]; - break; + // Fetch the feed. + $fetcher = variable_get('aggregator_fetcher', 'aggregator'); + module_invoke($fetcher, 'aggregator_fetch', $feed); + + if ($feed->source_string !== FALSE) { + // Parse the feed. + $parser = variable_get('aggregator_parser', 'aggregator'); + module_invoke($parser, 'aggregator_parse', $feed); + + // If there are items on the feed, let all enabled processors do their work on it. + if (@count($feed->items)) { + $processors = variable_get('aggregator_processors', array('aggregator')); + foreach ($processors as $processor) { + module_invoke($processor, 'aggregator_process', $feed); } } - - $timestamp = strtotime($date); - - if ($timestamp === FALSE) { - $timestamp = aggregator_parse_w3cdtf($date); // Aggregator_parse_w3cdtf() returns FALSE on failure. - } - - // Save this item. Try to avoid duplicate entries as much as possible. If - // we find a duplicate entry, we resolve it and pass along its ID is such - // that we can update it if needed. 
- if (!empty($guid)) { - $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND guid = :guid", array(':fid' => $feed['fid'], ':guid' => $guid))->fetchObject(); - } - elseif ($link && $link != $feed['link'] && $link != $feed['url']) { - $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND link = :link", array(':fid' => $feed['fid'], ':link' => $link))->fetchObject(); - } - else { - $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND title = :title", array(':fid' => $feed['fid'], ':title' => $title))->fetchObject(); - } - - if (!$timestamp) { - $timestamp = isset($entry->timestamp) ? $entry->timestamp : REQUEST_TIME; - } - $item += array('AUTHOR' => '', 'DESCRIPTION' => ''); - aggregator_save_item(array('iid' => (isset($entry->iid) ? $entry->iid : ''), 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION'], 'guid' => $guid)); - } - - // Remove all items that are older than flush item timer. - $age = REQUEST_TIME - variable_get('aggregator_clear', 9676800); - $iids = db_query('SELECT iid FROM {aggregator_item} WHERE fid = :fid AND timestamp < :timestamp', array(':fid' => $feed['fid'], ':timestamp' => $age))->fetchCol(); - if ($iids) { - db_delete('aggregator_category_item') - ->condition('iid', $iids, 'IN') - ->execute(); - db_delete('aggregator_item') - ->condition('iid', $iids, 'IN') - ->execute(); - } - - return TRUE; -} - -/** - * Add/edit/delete an aggregator item. - * - * @param $edit - * An associative array describing the item to be added/edited/deleted. 
- */ -function aggregator_save_item($edit) { - if ($edit['title'] && empty($edit['iid'])) { - $edit['iid'] = db_insert('aggregator_item') - ->fields(array( - 'title' => $edit['title'], - 'link' => $edit['link'], - 'author' => $edit['author'], - 'description' => $edit['description'], - 'guid' => $edit['guid'], - 'timestamp' => $edit['timestamp'], - 'fid' => $edit['fid'], - )) - ->execute(); - } - if ($edit['iid'] && !$edit['title']) { - db_delete('aggregator_item') - ->condition('iid', $edit['iid']) - ->execute(); - db_delete('aggregator_category_item') - ->condition('iid', $edit['iid']) - ->execute(); - } - elseif ($edit['title'] && $edit['link']) { - // file the items in the categories indicated by the feed - $result = db_query('SELECT cid FROM {aggregator_category_feed} WHERE fid = :fid', array(':fid' => $edit['fid'])); - foreach ($result as $category) { - db_merge('aggregator_category_item') - ->fields(array( - 'cid' => $category->cid, - 'iid' => $edit['iid'], - )) - ->execute(); - } } + + // Expire old feed items. + aggregator_expire($feed); } /** @@ -956,7 +559,7 @@ function aggregator_feed_load($fid) { static $feeds; if (!isset($feeds[$fid])) { - $feeds[$fid] = db_query('SELECT * FROM {aggregator_feed} WHERE fid = :fid', array(':fid' => $fid))->fetchAssoc(); + $feeds[$fid] = db_query('SELECT * FROM {aggregator_feed} WHERE fid = :fid', array(':fid' => $fid))->fetchObject(); } return $feeds[$fid]; @@ -980,6 +583,26 @@ } /** + * Expire feed items on $feed that are older than aggregator_clear. + * + * @param $feed + * Array describing feed. + */ +function aggregator_expire($feed) { + // Remove all items that are older than flush item timer. 
+ $age = REQUEST_TIME - variable_get('aggregator_clear', 9676800); + $iids = db_query('SELECT iid FROM {aggregator_item} WHERE fid = :fid AND timestamp < :timestamp', array(':fid' => $feed->fid, ':timestamp' => $age))->fetchCol(); + if ($iids) { + db_delete('aggregator_category_item') + ->condition('iid', $iids, 'IN') + ->execute(); + db_delete('aggregator_item') + ->condition('iid', $iids, 'IN') + ->execute(); + } +} + +/** * Format an individual feed item for display in the block. * * @param $item @@ -993,9 +616,8 @@ function theme_aggregator_block_item($item, $feed = 0) { // Display the external link to the item. - $output .= '' . check_plain($item->title) . "\n"; - - return $output; + return '' . check_plain($item->title) . "\n"; + } /** @@ -1011,6 +633,41 @@ } /** + * Check and sanitize aggregator configuration. + * + * Goes through all fetchers, parsers and processors and checks whether they are available. + * If one is missing resets to standard configuration. + * + * @return + * TRUE if this function reset the configuration FALSE if not. + */ +function aggregator_sanitize_configuration() { + $reset = FALSE; + $fetcher = variable_get('aggregator_fetcher', 'aggregator'); + if (!module_exists($fetcher)) { + $reset = TRUE; + } + $parser = variable_get('aggregator_parser', 'aggregator'); + if (!module_exists($parser)) { + $reset = TRUE; + } + $processors = variable_get('aggregator_processors', array('aggregator')); + foreach ($processors as $processor) { + if (!module_exists($processor)) { + $reset = TRUE; + break; + } + } + if ($reset) { + variable_del('aggregator_fetcher'); + variable_del('aggregator_parser'); + variable_del('aggregator_processors'); + return TRUE; + } + return FALSE; +} + +/** * Helper function for drupal_map_assoc. 
/**
 * Implementation of hook_aggregator_parse().
 *
 * Parses $feed->source_string with aggregator_parse_feed() and, when parsing
 * succeeds, writes the channel metadata collected in the $channel and $image
 * globals to the feed's row in {aggregator_feed}.
 *
 * @param $feed
 *   Feed object. Reads source_string, http_headers, fid, url and title, and
 *   checks whether redirected is set. aggregator_parse_feed() stores the
 *   parsed items on $feed->items.
 */
function aggregator_aggregator_parse($feed) {
  global $channel, $image;

  // Filter the input data. Nothing is stored when the feed cannot be parsed.
  if (!aggregator_parse_feed($feed->source_string, $feed)) {
    return;
  }

  // strtotime() returns FALSE for an unparseable header; store 0 in that case
  // so the column always receives an integer.
  $modified = 0;
  if (!empty($feed->http_headers['Last-Modified'])) {
    $parsed_modified = strtotime($feed->http_headers['Last-Modified']);
    if ($parsed_modified !== FALSE) {
      $modified = $parsed_modified;
    }
  }

  // Prepare the channel data.
  foreach ($channel as $key => $value) {
    $channel[$key] = trim($value);
  }

  // Prepare the image data (if any).
  foreach ($image as $key => $value) {
    $image[$key] = trim($value);
  }

  // From here on the $image global holds rendered markup (or an empty string)
  // rather than the array of raw tag values.
  if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) {
    $image = l(theme('image', $image['URL'], $image['TITLE']), $image['LINK'], array('html' => TRUE));
  }
  else {
    $image = '';
  }

  $etag = empty($feed->http_headers['ETag']) ? '' : $feed->http_headers['ETag'];

  // Update the feed data.
  db_merge('aggregator_feed')
    ->key(array('fid' => $feed->fid))
    ->fields(array(
      'url' => $feed->url,
      'checked' => REQUEST_TIME,
      'link' => !empty($channel['LINK']) ? $channel['LINK'] : '',
      'description' => !empty($channel['DESCRIPTION']) ? $channel['DESCRIPTION'] : '',
      'image' => $image,
      'hash' => md5($feed->source_string),
      'etag' => $etag,
      'modified' => $modified,
    ))
    ->execute();

  // Clear the cache.
  cache_clear_all();

  if (isset($feed->redirected)) {
    watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed->title, '%url' => $feed->url));
  }

  watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed->title));
  drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed->title)));
}

/**
 * Parse a feed and store its items.
 *
 * Resets the parser globals, runs the XML parser over $data with the
 * aggregator_element_*() callbacks, then normalizes every collected item
 * (title, link, GUID, description, timestamp) and appends it to $feed->items.
 *
 * @param $data
 *   The feed data.
 * @param $feed
 *   An object describing the feed to be parsed. Its title is used in error
 *   messages and its link is the fallback link for items that have none.
 * @return
 *   FALSE on error, TRUE otherwise.
 */
function aggregator_parse_feed(&$data, $feed) {
  global $items, $image, $channel;

  // Unset the global variables before we use them.
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
  $items = array();
  $image = array();
  $channel = array();

  // Parse the data.
  $xml_parser = drupal_xml_parser_create($data);
  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');

  if (!xml_parse($xml_parser, $data, 1)) {
    // Read the error details first, then release the parser: the failure path
    // must not leak it.
    $error = xml_error_string(xml_get_error_code($xml_parser));
    $line = xml_get_current_line_number($xml_parser);
    xml_parser_free($xml_parser);

    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', array('%site' => $feed->title, '%error' => $error, '%line' => $line), WATCHDOG_WARNING);
    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', array('%site' => $feed->title, '%error' => $error, '%line' => $line)), 'error');
    return FALSE;
  }
  xml_parser_free($xml_parser);

  // We reverse the array such that we store the first item last, and the last
  // item first. In the database, the newest item should be at the top.
  $items = array_reverse($items);

  // Items without their own link fall back to the feed's link, if it has one.
  $fallback_link = isset($feed->link) ? $feed->link : '';

  // Tags that may carry the publication date, in order of preference.
  $date_tags = array('PUBDATE', 'DC:DATE', 'DCTERMS:ISSUED', 'DCTERMS:CREATED', 'DCTERMS:MODIFIED', 'ISSUED', 'CREATED', 'MODIFIED', 'PUBLISHED', 'UPDATED');

  // Initialize items array.
  $feed->items = array();
  foreach ($items as $item) {

    // Prepare the item:
    foreach ($item as $key => $value) {
      $item[$key] = trim($value);
    }

    // Resolve the item's title. If no title is found, we use up to 40
    // characters of the description ending at a word boundary, but not
    // splitting potential entities.
    if (empty($item['TITLE'])) {
      if (!empty($item['DESCRIPTION'])) {
        $item['TITLE'] = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40));
      }
      else {
        $item['TITLE'] = '';
      }
    }

    // Resolve the item's link.
    if (empty($item['LINK'])) {
      $item['LINK'] = $fallback_link;
    }
    $item['GUID'] = isset($item['GUID']) ? $item['GUID'] : '';

    // Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag.
    if (!empty($item['CONTENT:ENCODED'])) {
      $item['DESCRIPTION'] = $item['CONTENT:ENCODED'];
    }
    elseif (!empty($item['SUMMARY'])) {
      $item['DESCRIPTION'] = $item['SUMMARY'];
    }
    elseif (!empty($item['CONTENT'])) {
      $item['DESCRIPTION'] = $item['CONTENT'];
    }

    // Try to resolve and parse the item's publication date.
    $date = '';
    foreach ($date_tags as $key) {
      if (!empty($item[$key])) {
        $date = $item[$key];
        break;
      }
    }

    $item['TIMESTAMP'] = strtotime($date);
    if ($item['TIMESTAMP'] === FALSE) {
      // aggregator_parse_w3cdtf() returns FALSE on failure.
      $item['TIMESTAMP'] = aggregator_parse_w3cdtf($date);
    }

    $item += array('AUTHOR' => '', 'DESCRIPTION' => '');

    // Store on $feed object. This is where processors will look for parsed items.
    $feed->items[] = $item;
  }

  return TRUE;
}

/**
 * Callback function used by the XML parser for opening tags.
 *
 * Tracks the enclosing element in the $element global, advances the $item
 * counter when an ITEM or ENTRY opens, captures rel="alternate" link targets,
 * and records the current tag name in the $tag global.
 *
 * @param $parser
 *   The XML parser (unused).
 * @param $name
 *   Name of the element being opened, compared against upper-case names.
 * @param $attributes
 *   Associative array of the element's attributes.
 */
function aggregator_element_start($parser, $name, $attributes) {
  global $item, $element, $tag, $items, $channel;

  switch ($name) {
    case 'IMAGE':
    case 'TEXTINPUT':
    case 'CONTENT':
    case 'SUMMARY':
    case 'TAGLINE':
    case 'SUBTITLE':
    case 'LOGO':
    case 'INFO':
      $element = $name;
      break;

    case 'ID':
      if ($element != 'ITEM') {
        $element = $name;
      }
      // Stop here: without this break an ID element would run the LINK
      // handling below.
      break;

    case 'LINK':
      // Only rel="alternate" links that actually carry a target are recorded.
      if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate' && isset($attributes['HREF'])) {
        if ($element == 'ITEM') {
          $items[$item]['LINK'] = $attributes['HREF'];
        }
        else {
          $channel['LINK'] = $attributes['HREF'];
        }
      }
      break;

    case 'ITEM':
      $element = $name;
      $item += 1;
      break;

    case 'ENTRY':
      // ENTRY elements are tracked exactly like ITEM elements.
      $element = 'ITEM';
      $item += 1;
      break;
  }

  $tag = $name;
}

/**
 * Callback function used by the XML parser for closing tags.
 *
 * Clears the $element global when one of the tracked container elements
 * closes.
 *
 * @param $parser
 *   The XML parser (unused).
 * @param $name
 *   Name of the element being closed.
 */
function aggregator_element_end($parser, $name) {
  global $element;

  switch ($name) {
    case 'IMAGE':
    case 'TEXTINPUT':
    case 'ITEM':
    case 'ENTRY':
    case 'CONTENT':
    case 'INFO':
      $element = '';
      break;

    case 'ID':
      // Only reset when the ID was being tracked as the enclosing element,
      // i.e. it was opened outside of an item.
      if ($element == 'ID') {
        $element = '';
      }
      break;
  }
}

/**
 * Callback function used by the XML parser for character data.
 *
 * Appends $data to the item, image or channel value selected by the $element
 * and $tag globals.
 *
 * @param $parser
 *   The XML parser (unused).
 * @param $data
 *   The chunk of character data to append.
 */
function aggregator_element_data($parser, $data) {
  global $channel, $element, $items, $item, $image, $tag;

  // Make sure the current item exists before anything is appended to it.
  $items += array($item => array());

  switch ($element) {
    case 'ITEM':
      $items[$item] += array($tag => '');
      $items[$item][$tag] .= $data;
      break;

    case 'IMAGE':
    case 'LOGO':
      $image += array($tag => '');
      $image[$tag] .= $data;
      break;

    case 'LINK':
      // NOTE(review): aggregator_element_start() never sets $element to
      // 'LINK', so this branch looks unreachable - confirm before removing.
      if ($data) {
        $items[$item] += array($tag => '');
        $items[$item][$tag] .= $data;
      }
      break;

    case 'CONTENT':
      $items[$item] += array('CONTENT' => '');
      $items[$item]['CONTENT'] .= $data;
      break;

    case 'SUMMARY':
      $items[$item] += array('SUMMARY' => '');
      $items[$item]['SUMMARY'] .= $data;
      break;

    case 'TAGLINE':
    case 'SUBTITLE':
      $channel += array('DESCRIPTION' => '');
      $channel['DESCRIPTION'] .= $data;
      break;

    case 'INFO':
    case 'ID':
    case 'TEXTINPUT':
      // The sub-element is not supported. However, we must recognize
      // it or its contents will end up in the item array.
      break;

    default:
      $channel += array($tag => '');
      $channel[$tag] .= $data;
  }
}
/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * PHP date parsing functions do not handle this format.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function aggregator_parse_w3cdtf($date_str) {
  // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":SS",
  // 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  if (!preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) {
    return FALSE;
  }

  // preg_match() leaves trailing groups that did not take part in the match
  // out of $match, so give every optional group a default.
  $match += array(7 => '', 8 => '', 9 => '', 10 => '', 11 => '');

  // Seconds live in group 7; group 6 still carries the leading colon.
  $seconds = ($match[7] === '') ? 0 : (int) $match[7];

  // Calculate the epoch for current date assuming GMT.
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // Z is zulu time, aka GMT: nothing to adjust. A date without any zone
  // designator ends up with a zero offset and is therefore read as GMT too.
  if ($match[11] != 'Z') {
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // Is timezone ahead of GMT? If yes, subtract offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}

/**
 * Implementation of hook_aggregator_process().
 *
 * Saves every parsed item on $feed->items through aggregator_save_item(),
 * looking up an existing {aggregator_item} row first so that duplicates are
 * avoided as much as possible.
 *
 * @param $feed
 *   Feed object carrying the parsed items in $feed->items. Anything that is
 *   not an object with a non-empty items array is ignored.
 */
function aggregator_aggregator_process($feed) {
  if (!is_object($feed) || empty($feed->items) || !is_array($feed->items)) {
    return;
  }

  foreach ($feed->items as $item) {
    // Save this item. Try to avoid duplicate entries as much as possible. If
    // we find a duplicate entry, we resolve it and pass along its ID such
    // that we can update it if needed. Match on the GUID first, then on a
    // link that differs from the feed's own link and URL, then on the title.
    if (!empty($item['GUID'])) {
      $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND guid = :guid", array(':fid' => $feed->fid, ':guid' => $item['GUID']))->fetchObject();
    }
    elseif ($item['LINK'] && $item['LINK'] != $feed->link && $item['LINK'] != $feed->url) {
      $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND link = :link", array(':fid' => $feed->fid, ':link' => $item['LINK']))->fetchObject();
    }
    else {
      $entry = db_query("SELECT iid, timestamp FROM {aggregator_item} WHERE fid = :fid AND title = :title", array(':fid' => $feed->fid, ':title' => $item['TITLE']))->fetchObject();
    }

    // Items without a usable date keep the stored timestamp of the matching
    // entry, or get the current request time when they are new.
    if (!$item['TIMESTAMP']) {
      $item['TIMESTAMP'] = isset($entry->timestamp) ? $entry->timestamp : REQUEST_TIME;
    }

    aggregator_save_item(array(
      'iid' => (isset($entry->iid) ? $entry->iid : ''),
      'fid' => $feed->fid,
      'timestamp' => $item['TIMESTAMP'],
      'title' => $item['TITLE'],
      'link' => $item['LINK'],
      'author' => $item['AUTHOR'],
      'description' => $item['DESCRIPTION'],
      'guid' => $item['GUID'],
    ));
  }
}

/**
 * Implementation of hook_form_aggregator_admin_form_alter().
 *
 * Form alter aggregator module's own form to keep processor functionality
 * separate from aggregator API functionality.
 */
function aggregator_form_aggregator_admin_form_alter(&$form, $form_state) {
  if (!in_array('aggregator', variable_get('aggregator_processors', array('aggregator')))) {
    return;
  }

  // Ask the info hook for the processor description. Invoking
  // 'aggregator_process' with 'info' as argument runs the process hook itself,
  // which ignores non-object input and returns nothing.
  $info = module_invoke('aggregator', 'aggregator_process_info');
  $items = array(0 => t('none')) + drupal_map_assoc(array(3, 5, 10, 15, 20, 25), '_aggregator_items');
  $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800), 'format_interval');

  // Only wrap into a collapsible fieldset if there is a basic configuration.
  if (isset($form['basic_conf'])) {
    $form['modules']['aggregator'] = array(
      '#type' => 'fieldset',
      '#title' => t('Default processor settings'),
      '#description' => isset($info['description']) ? $info['description'] : '',
      '#collapsible' => TRUE,
      // This code only runs when the aggregator processor is enabled, so the
      // fieldset always starts expanded.
      '#collapsed' => FALSE,
    );
  }
  else {
    $form['modules']['aggregator'] = array();
  }

  $form['modules']['aggregator']['aggregator_summary_items'] = array(
    '#type' => 'select',
    '#title' => t('Items shown in sources and categories pages'),
    '#default_value' => variable_get('aggregator_summary_items', 3),
    '#options' => $items,
    '#description' => t('Number of feed items displayed in feed and category summary pages.'),
  );

  // NOTE(review): the '@cron' replacement is passed to t() but the string as
  // it appears here contains no @cron placeholder - confirm against the
  // original file.
  $form['modules']['aggregator']['aggregator_clear'] = array(
    '#type' => 'select',
    '#title' => t('Discard items older than'),
    '#default_value' => variable_get('aggregator_clear', 9676800),
    '#options' => $period,
    '#description' => t('The length of time to retain feed items before discarding. (Requires a correctly configured cron maintenance task.)', array('@cron' => url('admin/reports/status'))),
  );

  $form['modules']['aggregator']['aggregator_category_selector'] = array(
    '#type' => 'radios',
    '#title' => t('Category selection type'),
    '#default_value' => variable_get('aggregator_category_selector', 'checkboxes'),
    '#options' => array(
      'checkboxes' => t('checkboxes'),
      'select' => t('multiple selector'),
    ),
    '#description' => t('The type of category selection widget displayed on categorization pages. (For a small number of categories, checkboxes are easier to use, while a multiple selector works well with large numbers of categories.)'),
  );
}

/**
 * Add/edit/delete an aggregator item.
 *
 * - A title without an iid inserts a new {aggregator_item} row.
 * - An iid without a title deletes the item and its category assignments.
 * - A title together with a link files the item in every category that is
 *   assigned to its feed.
 *
 * @param $edit
 *   An associative array describing the item to be added/edited/deleted.
 *   Reads the keys iid, fid, title, link, author, description, guid and
 *   timestamp.
 */
function aggregator_save_item($edit) {
  if ($edit['title'] && empty($edit['iid'])) {
    // New item: the insert result is kept as the item ID for the category
    // filing below.
    $edit['iid'] = db_insert('aggregator_item')
      ->fields(array(
        'title' => $edit['title'],
        'link' => $edit['link'],
        'author' => $edit['author'],
        'description' => $edit['description'],
        'guid' => $edit['guid'],
        'timestamp' => $edit['timestamp'],
        'fid' => $edit['fid'],
      ))
      ->execute();
  }

  if ($edit['iid'] && !$edit['title']) {
    db_delete('aggregator_item')
      ->condition('iid', $edit['iid'])
      ->execute();
    db_delete('aggregator_category_item')
      ->condition('iid', $edit['iid'])
      ->execute();
  }
  elseif ($edit['title'] && $edit['link']) {
    // File the items in the categories indicated by the feed.
    $result = db_query('SELECT cid FROM {aggregator_category_feed} WHERE fid = :fid', array(':fid' => $edit['fid']));
    foreach ($result as $category) {
      db_merge('aggregator_category_item')
        ->fields(array(
          'cid' => $category->cid,
          'iid' => $edit['iid'],
        ))
        ->execute();
    }
  }
}