--- link_checker.info 2008-04-16 14:19:17.000000000 +0200 +++ link_checker.info 2008-06-30 16:15:23.000000000 +0200 @@ -1,7 +1,7 @@ -; $Id: link_checker.info,v 1.1 2008/04/16 12:19:17 jredding Exp $ +; $Id$ name = Link Checker description = Checks the weblinks created by the link module for 404 errors -dependencies = content link +dependencies[] = content +dependencies[] = link package = CCK - - +core = 6.x --- link_checker.install 2008-04-16 14:19:17.000000000 +0200 +++ link_checker.install 2008-06-30 17:58:39.000000000 +0200 @@ -1,59 +1,75 @@ t('The base table for nodes.'), + 'fields' => array( + 'nid' => array( + 'description' => t('Nid of the node.'), + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0), + 'vid' => array( + 'description' => t('Vid of the node.'), + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0), + 'delta' => array( + 'description' => t('Delta of the node.'), + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0), + 'last_checked' => array( + 'description' => t('Last time the link was checked.'), + 'type' => 'datetime', + 'not null' => TRUE), + 'status' => array( + 'description' => t('Status of the link.'), + 'type' => 'int', + 'size' => 'tiny', + 'not null' => TRUE, + 'default' => 0), + 'field_name' => array( + 'description' => t('Name of the cck field name.'), + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'default' => ''), + ), + 'indexes' => array( + 'checker_last_checked' => array('last_checked'), + 'checker_status' => array('status'), + ), + 'primary key' => array('nid', 'vid', 'delta', 'field_name'), + ); + return $schema; +} /** * Implementation of hook_install(). */ function link_checker_install() { - $created = FALSE; - switch ($GLOBALS['db_type']) { - case 'mysql': - case 'mysqli': - $query1 = db_query("CREATE TABLE IF NOT EXISTS {link_checker} (". - "vid INT(10) UNSIGNED NOT NULL DEFAULT 0,". - "nid INT(10) UNSIGNED NOT NULL DEFAULT 0,". - "delta INT(10) UNSIGNED NOT NULL DEFAULT 0,". - "last_checked DATETIME DEFAULT \"2000-01-01 00:00:00\",". - "status INT(4) NOT NULL DEFAULT 0,". - "field_name" varchar(254) NOT NULL, - "PRIMARY KEY (vid, nid, delta, field_name),". - "INDEX(last_checked),". - "INDEX(status)". - ") /*!40100 DEFAULT CHARACTER SET utf8 */"); - if($query1) { - $created = TRUE; - } - break; - - case 'pgsql': - $query1 = db_query("CREATE TABLE {link_checker} (". - "vid INT UNSIGNED NOT NULL DEFAULT 0,". - "nid INT UNSIGNED NOT NULL DEFAULT 0,". - "delta INT UNSIGNED NOT NULL DEFAULT 0,". - "last_checked DATETIME DEFAULT \"2000-01-01 00:00:00\",". - "status INT NOT NULL DEFAULT 0,". - "field_name varchar(254) NOT NULL", - "PRIMARY KEY (vid, nid, field_name, delta),". - "INDEX(last_checked),". - "INDEX(status)"); - if($query1) { - $created = TRUE; - } - break; - } // end switch - - // provide the user with some install feedback - if ($created) { - drupal_set_message(t('Link checker database tables installed')); - } - else { - drupal_set_message(t('Table install for link checker was unsuccessful.')); - } -} // end install function -/* -* Implementation of hook_uninstall -*/ + drupal_install_schema('link_checker'); +} + +/** + * Implementation of hook_uninstall(). + */ function link_checker_uninstall() { - db_query('DROP TABLE {link_checker}'); + drupal_uninstall_schema('link_checker'); db_query("DELETE FROM {variable} WHERE name like '%link_checker%'"); } --- link_checker.module 2008-04-16 14:19:17.000000000 +0200 +++ link_checker.module 2008-06-30 18:17:08.000000000 +0200 @@ -1,8 +1,15 @@ 'admin/settings/linkchecker', - 'callback' => 'drupal_get_form', - 'callback arguments' => array('link_checker_admin_settings'), - 'title' => t('Link checker'), - 'description' => t('Configure link checker'), - 'access' => user_access('administer link checker'), - 'type' => MENU_NORMAL_ITEM, - ); - } + + $items['admin/settings/linkchecker'] = array( + 'page callback' => 'drupal_get_form', + 'page arguments' => array('link_checker_admin_settings'), + 'title' => 'Link checker', + 'description' => 'Configure link checker', + 'access arguments' => array('administer link checker'), + 'type' => MENU_NORMAL_ITEM, + ); return $items; } /** * Implementation of hook_perm(). -**/ -function link_checker_perm(){ + */ +function link_checker_perm() { return array('administer link checker'); } /** - * Administrator settings + * Administrator settings * * @return Array for FAPI */ function link_checker_admin_settings() { - //Check to see if allow_url_fopen is available, if not throw an error - //so that the administrator knows that this will not work without it + // Check to see if allow_url_fopen is available, if not throw an error + // so that the administrator knows that this will not work without it. if (!ini_get('allow_url_fopen')) { - drupal_set_message(t('PHP allow_url_fopen is not enabled in order for this module to work it must be enabled! ')); + drupal_set_message(t('PHP allow_url_fopen is not enabled in order for this module to work it must be enabled! ')); } - //Create the settings form - $form = array(); - $form['link_checker_batch_quantity'] = array ( + // Create the settings form. + $form = array(); + $form['link_checker_batch_quantity'] = array( '#type' => 'select', '#title' => t('Max links to check per link field'), - '#description' => t('Determines the maximum number of links that will be checked for each link field per cron run. - A high number will cause cron to run slowly whereas a low number will require cron to be ran more often. + '#description' => t('Determines the maximum number of links that will be checked for each link field per cron run. + A high number will cause cron to run slowly whereas a low number will require cron to be ran more often. '), - '#options' => array(1=>1, 2=>2, 5=>5, 10=>10), + '#options' => array(1 => 1, 2 => 2, 5 => 5, 10 => 10), '#default_value' => variable_get('link_checker_batch_quantity', NULL), - ); + ); $form['link_checker_unpublish'] = array( - '#type' => 'checkbox', - '#title' => t('Unpublish nodes with 404 links?'), - '#description' => t('If set to yes and node is found to have one or more links that returns a 404 error - the node will be automatically unpublished'), - '#default_value' => variable_get('link_checker_unpublish', NULL), - ); - //check for which nodes have link fields + '#type' => 'checkbox', + '#title' => t('Unpublish nodes with 404 links?'), + '#description' => t('If set to yes and node is found to have one or more links that returns a 404 error the node will be automatically unpublished'), + '#default_value' => variable_get('link_checker_unpublish', NULL), + ); + // check for which nodes have link fields. foreach (node_get_types() as $type => $name) { $fields = content_fields(NULL, $type); foreach ($fields as $field) { - if ($field['type'] == 'link') { - //found a node type with a link field - $types[$type] = $name->name; - } - } - } + if ($field['type'] == 'link') { + // found a node type with a link field + $types[$type] = $name->name; + } + } + } $form['link_checker_node_types'] = array( - '#type' => 'select', - '#title' => t('Select the node types to check'), - '#description' => t('Unselected items will not be checked for 404 errors'), - '#options' => $types, - '#default_value' => variable_get('link_checker_node_types', NULL), - '#multiple' => true, - ); - - return system_settings_form($form); + '#type' => 'select', + '#title' => t('Select the node types to check'), + '#description' => t('Unselected items will not be checked for 404 errors'), + '#options' => $types, + '#default_value' => variable_get('link_checker_node_types', array()), + '#multiple' => true, + ); + + return system_settings_form($form); } -/* - * Implementation of hook_cron -*/ -function link_checker_cron(){ - //only search the node types the user wants to search through - $node_types = variable_get('link_checker_node_types', NULL); +/** + * Implementation of hook_cron(). + */ +function link_checker_cron() { + // Only search the node types the user wants to search through. + $node_types = variable_get('link_checker_node_types', array()); foreach ($node_types as $node) { - $fields = content_fields(NULL, $type); + $fields = content_fields(NULL, $node); foreach ($fields as $field) { - if ($field['type'] == 'link') { - //We're at the link field type if its multiple we'll query the link table directly - //otherwise we'll query the node type table directly + if ($field['type'] == 'link') { + // We're at the link field type. If its multiple we'll query the link table directly, + // otherwise we'll query the node type table directly. if ($field['multiple'] == 1) { $table_name = "content_". $field['field_name']; - $field_name = $field['field_name']. "_url"; - $max = variable_get('link_checker_batch_quantity', 10); - $sql = "SELECT c.nid, c.vid, c.delta, c.$field_name as link - FROM {$table_name} c - LEFT JOIN {link_checker} lc on c.nid = lc.nid && c.vid = lc.vid && c.delta = lc.delta + $field_name = $field['field_name'] ."_url"; + $max = variable_get('link_checker_batch_quantity', 10); + $sql = "SELECT c.nid, c.vid, c.delta, c.$field_name as link + FROM {$table_name} c + LEFT JOIN {link_checker} lc on c.nid = lc.nid && c.vid = lc.vid && c.delta = lc.delta WHERE c.$field_name != '' ORDER BY lc.last_checked ASC LIMIT %d "; - $links = db_query($sql, $max); - while ($link = db_fetch_object($links)) { - //send over to _link_checker_check_status to update the status in the DB - $link->field_name = $field_name; - _link_checker_check_status($link); - } - } - else { - //It is a field on the node type table + $links = db_query($sql, $max); + while ($link = db_fetch_object($links)) { + // Send over to _link_checker_check_status to update the status in the DB. + $link->field_name = $field_name; + _link_checker_check_status($link); + } + } + else { + // It is a field on the node type table. $table_name = "content_type_". $node; - $field_name = $field['field_name']. "_url"; - $max = variable_get('link_checker_batch_quantity', NULL); - $sql = "SELECT c.nid, c.vid, c.$field_name as link - FROM {$table_name} c + $field_name = $field['field_name'] ."_url"; + $max = variable_get('link_checker_batch_quantity', NULL); + $sql = "SELECT c.nid, c.vid, c.$field_name as link + FROM {$table_name} c LEFT JOIN {link_checker} lc on c.nid = lc.nid && c.vid = lc.vid && c.$field_name = lc.field_name WHERE c.$field_name != '' ORDER BY lc.last_checked ASC LIMIT %d "; - $links = db_query($sql, $max); - while ($link = db_fetch_object($links)) { - //send over to _link_checker_check_status to update the status in the DB - $link->field_name = $field_name; - _link_checker_check_status($link); - } - } - } //if field['type'] - } //foreach fields as field - } //foreach node_types as node - + $links = db_query($sql, $max); + while ($link = db_fetch_object($links)) { + // Send over to _link_checker_check_status to update the status in the DB. + $link->field_name = $field_name; + $link->delta = 0; + _link_checker_check_status($link); + } + } + } // if field['type'] + } // foreach fields as field + } // foreach node_types as node } -/* +/** * Checks the link for a status -*/ + */ function _link_checker_check_status($link) { static $message_once = TRUE; - // check php var "allow_url_fopen" is true as we need it to fetch the URL + // Check php var "allow_url_fopen" is true as we need it to fetch the URL. if (!ini_get('allow_url_fopen')) { if ($message_once) { watchdog('cron', 'PHP INI "allow_url_fopen" is false', WATCHDOG_NOTICE); @@ -154,45 +158,45 @@ } } - //Code originally written in the janode project at http://drupal.org/project/janode, thanks! + // Code originally written in the janode project at http://drupal.org/project/janode, thanks! - // There are a number of HTTP status return codes. However, below 300 + // There are a number of HTTP status return codes. However, below 300 // usually means all went ok. We use 250 series for our own internal // error messaging. Our errors are non-fatal however, so are less than // 300. Status codes above 299 are considered errors and we force the // node back into the moderation queue for admin attention. - $status = 0; // provide a default value to ensure var exists + $status = 0; // Provide a default value to ensure var exists. - // create a full URL + // Create a full URL. $url_parts = parse_url($link->link); - if (isset($url_parts['port']) && strlen($url_parts['port']) > 0) { - $url_parts['host'] .= ':' . $url_parts['port']; + if (isset($url_parts['port']) && drupal_strlen($url_parts['port']) > 0) { + $url_parts['host'] .= ':'. $url_parts['port']; } $url_parts['path'] = (isset($url_parts['path'])) ? $url_parts['path'] : (''); - $full_url = $url_parts['scheme'] . "://" . $url_parts['host'] . $url_parts['path']; - if (isset($url_parts['query']) && strlen($url_parts['query']) > 0) { - $full_url .= "?" . urlencode($url_parts['query']); + $full_url = $url_parts['scheme'] ."://". $url_parts['host'] . $url_parts['path']; + if (isset($url_parts['query']) && drupal_strlen($url_parts['query']) > 0) { + $full_url .= "?". urlencode($url_parts['query']); } - - // currently only support http + + // Currently only support http. if ($url_parts['scheme'] != 'http') { $status = _LINK_CHECKER_NOT_HTTP; } else { - if(!function_exists('stream_get_meta_data')) { // needed next - $status = _LINK_CHECKER_NO_MATA_DATA; + if (!function_exists('stream_get_meta_data')) { // needed next + $status = _LINK_CHECKER_NO_META_DATA; } - elseif(!($fp = @fopen($full_url, 'r'))) { + elseif (!($fp = @fopen($full_url, 'r'))) { $status = _LINK_CHECKER_FILE_OPEN_FAILURE; - unset($_SESSION['messages']['error']); // rough but gets rid of hostname errors, @fopen didn't work + unset($_SESSION['messages']['error']); // Rough but gets rid of hostname errors, @fopen didn't work. } else { $meta_data = @stream_get_meta_data($fp); fclose($fp); if (is_array($meta_data['wrapper_data'])) { - foreach($meta_data['wrapper_data'] as $v) { - if (strtolower(substr($v, 0, 4)) == 'http') { // look for a server header starting "http" + foreach ($meta_data['wrapper_data'] as $v) { + if (drupal_strtolower(drupal_substr($v, 0, 4)) == 'http') { // look for a server header starting "http" list($protcol, $status, $verbal) = explode(' ', $v); // and if found, assign to $status break; } @@ -205,30 +209,32 @@ restore_error_handler(); // tell the db what we have discovered... - - //First we need to check if there is an existing row - $sql = "SELECT lc.nid, lc.delta, lc.vid + + // First we need to check if there is an existing row. + $sql = "SELECT COUNT(*) FROM {link_checker} lc WHERE lc.nid = %d && lc.vid = %d && lc.delta = %d && lc.field_name = '%s' LIMIT 1"; - - if (db_num_rows(db_query($sql, $link->nid, $link->vid, $link->delta, $link->field_name)) > 0) { - //row exist so we'll update it. - $sql = "UPDATE {link_checker} lc + + if (db_result(db_query($sql, $link->nid, $link->vid, $link->delta, $link->field_name)) > 0) { + // row exist so we'll update it + $sql = "UPDATE {link_checker} lc SET status = %s, last_checked = NOW() WHERE lc.nid = %d && lc.vid = %d && lc.delta = %d && lc.field_name = '%s' "; - db_query($sql,$status, $link->nid, $link->vid, $link->delta, $link->field_name); + db_query($sql, $status, $link->nid, $link->vid, $link->delta, $link->field_name); } else { - //row doesn't exist so we'll add it in - $sql = "INSERT INTO {link_checker} (nid, vid, delta, last_checked, status, field_name) + // row doesn't exist so we'll add it in + $sql = "INSERT INTO {link_checker} (nid, vid, delta, last_checked, status, field_name) VALUES (%d, %d, %d, NOW(), '%s', '%s') "; - db_query($sql, $link->nid, $link->vid, $link->delta, $status, $link->field_name); + db_query($sql, $link->nid, $link->vid, $link->delta, $status, $link->field_name); } - // unpublish and force the node into the moderation queue + // Unpublish and force the node into the moderation queue. if ((int)$status > _LINK_CHECKER_FAILURE_PIVOT && variable_get('link_checker_unpublish', NULL)) { - //The user would like all errors above 300 unpublished.. ok we'll do that. - db_query("UPDATE {node} SET status = 0, moderate = 1 WHERE nid = %d", $link->nid); + // The user would like all errors above 300 unpublished.. ok we'll do that. + $link->status = 0; + $link->moderate = 1; + node_save($link); // Use node_save so cache is cleared if needed. } }