diff -uprN /home/files/coding/drupal/spam/filters/bayesian/bayesian.info sites/all/modules/spam/filters/bayesian/bayesian.info --- /home/files/coding/drupal/spam/filters/bayesian/bayesian.info 2009-09-01 10:28:00.000000000 +1000 +++ sites/all/modules/spam/filters/bayesian/bayesian.info 1970-01-01 10:00:00.000000000 +1000 @@ -1,12 +0,0 @@ -; $Id: bayesian.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ -name = Bayesian filter -description = A bayesian filter. -package = Spam -dependencies[] = spam -core = 6.x -; Information added by drupal.org packaging script on 2009-09-01 -version = "6.x-1.x-dev" -core = "6.x" -project = "spam" -datestamp = "1251764880" - diff -uprN /home/files/coding/drupal/spam/filters/bayesian/bayesian.install sites/all/modules/spam/filters/bayesian/bayesian.install --- /home/files/coding/drupal/spam/filters/bayesian/bayesian.install 2008-12-25 16:42:10.000000000 +1100 +++ sites/all/modules/spam/filters/bayesian/bayesian.install 1970-01-01 10:00:00.000000000 +1000 @@ -1,105 +0,0 @@ -. All rights reserved. - * - * Provides a generic Bayesian filter for use with other modules. - * Defines hooks for use with the Spam API. - */ - -/** - * Implementation of hook_schema(). 
- */ -function bayesian_schema() { - $schema['bayesian_tokens'] = array( - 'description' => t('TODO'), - 'fields' => array( - 'tid' => array( - 'description' => t('TODO'), - 'type' => 'serial', - 'unsigned' => 1, - 'not null' => TRUE, - ), - 'class' => array( - 'description' => t('TODO'), - 'type' => 'varchar', - 'length' => 32, - 'not null' => TRUE, - 'default' => '', - ), - 'token' => array( - 'description' => t('TODO'), - 'type' => 'varchar', - 'length' => 255, - 'not null' => TRUE, - 'default' => '', - ), - 'yes_count' => array( - 'description' => t('TODO'), - 'type' => 'int', - 'unsigned' => 1, - 'not null' => TRUE, - 'default' => 0, - ), - 'no_count' => array( - 'description' => t('TODO'), - 'type' => 'int', - 'unsigned' => 1, - 'not null' => TRUE, - 'default' => 0, - ), - 'probability' => array( - 'description' => t('TODO'), - 'type' => 'int', - 'unsigned' => 1, - 'not null' => TRUE, - 'default' => 0, - ), - 'last' => array( - 'description' => t('TODO'), - 'type' => 'int', - 'unsigned' => 1, - 'not null' => TRUE, - 'default' => 0, - ), - ), - 'indexes' => array( - 'yes_count' => array('yes_count'), - 'no_count' => array('no_count'), - 'probability' => array('probability'), - 'last' => array('last'), - ), - 'unique keys' => array( - 'token' => array('class', 'token'), - ), - 'primary key' => array('tid'), - ); - - return $schema; -} - -/** - * Install bayesian module schema. - */ -function bayesian_install() { - // Create tables. - drupal_install_schema('bayesian'); -} - -/** - * Completely uninstall the spam module. - */ -function bayesian_uninstall() { - // Remove tables. 
- drupal_uninstall_schema('bayesian'); - - $tables = array('bayesian_tokens'); - foreach ($tables as $table) { - } - drupal_set_message(t('All bayesian module configuration data and tables have been deleted.')); -} - diff -uprN /home/files/coding/drupal/spam/filters/bayesian/bayesian.module sites/all/modules/spam/filters/bayesian/bayesian.module --- /home/files/coding/drupal/spam/filters/bayesian/bayesian.module 2008-12-25 16:42:10.000000000 +1100 +++ sites/all/modules/spam/filters/bayesian/bayesian.module 1970-01-01 10:00:00.000000000 +1000 @@ -1,189 +0,0 @@ -. All rights reserved. - * - * Provides a generic Bayesian filter for use with other modules. - * Defines hooks for use with the Spam API. - */ - -/** - * Spam API Hook - */ -function bayesian_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { - switch ($op) { - - case 'filter': - if (!module_invoke('spam', 'filter_enabled', 'bayesian', $type, $content, $fields, $extra)) return; - return bayesian_spam_filter($content, $type, $fields, $extra); - - case 'filter_module': - return 'bayesian'; - break; - - case 'filter_info': - return array( - 'name' => t('Bayesian filter'), - 'module' => t('bayesian'), - 'description' => t('A bayesian spam filter.'), - 'help' => t('The bayesian filter can learn to tell the difference between valid content spam content.'), - ); - break; - - case 'filter_install': - return array( - 'status' => SPAM_FILTER_ENABLED, - ); - - case 'mark_as_spam': - case 'mark_as_not_spam': - if (!module_invoke('spam', 'filter_enabled', 'bayesian', $type, $content, $fields, $extra)) return; - spam_log(SPAM_DEBUG, 'bayesian_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); - $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); - $tokenizer = variable_get('bayesian_tokenizer', 'bayesian_tokenize'); - $tokens = $tokenizer($extra['content'], $type, $fields, $extra); - bayesian_tokens_update('spam', $tokens, ($op == 'mark_as_spam' ? 
TRUE : FALSE), $type, $extra['id']); - break; - } -} - -/** - * Determine whether or not the content is spam. - */ -function bayesian_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { - $class = 'spam'; - $id = spam_invoke_module($type, 'content_id', $content, $extra); - $tokenizer = variable_get('bayesian_tokenizer', 'bayesian_tokenize'); - $tokens = $tokenizer($content, $type, $fields, $extra); - if (is_array($tokens)) { - foreach ($tokens as $token) { - $p = db_fetch_object(db_query("SELECT probability FROM {bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); - if (!$p-> 'help' => t('The bayesian filtability = variable_get('bayesian_default_probability', 40); - } - $t["$token,$p->probak; - - case 'filter_install': 50); - } - } - else { - // No tokens, return default score. - $action['total'] = variable_get('bayesian_default_probability', 40); - return $action; - } - - /* Sort token array so those tokens with the largest "drift" come first. - * Drift is this distance from a median of 50%. - */ - asort($t); - - /* Take the n most "interesting" tokens from the top of the token array. - * The larger a token's drift, the more interesting it is. 
- */ - $keys = array_keys($t); - $max = variable_get('bayesian_interesting_tokens', 15); - $total = 0; - for ($i = 0; $i < $max; $i++) { - if ($pair = array_pop($keys)) { - $p = explode(',', $pair); - $total = $total + $p[1]; - $action['bayesian'][$i] = array( - 'token' => $p[0], - 'probability' => $p[1], - ); - spam_log(SPAM_DEBUG, 'bayesian_spam_filter', t('interesting token [@count] (@token) probability(@probability)', array('@token' => $p[0], '@probability' => $p[1], '@count' => $i + 1)), $type, $id); - } - else { - // we've looked at all the tokens - break; - } - } - - $probability = round($total / $i, 1); - spam_log(SPAM_VERBOSE, 'bayesian_spam_filter', t('total(@total) count(@count) probability(@probability)', array('@probability' => $probability, '@total' => $total, '@count' => $i)), $type, $id); - - $action['total'] = $probability; - return $action; -} - -/** - * Update token probabilities in database. - */ -function bayesian_tokens_update($class, $tokens, $yes, $type = NULL, $id = 0) { - if (!is_array($tokens) || empty($tokens)) return; - foreach ($tokens as $token) { - $old = db_fetch_object(db_query("SELECT probability, yes_count, no_count FROM {bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); - if ($old->probability) { - $total = $old->yes_count + $old->no_count + 1; - $probability = spam_sanitize_score(($old->yes_count + ($yes ? 1 : 0)) / $total * 100); - spam_log(SPAM_DEBUG, 'bayesian_tokens_update', t('update token(@token) class(@class) yes(@yes) no(@no) prob(@prob): added @new', array('@token' => $token, '@class' => $class, '@yes' => $old->yes_count + ($yes ? 1 : 0), '@no' => $old->no_count + ($yes ? 0 : 1), '@prob' => $probability, '@new' => $yes ? 
'yes' : 'no')), $type, $id); - if ($yes) { - db_query("UPDATE {bayesian_tokens} SET yes_count = yes_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); - } - else { - db_query("UPDATE {bayesian_tokens} SET no_count = no_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); - } - } - else { - $probability = ($yes ? 99 : 1); - spam_log(SPAM_DEBUG, 'bayesian_tokens_update', t('insert token(@token) class(@class) probability(@probability)', array('@token' => $token, '@class' => $class, '@probability' => $probability)), $type, $id); - db_query("INSERT INTO {bayesian_tokens} (class, token, yes_count, no_count, probability, last) VALUES('%s', '%s', %d, %d, %d, %d)", $class, $token, ($yes ? 1 : 0), ($yes ? 0 : 1), $probability, time()); - } - } -} - -/** - * Split content into an array of tokens. - */ -function bayesian_tokenize($content, $type, $fields, $extra = array(), $tag = NULL) { - static $tokens = array(); - - $id = spam_invoke_module($type, 'content_id', $content, $extra); - - if (is_object($content)) { - $content = (array)$content; - } - - if (!isset($tokens["$type-$id-$tag"])) { - $string = spam_get_text($content, $type, $fields, $extra); - - $URI = "(http://|https://|ftp://|mailto:)"; - // strip out unwanted html/url noise - $sanitized = preg_replace("'(www\.)|()|(href=)|(target=)|(src=)'i", '', $string); - $sanitized = preg_replace($URI, '', $sanitized); - - // Strip out values that should not be considered part of tokens, so - // things like '{viagra}' and 'vi.agra' are counted as hits towards - // 'viagra' - $sanitized = preg_replace("/[()\{\}\[\]#.,]/", '', $sanitized); - - // Force all tokens to lowercase, again to aggregate tokens. This both - // lowers the total token number of rows in the spam_tokens table and - // increases the strength of individual tokens by linking them to - // capitalized versions. 
- $sanitized = drupal_strtolower($sanitized); - - // divide sanitized string into tokens - $delimiters = " \t\n\r-_<>'\"`/|*%^&+=~:;?"; - $tok = strtok($sanitized, $delimiters); - while ($tok !== FALSE) { - // Only inspect the token if over minimum length. - if (drupal_strlen($tok) >= variable_get('bayesian_minimum_token_length', 3)) { - // If the token is longer than 255 characters, truncate it. - $toks[] = htmlspecialchars(drupal_substr("$tag$tok", 0, 254)); - } - $tok = strtok($delimiters); - } - - // allow external module ability to extract additional tokens - $hook = spam_invoke_api('tokenize', $string, $tag); - if ($hook['tokens']) { - $toks = array_merge($toks, $hook['tokens']); - } - $tokens["$type-$id-$tag"] = $toks; - } - - return $tokens["$type-$id-$tag"]; -} diff -uprN /home/files/coding/drupal/spam/filters/custom/custom.info sites/all/modules/spam/filters/custom/custom.info --- /home/files/coding/drupal/spam/filters/custom/custom.info 2009-09-01 10:28:00.000000000 +1000 +++ sites/all/modules/spam/filters/custom/custom.info 1970-01-01 10:00:00.000000000 +1000 @@ -1,12 +0,0 @@ -; $Id: custom.info,v 1.1.2.2.2.1 2008/12/25 05:42:10 jeremy Exp $ -name = Custom filter -description = Allows the creation of custom spam filter rules. 
-package = Spam -dependencies[] = spam -core = 6.x -; Information added by drupal.org packaging script on 2009-09-01 -version = "6.x-1.x-dev" -core = "6.x" -project = "spam" -datestamp = "1251764880" - diff -uprN /home/files/coding/drupal/spam/filters/custom/custom.install sites/all/modules/spam/filters/custom/custom.install --- /home/files/coding/drupal/spam/filters/custom/custom.install 2008-12-25 16:42:10.000000000 +1100 +++ sites/all/modules/spam/filters/custom/custom.install 1970-01-01 10:00:00.000000000 +1000 @@ -1,43 +0,0 @@ - array( - 'cid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'), - 'filter' => array('type' => 'varchar', 'length' => '255', 'not null' => TRUE, 'default' => ''), - 'style' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), - 'status' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '2'), - 'scan' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), - 'action' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), - 'matches' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'), - 'last' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'), - 'weight' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), - ), - 'primary key' => array('cid'), - 'indexes' => array( - 'filter' => array('filter'), - 'last' => array('last'), - 'matches' => array('matches'), - 'weight' => array('weight'), - ), - ); - - return $schema; -} - -function custom_install() { - // Create my tables. - drupal_install_schema('custom'); -} - -function custom_uninstall() { - // Remove tables. 
- drupal_uninstall_schema('custom'); - drupal_set_message('The spam_custom table has been dropped.'); -} diff -uprN /home/files/coding/drupal/spam/filters/custom/custom.module sites/all/modules/spam/filters/custom/custom.module --- /home/files/coding/drupal/spam/filters/custom/custom.module 2009-08-06 00:25:13.000000000 +1000 +++ sites/all/modules/spam/filters/custom/custom.module 1970-01-01 10:00:00.000000000 +1000 @@ -1,584 +0,0 @@ -. - * - * Allows manual definition of words and regular expressions to detect spam - * content. - */ - -define(SPAM_CUSTOM_STYLE_PLAIN, 0); -define(SPAM_CUSTOM_STYLE_REGEX, 1); - -define(SPAM_CUSTOM_STATUS_NOTSPAM, -2); -define(SPAM_CUSTOM_STATUS_PROBABLYNOT, -1); -define(SPAM_CUSTOM_STATUS_DISABLED, 0); -define(SPAM_CUSTOM_STATUS_PROBABLY, 1); -define(SPAM_CUSTOM_STATUS_SPAM, 2); - -define(SPAM_CUSTOM_SCAN_CONTENT, 0x1); -define(SPAM_CUSTOM_SCAN_REFERRER, 0x4); -define(SPAM_CUSTOM_SCAN_USERAGENT, 0x8); - -// TODO: support actions -//define(SPAM_CUSTOM_ACTION_DELETE, 0x1); -//define(SPAM_CUSTOM_ACTION_MAIL, 0x2); - -/** - * Spam API Hook - */ -function custom_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { - switch ($op) { - case 'filter': - if (!module_invoke('spam', 'filter_enabled', 'custom', $type, $content, $fields, $extra)) return; - return custom_spam_filter($content, $type, $fields, $extra); - - case 'filter_module': - return 'custom'; - - case 'filter_info': - return array( - 'name' => t('Custom filter'), - 'module' => t('custom'), - 'description' => t('Custom spam filters.'), - 'help' => t('The custom spam filter module allows you to manually define custom spam filter rules.'), - ); - - case 'filter_install': - return array( - 'status' => SPAM_FILTER_ENABLED, - 'gain' => 250, - 'weight' => -4, - ); - } -} - -/** - * Drupal _menu() hook. 
- */ -function custom_menu() { - $items = array(); - - $items['admin/settings/spam/filters/custom'] = array( - 'title' => 'Custom', - 'page callback' => 'drupal_get_form', - 'page arguments' => array('custom_admin_settings'), - 'access arguments' => array('administer spam'), - 'description' => 'Configure the custom spam filter module.', - 'type' => MENU_LOCAL_TASK, - ); - - $items['admin/settings/spam/filters/custom/list'] = array( - 'title' => 'List', - 'page callback' => 'drupal_get_form', - 'page arguments' => array('custom_admin_settings'), - 'access arguments' => array('administer spam'), - 'description' => 'Configure the custom spam filter module.', - 'type' => MENU_DEFAULT_LOCAL_TASK, - ); - $items['admin/settings/spam/filters/custom/create'] = array( - 'title' => 'Create', - 'page callback' => 'drupal_get_form', - 'page arguments' => array('custom_admin_filter'), - 'access arguments' => array('administer spam'), - 'description' => 'Create a custom spam filter.', - 'type' => MENU_LOCAL_TASK, - ); - $items["admin/settings/spam/filters/custom/%/edit"] = array( - 'title' => 'Create', - 'page callback' => 'drupal_get_form', - 'page arguments' => array('custom_admin_filter', 5), - 'access arguments' => array('administer spam'), - 'description' => 'Edit a custom spam filter.', - 'type' => MENU_LOCAL_TASK, - ); - - return $items; -} - -/** - * Adminsitrative interface for configuring custom spam filter rules. - */ -function custom_admin_settings() { - $form = array(); - - $form['options'] = array( - '#type' => 'fieldset', - '#title' => t('Options'), - '#prefix' => '
plain text. If you would like to define a regular expression, your filter must be formatted as a Perl-compatible regular expression.'),
- '#options' => array(SPAM_CUSTOM_STYLE_PLAIN => t('Plain text'), SPAM_CUSTOM_STYLE_REGEX => t('Regular expression')),
- '#default_value' => $custom->cid ? $custom->style : SPAM_CUSTOM_STYLE_PLAIN,
- '#required' => TRUE,
- );
- $options = array(SPAM_CUSTOM_SCAN_CONTENT => ('Content'), SPAM_CUSTOM_SCAN_REFERRER => t('Referrer'), SPAM_CUSTOM_SCAN_USERAGENT => t('User agent'));
- $scan = array();
- if ($custom->scan & SPAM_CUSTOM_SCAN_CONTENT) {
- $scan[] = SPAM_CUSTOM_SCAN_CONTENT;
- }
- if ($custom->scan & SPAM_CUSTOM_SCAN_REFERRER) {
- $scan[] = SPAM_CUSTOM_SCAN_REFERRER;
- }
- if ($custom->scan & SPAM_CUSTOM_SCAN_USERAGENT) {
- $scan[] = SPAM_CUSTOM_SCAN_USERAGENT;
- }
- $form['scan'] = array(
- '#type' => 'checkboxes',
- '#title' => t('Scan'),
- '#description' => t('Specify where you\'d like to apply your custom filter.'),
- '#options' => $options,
- '#required' => TRUE,
- '#default_value' => !empty($scan) ? $scan : array(SPAM_CUSTOM_SCAN_CONTENT),
- );
- $options = array();
- $form['status'] = array(
- '#type' => 'radios',
- '#title' => t('Status'),
- '#description' => t('Select the status to apply when your custom filter matches site content. Filters are tested in the order they are displayed above, thus if content matches a filter that says to mark it as spam, and another to mark it as not spam, the first to match will be the actual status applied.'),
- '#options' => array(
- SPAM_CUSTOM_STATUS_DISABLED => t('Disabled'),
- SPAM_CUSTOM_STATUS_SPAM => t('Mark as spam'),
- SPAM_CUSTOM_STATUS_PROBABLY => t('Mark as probably spam'),
- SPAM_CUSTOM_STATUS_PROBABLYNOT => t('Mark as probably not spam'),
- SPAM_CUSTOM_STATUS_NOTSPAM => t('Mark as not spam')),
- '#default_value' => $custom->cid ? $custom->status : SPAM_CUSTOM_STATUS_SPAM,
- '#required' => TRUE,
- );
- $form['weight'] = array(
- '#type' => 'weight',
- '#title' => t('Weight'),
- '#description' => t('Give your custom filter a weight. "Lighter" filters with smaller weights will run before "heavier" filters with larger weights.'),
- '#default_value' => $custom->weight,
- );
-
- $form['submit'] = array(
- '#type' => 'submit',
- '#value' => $custom->cid ? t('Update filter') : t('Create filter'),
- );
-
- if ($custom->cid) {
- $form['cid'] = array(
- '#type' => 'hidden',
- '#value' => $custom->cid,
- );
- }
-
- return $form;
-}
-
-/**
- * Be sure that the custom filter is valid.
- */
-function custom_admin_filter_validate($form, &$form_state) {
- if ($form_state['values']['style'] == SPAM_CUSTOM_STYLE_REGEX) {
- if (preg_match($form_state['values']['filter'], 'test') === FALSE) {
- form_set_error('filter', t('Failed to validate your filter\'s regular expression. It must be properly formatted as a Perl-compatible regular expression. Review the above error for details on the specific problem with your expression.'));
- }
- }
- if (isset($form_state['values']['cid'])) {
- // update
- $cid = db_result(db_query("SELECT cid FROM {spam_custom} WHERE filter = '%s' AND cid <> %d", $form_state['values']['filter'], $form_state['values']['cid']));
- if ($cid) {
- form_set_error($cid, t('Custom filter %filter already exists', array('%filter' => $form_state['values']['filter'])));
- }
- }
- else {
- // create
- $cid = db_result(db_query("SELECT cid FROM {spam_custom} WHERE filter = '%s'", $form_state['values']['filter']));
- if ($cid) {
- form_set_error($cid, t('Custom filter %filter already exists', array('%filter' => $form_state['values']['filter'])));
- }
- }
-}
-
-/**
- * Create/update custom filer.
- */
-function custom_admin_filter_submit($form, &$form_state) {
- $scan = 0;
- if (is_array($form_state['values']['scan'])) {
- foreach ($form_state['values']['scan'] as $s) {
- $scan += $s;
- }
- }
- if (isset($form_state['values']['cid'])) {
- db_query("UPDATE {spam_custom} SET filter = '%s', style = %d, status = %d, scan = %d, weight = %d WHERE cid = %d", $form_state['values']['filter'], $form_state['values']['style'], $form_state['values']['status'], $scan, $form_state['values']['weight'], $form_state['values']['cid']);
- drupal_set_message(t('Custom filter %filter updated.', array('%filter' => $form_state['values']['filter'])));
- }
- else {
- db_query("INSERT INTO {spam_custom} (filter, style, status, scan, weight) VALUES ('%s', %d, %d, %d, %d)", $form_state['values']['filter'], $form_state['values']['style'], $form_state['values']['status'], $scan, $form_state['values']['weight']);
- drupal_set_message(t('Custom filter %filter created.', array('%filter' => $form_state['values']['filter'])));
- }
- drupal_goto('admin/settings/spam/filters/custom');
-}
-
-/**
- * Perform bulk operations on the filters.
- */
-function custom_admin_settings_submit($form, &$form_state) {
- if (is_array($form_state['values']['custom'])) {
- foreach ($form_state['values']['custom'] as $cid => $selected) {
- if ($selected) {
- $process[] = $cid;
- }
- }
- }
- if (!empty($process)) {
- foreach (module_invoke_all('spam_custom_operations') as $operation => $op) {
- $options[$operation] = $op;
- }
- $operation = $form_state['values']['operation'];
- if (isset($options[$operation])) {
- $function = $options[$operation]['callback'];
- $arguments = $options[$operation]['callback arguments'];
- //TODO: Why is order different than spam.module, and why 1 at a time?
- foreach ($process as $cid) {
- call_user_func_array($function, array_merge($arguments, array($cid)));
- }
- }
- }
-}
-
-/**
- * Perform custom operations.
- * TODO: Confirmation would be nice.
- */
-function custom_spam_filter_operations($op, $cid) {
- $filter = db_fetch_object(db_query('SELECT cid, status, filter FROM {spam_custom} WHERE cid = %d', $cid));
- switch ($op) {
- case 'delete':
- if ($filter->cid) {
- db_query('DELETE FROM {spam_custom} WHERE cid = %d', $cid);
- drupal_set_message(t('Deleted custom filter %filter.', array('%filter' => $filter->filter)));
- }
- break;
- case 'disable':
- if ($filter->cid && $filter->status != SPAM_CUSTOM_STATUS_DISABLED) {
- db_query('UPDATE {spam_custom} SET status = %d WHERE cid = %d', SPAM_CUSTOM_STATUS_DISABLED, $cid);
- drupal_set_message(t('Disabled custom filter %filter.', array('%filter' => $filter->filter)));
- }
- break;
- }
-}
-
-/**
- * Apply enabled custom filter rules against content.
- */
-function custom_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
- $probably = $probably_not = 0;
- $id = spam_invoke_module($type, 'content_id', $content, $extra);
- $result = db_query('SELECT cid, filter, style, status, scan, action FROM {spam_custom} WHERE status <> %d ORDER BY weight ASC', SPAM_CUSTOM_STATUS_DISABLED);
- while ($custom = db_fetch_object($result)) {
- $scan = '';
- if ($custom->scan & SPAM_CUSTOM_SCAN_CONTENT) {
- // scan content
- if (is_object($content)) {
- $content = (array)$content;
- }
- $scan .= spam_get_text($content, $type, $fields, $extra);
- spam_log(SPAM_DEBUG, 'custom_spam_filter', t('scanning content with %filter.', array('%filter' => $custom->filter)), $type, $id);
- }
- if ($custom->scan & SPAM_CUSTOM_SCAN_REFERRER) {
- // scan referrer
- // TODO: Determine if this is a live scan. If not, don't scan referrer.
- $scan .= $_SERVER['HTTP_REFERER'];
- spam_log(SPAM_DEBUG, 'custom_spam_filter', t('scanning referrer with %filter.', array('%filter' => $custom->filter)), $type, $id);
- }
- if ($custom->scan & SPAM_CUSTOM_SCAN_USERAGENT) {
- // scan user agent
- // TODO: Determine if this is a live scan. If not, don't scan user agent.
- $scan .= $_SERVER['HTTP_USER_AGENT'];
- spam_log(SPAM_DEBUG, 'custom_spam_filter', t('scanning user agent with %filter.', array('%filter' => $custom->filter)), $type, $id);
- }
- switch ($custom->style) {
- case SPAM_CUSTOM_STYLE_PLAIN:
- $match = preg_match_all("/$custom->filter/", $scan, $matches);
- break;
- case SPAM_CUSTOM_STYLE_REGEX:
- $match = preg_match_all($custom->filter, $scan, $matches);
- break;
- }
- if ($match) {
- // Record that we've had one or more matches.
- db_query('UPDATE {spam_custom} SET matches = matches + %d, last = %d WHERE cid = %d', $match, time(), $custom->cid);
- spam_log(SPAM_VERBOSE, 'custom_spam_filter', t('matched with %filter.', array('%filter' => $custom->filter)), $type, $id);
-
- $action['custom'][] = array(
- 'filter' => $custom->filter,
- 'status' => $custom->status,
- 'style' => $custom->style,
- 'scan' => $custom->scan,
- 'extra' => $custom->extra,
- );
-
- switch ($custom->status) {
-
- case SPAM_CUSTOM_STATUS_SPAM:
- spam_log(SPAM_VERBOSE, 'custom_spam_filter', t('content is spam.'), $type, $id);
- // no need to scan any more, we've found spam
- $action['total'] = 99;
- return $action;
-
- case SPAM_CUSTOM_STATUS_NOTSPAM:
- spam_log(SPAM_VERBOSE, 'custom_spam_filter', t('content is not spam.'), $type, $id);
- // no need to scan any more, we've found non-spam
- $action['total'] = 1;
- return $action;
-
- case SPAM_CUSTOM_STATUS_PROBABLYNOT:
- spam_log(SPAM_DEBUG, 'custom_spam_filter', t('content is probably not spam.'), $type, $id);
- // maintain internal counter that this is probably not spam
- $probably_not += $match;
- break;
-
- case SPAM_CUSTOM_STATUS_PROBABLY:
- spam_log(SPAM_DEBUG, 'custom_spam_filter', t('content is probably spam.'), $type, $id);
- // maintain internal counter that this is probably spam
- $probably += $match;
- break;
- }
- }
- }
-
- if ($probably && $probably_not) {
- if ($probably >= $probably_not) {
- $probably -= $probably_not;
- $probably_not = 0;
- }
- else {
- $probably_not -= $probably;
- $probably = 0;
- }
- }
- if ($probably) {
- spam_log(SPAM_VERBOSE, 'custom_spam_filter', t('matched adjusted total of !number probably spam rule(s).', array('!number' => $probably)), $type, $id);
- if ($probably >= variable_get('spam_custom_probably', 3)) {
- $action['total'] = 99;
- }
- else {
- $action['total'] = variable_get('spam_custom_probably_value', variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD));
- }
- }
- else if ($probably_not) {
- spam_log(SPAM_VERBOSE, 'custom_spam_filter', t('matched adjusted total of !number probably-not spam rule(s).', array('!number' => $probably_not)), $type, $id);
- if ($probably_not >= variable_get('spam_custom_probablynot', 3)) {
- $action['total'] = 1;
- }
- else {
- $action['total'] = variable_get('spam_custom_probablynot_value', 40);
- }
- }
- else {
- // No matched filters, so don't change the overall spam score.
- $action['total'] = 0;
- }
- return $action;
-}
diff -uprN /home/files/coding/drupal/spam/filters/custom/custom-upgrade.inc sites/all/modules/spam/filters/custom/custom-upgrade.inc
--- /home/files/coding/drupal/spam/filters/custom/custom-upgrade.inc 2008-12-25 16:42:10.000000000 +1100
+++ sites/all/modules/spam/filters/custom/custom-upgrade.inc 1970-01-01 10:00:00.000000000 +1000
@@ -1,72 +0,0 @@
-style) {
- case SPAM_CUSTOM_PLAIN:
- case SPAM_CUSTOM_URL:
- $style = SPAM_CUSTOM_STYLE_PLAIN;
- break;
- case SPAM_CUSTOM_REGEX:
- $style = SPAM_CUSTOM_STYLE_REGEX;
- break;
- }
- switch ($old->effect) {
- case SPAM_CUSTOM_DISABLED:
- default:
- $status = SPAM_CUSTOM_STATUS_DISABLED;
- break;
- case SPAM_CUSTOM_MAYBE_SPAM:
- case SPAM_CUSTOM_USUALLY_SPAM:
- $status = SPAM_CUSTOM_STATUS_PROBABLY;
- break;
- case SPAM_CUSTOM_USUALLY_NOTSPAM:
- case SPAM_CUSTOM_MAYBE_NOTSPAM:
- $status = SPAM_CUSTOM_STATUS_PROBABLYNOT;
- break;
- case SPAM_CUSTOM_NEVER_SPAM:
- $status = SPAM_CUSTOM_STATUS_NOTSPAM;
- break;
- case SPAM_CUSTOM_ALWAYS_SPAM:
- $status = SPAM_CUSTOM_STATUS_SPAM;
- break;
- }
- if ($old->action & SPAM_CUSTOM_ACTION_HEADER || $old->action & SPAM_CUSTOM_ACTION_BODY) {
- $scan = SPAM_CUSTOM_SCAN_CONTENT;
- }
- else {
- $scan = SPAM_CUSTOM_SCAN_CONTENT;
- $status = SPAM_CUSTOM_STATUS_DISABLED;
- }
- db_query("INSERT INTO {spam_custom} (filter, style, status, scan, matches, last) VALUES('%s', %d, %d, %d, %d, %d)", $old->filter, $style, $status, $scan, $old->matches, $old->last);
- }
- // Done with upgrade, drop old table.
- $ret = array();
- db_drop_table($ret, 'old_spam_custom');
- }
-}
-
diff -uprN /home/files/coding/drupal/spam/filters/duplicate/duplicate.info sites/all/modules/spam/filters/duplicate/duplicate.info
--- /home/files/coding/drupal/spam/filters/duplicate/duplicate.info 2009-09-01 10:28:00.000000000 +1000
+++ sites/all/modules/spam/filters/duplicate/duplicate.info 1970-01-01 10:00:00.000000000 +1000
@@ -1,12 +0,0 @@
-; $Id: duplicate.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $
-name = Duplicate filter
-description = A duplication detecting spam filter.
-package = Spam
-dependencies[] = spam
-core = 6.x
-; Information added by drupal.org packaging script on 2009-09-01
-version = "6.x-1.x-dev"
-core = "6.x"
-project = "spam"
-datestamp = "1251764880"
-
diff -uprN /home/files/coding/drupal/spam/filters/duplicate/duplicate.install sites/all/modules/spam/filters/duplicate/duplicate.install
--- /home/files/coding/drupal/spam/filters/duplicate/duplicate.install 2008-12-25 16:42:10.000000000 +1100
+++ sites/all/modules/spam/filters/duplicate/duplicate.install 1970-01-01 10:00:00.000000000 +1000
@@ -1,45 +0,0 @@
- t('The base table for the Duplicate submodule'),
- 'fields' => array(
- 'iid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'),
- 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
- 'content_hash' => array('type' => 'char', 'length' => '32', 'not null' => TRUE, 'default' => ''),
- 'hostname' => array('type' => 'varchar', 'length' => '15', 'not null' => TRUE, 'default' => ''),
- 'duplicate_hash' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
- 'duplicate_ip' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
- 'spam' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
- 'expired' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
- 'timestamp' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'),
- ),
- 'primary key' => array('iid'),
- 'indexes' => array(
- 'content_hash' => array('content_hash'),
- 'hostname' => array('hostname'),
- 'sid' => array('sid'),
- 'spam' => array('spam'),
- 'timestamp' => array('timestamp'),
- ),
- );
-
- return $schema;
-}
-
-function duplicate_install() {
- // Create my tables.
- drupal_install_schema('duplicate');
-}
-
-function duplicate_uninstall() {
- // Remove tables.
- drupal_uninstall_schema('duplicate');
- drupal_set_message('The spam_duplicate table has been dropped.');
-}
diff -uprN /home/files/coding/drupal/spam/filters/duplicate/duplicate.module sites/all/modules/spam/filters/duplicate/duplicate.module
--- /home/files/coding/drupal/spam/filters/duplicate/duplicate.module 2009-06-18 05:32:47.000000000 +1000
+++ sites/all/modules/spam/filters/duplicate/duplicate.module 1970-01-01 10:00:00.000000000 +1000
@@ -1,400 +0,0 @@
-. All rights reserved.
- *
- * Detects spam by looking for duplication of content, or posting IP.
- */
-
-define('DUPLICATE_BLACKLIST_SILENT', 0);
-define('DUPLICATE_BLACKLIST_NOTIFY', 1);
-define('DUPLICATE_BLACKLIST_BLOCK', 2);
-
-define('DUPLICATE_DEFAULT_THRESHOLD', 2);
-define('DUPLICATE_DEFAULT_BLACKLIST', 3);
-
-define('DUPLICATE_NOT_SPAM', 0);
-define('DUPLICATE_SPAM', 1);
-
-/**
- * Drupal _menu() hook.
- */
-function duplicate_menu() {
- $items = array();
-
- $items['admin/settings/spam/filters/duplicate'] = array(
- 'title' => 'Duplicate',
- 'page callback' => 'drupal_get_form',
- 'page arguments' => array('duplicate_admin_settings'),
- 'access arguments' => array('administer spam'),
- 'description' => 'Configure the spam duplicate filter.',
- 'type' => MENU_LOCAL_TASK,
- );
- $items['duplicate/denied/ip'] = array(
- 'page callback' => 'duplicate_denied_ip',
- 'type' => MENU_CALLBACK,
- 'access callback' => TRUE,
- );
- $items['duplicate/denied/post'] = array(
- 'page callback' => 'duplicate_denied_post',
- 'type' => MENU_CALLBACK,
- 'access callback' => TRUE,
- );
-
- $items['admin/reports/spam/blocked_ip'] = array(
- 'title' => 'Blocked IPs',
- 'access arguments' => array('administer spam'),
- 'page callback' => 'spam_logs_blocked_ip',
- 'type' => MENU_LOCAL_TASK,
- );
-
- return $items;
-}
-
-/**
- * The arg() function may not be availble early in the bootstrap process,
- * so we reimplement it here.
- */
-function _duplicate_arg() {
- static $arguments, $q;
-
- if (empty($arguments) || $q != $_GET['q']) {
- $arguments = explode('/', $_GET['q']);
- $q = $_GET['q'];
- }
-
- if (isset($arguments[$index])) {
- return $arguments[$index];
- }
-}
-
-
-/**
- * If IP blacklisting and IP blocking are both enabled, perform a database
- * query on each page load to see if the current visitor has been blacklisted.
- */
-function duplicate_init() {
- // Allow notification to blacklisted IP, if enabled.
- if (_duplicate_arg(0) == 'duplicate' && _duplicate_arg(1) == 'denied' && _duplicate_arg(2) == 'ip') return;
-
- // Only perform database queries if functionality is enabled.
- if ((variable_get('duplicate_blacklist_action', DUPLICATE_BLACKLIST_NOTIFY) == DUPLICATE_BLACKLIST_BLOCK) && (variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST) > -1)) {
- // Blacklisting and IP blocking enabled.
- $duplicate_ip = (int)db_query("SELECT COUNT(iid) FROM {spam_duplicate} WHERE hostname = '%s' AND spam = %d", ip_address(), DUPLICATE_SPAM);
- if ($duplicate_ip >= variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST)) {
- if (user_access('bypass filters')) {
- spam_log(SPAM_DEBUG, 'duplicate_init', t('Found !count spam for IP !ip, ignoring because user !user (uid !uid) is configured to bypass filters', array('!count' => $duplicate_ip, '!ip' => ip_address(), '!user' => $user->name, 'uid' => $user->uid)), $type, $id);
- return;
- }
- drupal_goto("duplicate/denied/ip");
- }
- }
-}
-
-/**
- * Spam API Hook
- */
-function duplicate_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) {
-
- switch ($op) {
-
- case 'filter':
- if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) return;
- return duplicate_spam_filter($content, $type, $fields, $extra);
-
- case 'filter_module':
- return 'duplicate';
-
- case 'insert':
- if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) return;
- if (is_array($extra) && $extra['sid'] && $extra['host'] &&
- !empty($content) && !empty($fields)) {
- $hash = _duplicate_content_hash($content, $fields);
- db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
- $action = _duplicate_action();
- if (is_array($action) && !empty($action)) {
- if (isset($action['redirect'])) {
- drupal_goto($action['redirect']);
- }
- }
- }
- break;
-
- case 'update':
- if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) return;
- if (is_array($extra) && $extra['sid'] && $extra['host'] &&
- !empty($content) && !empty($fields)) {
- $hash = _duplicate_content_hash($content, $fields);
- db_query("UPDATE {spam_duplicate} SET content_hash = '%s', hostname = '%s', timestamp = %d WHERE sid = %d", $hash, $extra['host'], time(), $extra['sid']);
- if (!db_affected_rows()) {
- db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
- }
- $action = _duplicate_action();
- if (is_array($action) && !empty($action)) {
- if (isset($action['redirect'])) {
- drupal_goto($action['redirect']);
- }
- }
- }
- break;
-
- case 'delete':
- if (is_array($extra) && $extra['sid'] && !empty($content) && !empty($fields)) {
- db_query("DELETE FROM {spam_duplicate} WHERE sid = %d", $extra['sid']);
- }
- break;
-
- case 'filter_info':
- return array(
- 'name' => t('Duplicate filter'),
- 'module' => t('duplicate'),
- 'description' => t('A duplication spam filter.'),
- 'help' => t('The duplicate filter detects spam by detecting content duplication.'),
- );
- break;
-
- case 'filter_install':
- return array(
- 'status' => SPAM_FILTER_ENABLED,
- 'weight' => -8,
- );
-
- case 'mark_as_spam':
- if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) return;
- db_query('UPDATE {spam_duplicate} SET spam = %d WHERE sid = %d', DUPLICATE_SPAM, $extra['sid']);
- if (!db_affected_rows() && $extra['id'] && $extra['sid']) {
- $content = spam_invoke_module($type, 'load', $extra['id']);
- $fields = spam_invoke_module($type, 'filter_fields', $content);
- $hash = _duplicate_content_hash($content, $fields);
- $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
- db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time());
- }
- $action = _duplicate_action();
- if (is_array($action) && isset($action['redirect'])) {
- return $action['redirect'];
- }
- break;
-
- case 'mark_as_not_spam':
- if (!module_invoke('spam', 'filter_enabled', 'duplicate', $type, $content, $fields, $extra)) return;
- db_query('UPDATE {spam_duplicate} SET spam = %d WHERE sid = %d', DUPLICATE_NOT_SPAM, $extra['sid']);
- if (!db_affected_rows() && $extra['id'] && $extra['sid']) {
- // Updating content that we've not filtered before. Retrive all the
- // data we need to add it to the spam_duplicate table.
- $fields = spam_invoke_module($type, 'filter_fields', $extra['content']);
- $hash = _duplicate_content_hash($extra['content'], $fields);
- $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
- db_query("INSERT INTO {spam_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time());
- }
- break;
-
- }
-}
-
-/**
- *
- */
-function duplicate_admin_settings() {
- $form['content'] = array(
- '#type' => 'fieldset',
- '#title' => t('Content'),
- '#collapsible' => TRUE,
- );
- $limits = drupal_map_assoc(range(2, 15));
- $limits[-1] = t('unlimited');
- $form['content']['duplicate_threshold'] = array(
- '#type' => 'select',
- '#title' => t('Duplication threshold'),
- '#default_value' => variable_get('duplicate_threshold', DUPLICATE_DEFAULT_THRESHOLD),
- '#options' => $limits,
- '#description' => t('Specify how many times the same identical content can be posted before it will be considered spam. When tuning this filter, note that users may accidentally submit the same content multiple times causing an otherwise acceptible posting to be duplicated.'),
- );
- $form['content']['duplicate_post_message'] = array(
- '#type' => 'textarea',
- '#title' => t('Duplicate post message'),
- '#default_value' => variable_get('duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), - '#description' => t('Message to show visitors when their content has been blocked because it was posted multiple times. The text "%IP" will be replaced by the visitors actual IP address.'), - ); - - $form['ip'] = array( - '#type' => 'fieldset', - '#title' => t('IP'), - '#collapsible' => TRUE, - ); - $limits = drupal_map_assoc(range(1, 15)); - $limits[-1] = t('unlimited'); - $form['ip']['duplicate_blacklist'] = array( - '#type' => 'select', - '#title' => t('IP blacklist threshold'), - '#default_value' => variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST), - '#options' => $limits, - '#description' => t('Specify how many times a given IP address is allowed to post possible spam content before the IP address is blacklisted and prevented from posting any additional content.'), - ); - $form['ip']['duplicate_blacklist_action'] = array( - '#type' => 'radios', - '#title' => t('IP blacklist action'), - '#options' => array(t('Silently prevent visitor from posting'), t('Notify blacklisted visitor when posting, prevent from posting'), t('Notify blacklisted visitor, prevent from visiting site')), - '#default_value' => variable_get('duplicate_blacklist_action', DUPLICATE_BLACKLIST_NOTIFY), - '#description' => t('Select an action from the above options. If notification is enabled, the user will be redirected to a custom page displaying the "Blacklisted IP message" defined below. If you only prevent users from posting, they will be able to view all site content. If you prevent a user from visiting your site, they will only ever see the "Blacklisted IP message".'), - ); - $form['ip']['duplicate_blacklist_message'] = array( - '#type' => 'textarea', - '#title' => t('Blacklisted IP message'), - '#default_value' => variable_get('duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), - '#description' => t('Message to show visitors when their IP has been blacklisted. The text "%IP" will be replaced by the visitors actual IP address.') - ); - - return system_settings_form($form); -} - -/** - * Save the configuration. - */ -function duplicate_admin_settings_submit($form, &$form_state) { -/* TODO The 'op' element in the form values is deprecated. - Each button can have #validate and #submit functions associated with it. - Thus, there should be one button that submits the form and which invokes - the normal form_id_validate and form_id_submit handlers. Any additional - buttons which need to invoke different validate or submit functionality - should have button-specific functions. */ - if ($form_state['values']['op'] == t('Reset to defaults')) { - variable_del('duplicate_threshold'); - variable_del('duplicate_post_message'); - variable_del('duplicate_blacklist'); - variable_del('duplicate_blacklist_action'); - variable_del('duplicate_blacklist_message'); - drupal_set_message('Configuration reset to defaults.'); - } - else { - variable_set('duplicate_threshold', $form_state['values']['duplicate_threshold']); - variable_set('duplicate_post_message', $form_state['values']['duplicate_post_message']); - variable_set('duplicate_blacklist', $form_state['values']['duplicate_blacklist']); - variable_set('duplicate_blacklist_action', $form_state['values']['duplicate_blacklist_action']); - variable_set('duplicate_blacklist_message', $form_state['values']['duplicate_blacklist_message']); - drupal_set_message('Configuration saved.'); - } -} - -/** - * Get and md5 hash of all content truncated together. - */ -function _duplicate_content_hash($content, $fields) { - if (is_object($content)) { - $content = (array)$content; - } - $hash = ''; - foreach ($fields['main'] as $field) { - $hash .= $content[$field]; - } - return md5($hash); -} - -/** - * Determine whether or not the content is spam. 
- */ -function duplicate_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { - $score = 0; - $action = array(); - $hash = _duplicate_content_hash($content, $fields); - $id = spam_invoke_module($type, 'content_id', $content, $extra); - $duplicate_hash = db_result(db_query("SELECT COUNT(d.iid) FROM {spam_duplicate} d LEFT JOIN {spam_tracker} t ON d.sid = t.sid WHERE content_hash = '%s' AND content_id <> %d", $hash, $id)) + 1; - if ($duplicate_hash >= variable_get('duplicate_threshold', DUPLICATE_DEFAULT_THRESHOLD)) { - $sids = db_query("SELECT sid FROM {spam_duplicate} WHERE content_hash = '%s'", $hash); - if (!$filter_test) { - while ($sid = db_result($sids)) { - $unpublish = db_fetch_object(db_query('SELECT content_type, content_id, score FROM {spam_tracker} WHERE sid = %d', $sid)); - spam_mark_as_spam($unpublish->content_type, $unpublish->content_id, array('score' => 99)); - } - // Update counter tracking that we've blocked a duplicate posting of this - // content. (It will actually increment the counter on - // "duplicate_threshold" rows.) 
- db_query("UPDATE {spam_duplicate} SET duplicate_hash = duplicate_hash + 1 WHERE content_hash = '%s'", $hash); - } - $action['hash'] = array( - 'score' => 99, - 'description' => t('Content is identical to %count other existing posts.', array('%count' => variable_get('duplicate_threshold', DUPLICATE_DEFAULT_THRESHOLD))), - ); - $action['total'] = 99; - $action['redirect'] = 'duplicate/denied/post'; - _duplicate_action($action); - return $action; - } - - $duplicate_ip = db_result(db_query("SELECT COUNT(iid) FROM {spam_duplicate} WHERE hostname = '%s' AND spam = %d", ip_address(), DUPLICATE_SPAM)); - if ($duplicate_ip >= variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST) && (variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST) > -1)) { - $action['ip'] = array( - 'score' => 99, - 'description' => t('Content was posted by the same IP address used to post %count other spam posts.', array('%count' => variable_get('duplicate_blacklist', DUPLICATE_DEFAULT_BLACKLIST))), - ); - $action['total'] = 99; - $action['redirect'] = 'duplicate/denied/ip'; - } - - return $action; -} - -function _duplicate_action($register = array()) { - static $action = array(); - - if (!empty($register)) { - $action = $register; - } - - return $action; -} - -/** - * - */ -function duplicate_denied_ip() { - $message = strtr(variable_get('duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); - spam_denied_page($message, t('Your IP address has been blocked by our spam filter.')); -} - -/** - * - */ -function duplicate_denied_post() { - $message = strtr(variable_get('duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); - spam_denied_page($message, t('You have attempted to post the same content multiple times.')); -} - -function spam_logs_blocked_ip() { - drupal_set_title(t('Spam Module Blocked IPs')); - - $header = array( - array('data' => t('IP Address'), 'field' => 'hostname'), - array('data' => t('Last Seen'), 'field' => 'timestamp', 'sort' => 'desc'), - array('data' => t('Counter'), 'field' => 'count'), - ); - - // This SQL is *nasty*, so if you think you can do better, please be my guest! - // This unfortunately has to be SQL because the pager module can't be told - // how many rows we've got (so we can't do our own processing in PHP and - // still get paging to work properly). - $sql = "SELECT * FROM (SELECT DISTINCT x.hostname, x.timestamp, COUNT(x.hostname) AS count FROM (SELECT timestamp, hostname FROM {spam_tracker} WHERE score > %d ORDER BY timestamp DESC) AS x GROUP BY x.hostname) AS y WHERE y.count >= %d"; - $arguments = array(variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD), variable_get('spam_blacklist_ip', DUPLICATE_DEFAULT_BLACKLIST)); - - $count_sql = preg_replace('/^SELECT \* /', 'SELECT count(hostname) ', $sql); - - $result = pager_query($sql . tablesort_sql($header), 50, 0, $count_sql, $arguments); - - while ($log = db_fetch_object($result)) { - $rows[] = array('data' => array( - $log->hostname, - format_date($log->timestamp, 'small'), - $log->count - ) - ); - } - - if (!$rows) { - $rows[] = array(array('data' => t('No log messages available.'), 'colspan' => 6)); - } - - return theme('table', $header, $rows) . 
theme('pager', NULL, 50, 0); -} diff -uprN /home/files/coding/drupal/spam/filters/node_age/node_age.info sites/all/modules/spam/filters/node_age/node_age.info --- /home/files/coding/drupal/spam/filters/node_age/node_age.info 2009-09-01 10:28:00.000000000 +1000 +++ sites/all/modules/spam/filters/node_age/node_age.info 1970-01-01 10:00:00.000000000 +1000 @@ -1,13 +0,0 @@ -; $Id: node_age.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ -name = Spam node age filter -description = A node-age comment filter plug-in for the spam module. -package = Spam -dependencies[] = spam -dependencies[] = comment -core = 6.x -; Information added by drupal.org packaging script on 2009-09-01 -version = "6.x-1.x-dev" -core = "6.x" -project = "spam" -datestamp = "1251764880" - diff -uprN /home/files/coding/drupal/spam/filters/node_age/node_age.module sites/all/modules/spam/filters/node_age/node_age.module --- /home/files/coding/drupal/spam/filters/node_age/node_age.module 2009-08-07 03:55:31.000000000 +1000 +++ sites/all/modules/spam/filters/node_age/node_age.module 1970-01-01 10:00:00.000000000 +1000 @@ -1,196 +0,0 @@ -. All rights reserved. - * - */ - -/** - * Drupal _menu() hook. 
- */ -function node_age_menu() { - $items = array(); - - $items['admin/settings/spam/filters/node-age'] = array( - 'title' => 'Node age', - 'page callback' => 'drupal_get_form', - 'page arguments' => array('node_age_admin_settings'), - 'access arguments' => array('administer spam'), - 'description' => 'Configure the node age filter.', - 'type' => MENU_LOCAL_TASK, - ); - - return $items; -} - -function node_age_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { - - switch ($op) { - case 'filter': - if (!module_invoke('spam', 'filter_enabled', 'node_age', $type, $content, $fields, $extra)) return; - return node_age_spam_filter($content, $type, $fields, $extra); - - case 'filter_module': - return 'node_age'; - - case 'filter_info': - return array( - 'name' => t('Node age'), - 'module' => t('node_age'), - 'description' => t('A node-age comment spam filter.'), - 'help' => t('The node-age filter assigns a higher spam probability to comments made against older nodes.'), - ); - break; - - case 'filter_install': - return array( - 'status' => SPAM_FILTER_ENABLED, - 'gain' => 150, - 'weight' => -2, - ); - - } -} - -/** - * Determine if a comment is being posted against too old of a node. 
- */ -function node_age_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { - if ($type == 'comment') { - $action = array(); - $id = spam_invoke_module($type, 'content_id', $content, $extra); - if (arg(0) == 'comment' && arg(1) == 'reply' && is_numeric(arg(2))) { - $nid = arg(2); - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('retrieved nid (@nid) from url', array('@nid' => $nid)), $type, $id); - } - else { - $nid = db_result(db_query('SELECT nid FROM {comments} WHERE cid = %d', $id)); - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('retrieved nid (@nid) from database', array('@nid' => $nid)), $type, $id); - } - - if (!$nid) { - spam_log(SPAM_LOG, 'node_age_spam_filter', t('warning: nid not found for comment (@cid), skipping', array('@cid' => $id)), $type, $id); - $action['total'] = 0; - return $action; - } - - $node = spam_invoke_module('node', 'load', $nid); - if (is_object($node)) { - $timestamp_field = variable_get('node_age_filter_on', 'created'); - if ($node->$timestamp_field < (time() - variable_get('node_age_limit_long', 4838400))) { - $action['total'] = variable_get('node_age_weight_long', 99); - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('node (@nid) older than long limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); - } - else if ($node->$timestamp_field < (time() - variable_get('node_age_limit_short', 2419200))) { - $action['total'] = variable_get('node_age_weight_short', 85); - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('node (@nid) older than short limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); - } - else { - $action['total'] = 0; - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('node (@nid) is recent.', array('@nid' => $nid)), $type, $id); - } - } else { - spam_log(SPAM_LOG, 'node_age_spam_filter', t('warning: nid (@nid) does not map to node, skipping', array('@nid' => $nid)), $type, $id); - 
$action['total'] = 0; - } - } - else { - spam_log(SPAM_DEBUG, 'node_age_spam_filter', t('content type is not comment, skipping'), $type, $id); - $action['total'] = 0; - } - return $action; -} - -/** - * Module administrative configuration options. - */ -function node_age_admin_settings() { - $form = array(); - $form['short'] = array( - '#type' => 'fieldset', - '#title' => 'Old content', - '#collapsible' => TRUE, - '#collapsed' => TRUE, - ); - $limits = drupal_map_assoc(spam_range(604800, 14515200, 604800), 'format_interval'); - $weights = drupal_map_assoc(array(60, 65, 70, 75, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99)); - $form['short']['node_age_limit_short'] = array( - '#type' => 'select', - '#title' => t('Old content is content not published within the past'), - '#options' => $limits, - '#required' => TRUE, - '#default_value' => variable_get('node_age_limit_short', 2419200), - ); - $form['short']['node_age_weight_short'] = array( - '#type' => 'select', - '#title' => t('Probability that comments posted to old content is spam'), - '#options' => $weights, - '#required' => TRUE, - '#description' => t('Probability that comments posted to old content are spam, as a percentage.'), - '#default_value' => variable_get('node_age_weight_short', 85), - ); - - $form['long'] = array( - '#type' => 'fieldset', - '#title' => 'Really old content', - '#collapsible' => TRUE, - '#collapsed' => TRUE, - ); - $form['long']['node_age_limit_long'] = array( - '#type' => 'select', - '#title' => t('Really old content is content not published within the past'), - '#options' => $limits, - '#required' => TRUE, - '#default_value' => variable_get('node_age_limit_long', 4838400), - ); - $form['long']['node_age_weight_long'] = array( - '#type' => 'select', - '#title' => t('Probability that comments posted to really old content is spam'), - '#options' => $weights, - '#required' => TRUE, - '#description' => t('Probability that comments posted to really old 
content are spam, as a percentage.'), - '#default_value' => variable_get('node_age_weight_long', 99), - ); - return system_settings_form($form); -} - -/** - * Validate the configuration. - */ -function node_age_admin_settings_validate($form, &$form_state) { - $limit_short = $form_state['values']['node_age_limit_short']; - $limit_long = $form_state['values']['node_age_limit_long']; - if ($limit_short >= $limit_long) { - form_set_error('node_age_limit_long', t('Really old content has to be older than old content.')); - } -} - -/** - * Save the configuration. - */ -function node_age_admin_settings_submit($form, &$form_state) { -/* TODO The 'op' element in the form values is deprecated. - Each button can have #validate and #submit functions associated with it. - Thus, there should be one button that submits the form and which invokes - the normal form_id_validate and form_id_submit handlers. Any additional - buttons which need to invoke different validate or submit functionality - should have button-specific functions. 
*/ - if ($form_state['values']['op'] == t('Reset to defaults')) { - variable_del('node_age_limit_short'); - variable_del('node_age_weight_short'); - variable_del('node_age_limit_long'); - variable_del('node_age_weight_long'); - drupal_set_message('Configuration reset to defaults.'); - } - else { - variable_set('node_age_limit_short', $form_state['values']['node_age_limit_short']); - variable_set('node_age_weight_short', $form_state['values']['node_age_weight_short']); - variable_set('node_age_limit_long', $form_state['values']['node_age_limit_long']); - variable_set('node_age_weight_long', $form_state['values']['node_age_weight_long']); - drupal_set_message('Configuration saved.'); - } -} diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.info sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.info --- /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.info 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.info 2009-12-02 19:13:25.000000000 +1100 @@ -0,0 +1,12 @@ +; $Id: bayesian.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ +name = Spam Bayesian filter +description = A bayesian filter. +package = Spam +dependencies[] = spam +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install --- /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,120 @@ +. All rights reserved. 
+ * + * Provides a generic Bayesian filter for use with other modules. + * Defines hooks for use with the Spam API. + */ + +/** + * Implementation of hook_schema(). + */ +function spam_filter_bayesian_schema() { + $schema['spam_filter_bayesian_tokens'] = array( + 'description' => t('TODO'), + 'fields' => array( + 'tid' => array( + 'description' => t('TODO'), + 'type' => 'serial', + 'unsigned' => 1, + 'not null' => TRUE, + ), + 'class' => array( + 'description' => t('TODO'), + 'type' => 'varchar', + 'length' => 32, + 'not null' => TRUE, + 'default' => '', + ), + 'token' => array( + 'description' => t('TODO'), + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'default' => '', + ), + 'yes_count' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'no_count' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'probability' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'last' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + ), + 'indexes' => array( + 'yes_count' => array('yes_count'), + 'no_count' => array('no_count'), + 'probability' => array('probability'), + 'last' => array('last'), + ), + 'unique keys' => array( + 'token' => array('class', 'token'), + ), + 'primary key' => array('tid'), + ); + + return $schema; +} + +/** + * Install bayesian module schema. + */ +function spam_filter_bayesian_install() { + // Create tables. + drupal_install_schema('spam_filter_bayesian'); +} + +/** + * Completely uninstall the spam module. + */ +function spam_filter_bayesian_uninstall() { + // Remove tables. 
+ drupal_uninstall_schema('spam_filter_bayesian'); + + $tables = array('spam_filter_bayesian_tokens'); + foreach ($tables as $table) { + } + drupal_set_message(t('All bayesian module configuration data and tables have been deleted.')); +} + +/** + * Fix variable namespacing. + */ +function spam_filter_bayesian_update_6101() { + $ret = array(); + $old_vars = array('bayesian_default_probability', 'bayesian_interesting_tokens', 'bayesian_minimum_token_length', 'bayesian_tokenizer'); + foreach ( $old_vars as $var ) { + $test = variable_get($var, null); + if ( !empty($test) ) { + $ret[] = update_sql('UPDATE {variables} SET name = %s WHERE name = %s', 'spam_filter_'.$var, $var); + } + } + return $ret; + } + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install~ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install~ --- /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.install~ 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,120 @@ +. All rights reserved. + * + * Provides a generic Bayesian filter for use with other modules. + * Defines hooks for use with the Spam API. + */ + +/** + * Implementation of hook_schema(). 
+ */ +function spam_bayesian_schema() { + $schema['spam_bayesian_tokens'] = array( + 'description' => t('TODO'), + 'fields' => array( + 'tid' => array( + 'description' => t('TODO'), + 'type' => 'serial', + 'unsigned' => 1, + 'not null' => TRUE, + ), + 'class' => array( + 'description' => t('TODO'), + 'type' => 'varchar', + 'length' => 32, + 'not null' => TRUE, + 'default' => '', + ), + 'token' => array( + 'description' => t('TODO'), + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + 'default' => '', + ), + 'yes_count' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'no_count' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'probability' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + 'last' => array( + 'description' => t('TODO'), + 'type' => 'int', + 'unsigned' => 1, + 'not null' => TRUE, + 'default' => 0, + ), + ), + 'indexes' => array( + 'yes_count' => array('yes_count'), + 'no_count' => array('no_count'), + 'probability' => array('probability'), + 'last' => array('last'), + ), + 'unique keys' => array( + 'token' => array('class', 'token'), + ), + 'primary key' => array('tid'), + ); + + return $schema; +} + +/** + * Install bayesian module schema. + */ +function spam_bayesian_install() { + // Create tables. + drupal_install_schema('spam_bayesian'); +} + +/** + * Completely uninstall the spam module. + */ +function spam_bayesian_uninstall() { + // Remove tables. + drupal_uninstall_schema('spam_bayesian'); + + $tables = array('spam_bayesian_tokens'); + foreach ($tables as $table) { + } + drupal_set_message(t('All bayesian module configuration data and tables have been deleted.')); +} + +/** + * Fix variable namespacing. 
+ */ +function spam_bayesian_update_6101() { + $ret = array(); + $old_vars = array('bayesian_default_probability', 'bayesian_interesting_tokens', 'bayesian_minimum_token_length', 'bayesian_tokenizer'); + foreach ( $old_vars as $var ) { + $test = variable_get($var, null); + if ( !empty($test) ) { + $ret[] = update_sql('UPDATE {variables} SET name = %s WHERE name = %s', 'spam_'.$var, $var); + } + } + return $ret; + } + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module --- /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,189 @@ +. All rights reserved. + * + * Provides a generic Bayesian filter for use with other modules. + * Defines hooks for use with the Spam API. 
+ */ + +/** + * Spam API Hook + */ +function spam_filter_bayesian_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + switch ($op) { + + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_bayesian', $type, $content, $fields, $extra)) return; + return spam_filter_bayesian_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_bayesian'; + break; + + case 'filter_info': + return array( + 'name' => t('Bayesian filter'), + 'module' => t('spam_filter_bayesian'), + 'description' => t('A bayesian spam filter.'), + 'help' => t('The bayesian filter can learn to tell the difference between valid content spam content.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + ); + + case 'mark_as_spam': + case 'mark_as_not_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_bayesian', $type, $content, $fields, $extra)) return; + spam_log(SPAM_DEBUG, 'spam_filter_bayesian_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); + $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); + $tokenizer = variable_get('spam_filter_bayesian_tokenizer', 'spam_filter_bayesian_tokenize'); + $tokens = $tokenizer($extra['content'], $type, $fields, $extra); + spam_filter_bayesian_tokens_update('spam', $tokens, ($op == 'mark_as_spam' ? TRUE : FALSE), $type, $extra['id']); + break; + } +} + +/** + * Determine whether or not the content is spam. 
+ */ +function spam_filter_bayesian_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $class = 'spam'; + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $tokenizer = variable_get('spam_filter_bayesian_tokenizer', 'spam_filter_bayesian_tokenize'); + $tokens = $tokenizer($content, $type, $fields, $extra); + if (is_array($tokens)) { + foreach ($tokens as $token) { + $p = db_fetch_object(db_query("SELECT probability FROM {spam_filter_bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); + if (!$p->probability) { + $p->probability = variable_get('spam_filter_bayesian_default_probability', 40); + } + $t["$token,$p->probability"] = abs($p->probability - 50); + } + } + else { + // No tokens, return default score. + $action['total'] = variable_get('spam_filter_bayesian_default_probability', 40); + return $action; + } + + /* Sort token array so those tokens with the largest "drift" come first. + * Drift is this distance from a median of 50%. + */ + asort($t); + + /* Take the n most "interesting" tokens from the top of the token array. + * The larger a token's drift, the more interesting it is. 
+ */ + $keys = array_keys($t); + $max = variable_get('spam_filter_bayesian_interesting_tokens', 15); + $total = 0; + for ($i = 0; $i < $max; $i++) { + if ($pair = array_pop($keys)) { + $p = explode(',', $pair); + $total = $total + $p[1]; + $action['spam_filter_bayesian'][$i] = array( + 'token' => $p[0], + 'probability' => $p[1], + ); + spam_log(SPAM_DEBUG, 'spam_filter_bayesian_spam_filter', t('interesting token [@count] (@token) probability(@probability)', array('@token' => $p[0], '@probability' => $p[1], '@count' => $i + 1)), $type, $id); + } + else { + // we've looked at all the tokens + break; + } + } + + $probability = round($total / $i, 1); + spam_log(SPAM_VERBOSE, 'spam_filter_bayesian_spam_filter', t('total(@total) count(@count) probability(@probability)', array('@probability' => $probability, '@total' => $total, '@count' => $i)), $type, $id); + + $action['total'] = $probability; + return $action; +} + +/** + * Update token probabilities in database. + */ +function spam_filter_bayesian_tokens_update($class, $tokens, $yes, $type = NULL, $id = 0) { + if (!is_array($tokens) || empty($tokens)) return; + foreach ($tokens as $token) { + $old = db_fetch_object(db_query("SELECT probability, yes_count, no_count FROM {spam_filter_bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); + if ($old->probability) { + $total = $old->yes_count + $old->no_count + 1; + $probability = spam_sanitize_score(($old->yes_count + ($yes ? 1 : 0)) / $total * 100); + spam_log(SPAM_DEBUG, 'spam_filter_bayesian_tokens_update', t('update token(@token) class(@class) yes(@yes) no(@no) prob(@prob): added @new', array('@token' => $token, '@class' => $class, '@yes' => $old->yes_count + ($yes ? 1 : 0), '@no' => $old->no_count + ($yes ? 0 : 1), '@prob' => $probability, '@new' => $yes ? 
'yes' : 'no')), $type, $id); + if ($yes) { + db_query("UPDATE {spam_filter_bayesian_tokens} SET yes_count = yes_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); + } + else { + db_query("UPDATE {spam_filter_bayesian_tokens} SET no_count = no_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); + } + } + else { + $probability = ($yes ? 99 : 1); + spam_log(SPAM_DEBUG, 'spam_filter_bayesian_tokens_update', t('insert token(@token) class(@class) probability(@probability)', array('@token' => $token, '@class' => $class, '@probability' => $probability)), $type, $id); + db_query("INSERT INTO {spam_filter_bayesian_tokens} (class, token, yes_count, no_count, probability, last) VALUES('%s', '%s', %d, %d, %d, %d)", $class, $token, ($yes ? 1 : 0), ($yes ? 0 : 1), $probability, time()); + } + } +} + +/** + * Split content into an array of tokens. + */ +function spam_filter_bayesian_tokenize($content, $type, $fields, $extra = array(), $tag = NULL) { + static $tokens = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($tokens["$type-$id-$tag"])) { + $string = spam_get_text($content, $type, $fields, $extra); + + $URI = "(http://|https://|ftp://|mailto:)"; + // strip out unwanted html/url noise + $sanitized = preg_replace("'(www\.)|()|(href=)|(target=)|(src=)'i", '', $string); + $sanitized = preg_replace($URI, '', $sanitized); + + // Strip out values that should not be considered part of tokens, so + // things like '{viagra}' and 'vi.agra' are counted as hits towards + // 'viagra' + $sanitized = preg_replace("/[()\{\}\[\]#.,]/", '', $sanitized); + + // Force all tokens to lowercase, again to aggregate tokens. 
This both + // lowers the total token number of rows in the spam_tokens table and + // increases the strength of individual tokens by linking them to + // capitalized versions. + $sanitized = drupal_strtolower($sanitized); + + // divide sanitized string into tokens + $delimiters = " \t\n\r-_<>'\"`/|*%^&+=~:;?"; + $tok = strtok($sanitized, $delimiters); + while ($tok !== FALSE) { + // Only inspect the token if over minimum length. + if (drupal_strlen($tok) >= variable_get('spam_filter_bayesian_minimum_token_length', 3)) { + // If the token is longer than 255 characters, truncate it. + $toks[] = htmlspecialchars(drupal_substr("$tag$tok", 0, 254)); + } + $tok = strtok($delimiters); + } + + // allow external module ability to extract additional tokens + $hook = spam_invoke_api('tokenize', $string, $tag); + if ($hook['tokens']) { + $toks = array_merge($toks, $hook['tokens']); + } + $tokens["$type-$id-$tag"] = $toks; + } + + return $tokens["$type-$id-$tag"]; +} diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module~ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module~ --- /home/files/coding/drupal/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_bayesian/spam_filter_bayesian.module~ 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,189 @@ +. All rights reserved. + * + * Provides a generic Bayesian filter for use with other modules. + * Defines hooks for use with the Spam API. 
+ */ + +/** + * Spam API Hook + */ +function spam_bayesian_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + switch ($op) { + + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_bayesian', $type, $content, $fields, $extra)) return; + return spam_bayesian_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_bayesian'; + break; + + case 'filter_info': + return array( + 'name' => t('Bayesian filter'), + 'module' => t('spam_bayesian'), + 'description' => t('A bayesian spam filter.'), + 'help' => t('The bayesian filter can learn to tell the difference between valid content spam content.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + ); + + case 'mark_as_spam': + case 'mark_as_not_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_bayesian', $type, $content, $fields, $extra)) return; + spam_log(SPAM_DEBUG, 'spam_bayesian_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); + $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); + $tokenizer = variable_get('spam_bayesian_tokenizer', 'spam_bayesian_tokenize'); + $tokens = $tokenizer($extra['content'], $type, $fields, $extra); + spam_bayesian_tokens_update('spam', $tokens, ($op == 'mark_as_spam' ? TRUE : FALSE), $type, $extra['id']); + break; + } +} + +/** + * Determine whether or not the content is spam. 
+ */ +function spam_bayesian_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $class = 'spam'; + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $tokenizer = variable_get('spam_bayesian_tokenizer', 'spam_bayesian_tokenize'); + $tokens = $tokenizer($content, $type, $fields, $extra); + if (is_array($tokens)) { + foreach ($tokens as $token) { + $p = db_fetch_object(db_query("SELECT probability FROM {spam_bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); + if (!$p->probability) { + $p->probability = variable_get('spam_bayesian_default_probability', 40); + } + $t["$token,$p->probability"] = abs($p->probability - 50); + } + } + else { + // No tokens, return default score. + $action['total'] = variable_get('spam_bayesian_default_probability', 40); + return $action; + } + + /* Sort token array so those tokens with the largest "drift" come first. + * Drift is this distance from a median of 50%. + */ + asort($t); + + /* Take the n most "interesting" tokens from the top of the token array. + * The larger a token's drift, the more interesting it is. 
+ */ + $keys = array_keys($t); + $max = variable_get('spam_bayesian_interesting_tokens', 15); + $total = 0; + for ($i = 0; $i < $max; $i++) { + if ($pair = array_pop($keys)) { + $p = explode(',', $pair); + $total = $total + $p[1]; + $action['spam_bayesian'][$i] = array( + 'token' => $p[0], + 'probability' => $p[1], + ); + spam_log(SPAM_DEBUG, 'spam_bayesian_spam_filter', t('interesting token [@count] (@token) probability(@probability)', array('@token' => $p[0], '@probability' => $p[1], '@count' => $i + 1)), $type, $id); + } + else { + // we've looked at all the tokens + break; + } + } + + $probability = round($total / $i, 1); + spam_log(SPAM_VERBOSE, 'spam_bayesian_spam_filter', t('total(@total) count(@count) probability(@probability)', array('@probability' => $probability, '@total' => $total, '@count' => $i)), $type, $id); + + $action['total'] = $probability; + return $action; +} + +/** + * Update token probabilities in database. + */ +function spam_bayesian_tokens_update($class, $tokens, $yes, $type = NULL, $id = 0) { + if (!is_array($tokens) || empty($tokens)) return; + foreach ($tokens as $token) { + $old = db_fetch_object(db_query("SELECT probability, yes_count, no_count FROM {spam_bayesian_tokens} WHERE class = '%s' AND token = '%s'", $class, $token)); + if ($old->probability) { + $total = $old->yes_count + $old->no_count + 1; + $probability = spam_sanitize_score(($old->yes_count + ($yes ? 1 : 0)) / $total * 100); + spam_log(SPAM_DEBUG, 'spam_bayesian_tokens_update', t('update token(@token) class(@class) yes(@yes) no(@no) prob(@prob): added @new', array('@token' => $token, '@class' => $class, '@yes' => $old->yes_count + ($yes ? 1 : 0), '@no' => $old->no_count + ($yes ? 0 : 1), '@prob' => $probability, '@new' => $yes ? 
'yes' : 'no')), $type, $id); + if ($yes) { + db_query("UPDATE {spam_bayesian_tokens} SET yes_count = yes_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); + } + else { + db_query("UPDATE {spam_bayesian_tokens} SET no_count = no_count + 1, probability = %d, last = %d WHERE class = '%s' AND token = '%s'", $probability, time(), $class, $token); + } + } + else { + $probability = ($yes ? 99 : 1); + spam_log(SPAM_DEBUG, 'spam_bayesian_tokens_update', t('insert token(@token) class(@class) probability(@probability)', array('@token' => $token, '@class' => $class, '@probability' => $probability)), $type, $id); + db_query("INSERT INTO {spam_bayesian_tokens} (class, token, yes_count, no_count, probability, last) VALUES('%s', '%s', %d, %d, %d, %d)", $class, $token, ($yes ? 1 : 0), ($yes ? 0 : 1), $probability, time()); + } + } +} + +/** + * Split content into an array of tokens. + */ +function spam_bayesian_tokenize($content, $type, $fields, $extra = array(), $tag = NULL) { + static $tokens = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($tokens["$type-$id-$tag"])) { + $string = spam_get_text($content, $type, $fields, $extra); + + $URI = "(http://|https://|ftp://|mailto:)"; + // strip out unwanted html/url noise + $sanitized = preg_replace("'(www\.)|()|(href=)|(target=)|(src=)'i", '', $string); + $sanitized = preg_replace($URI, '', $sanitized); + + // Strip out values that should not be considered part of tokens, so + // things like '{viagra}' and 'vi.agra' are counted as hits towards + // 'viagra' + $sanitized = preg_replace("/[()\{\}\[\]#.,]/", '', $sanitized); + + // Force all tokens to lowercase, again to aggregate tokens. This both + // lowers the total token number of rows in the spam_tokens table and + // increases the strength of individual tokens by linking them to + // capitalized versions. 
+ $sanitized = drupal_strtolower($sanitized); + + // divide sanitized string into tokens + $delimiters = " \t\n\r-_<>'\"`/|*%^&+=~:;?"; + $tok = strtok($sanitized, $delimiters); + while ($tok !== FALSE) { + // Only inspect the token if over minimum length. + if (drupal_strlen($tok) >= variable_get('spam_bayesian_minimum_token_length', 3)) { + // If the token is longer than 255 characters, truncate it. + $toks[] = htmlspecialchars(drupal_substr("$tag$tok", 0, 254)); + } + $tok = strtok($delimiters); + } + + // allow external module ability to extract additional tokens + $hook = spam_invoke_api('tokenize', $string, $tag); + if ($hook['tokens']) { + $toks = array_merge($toks, $hook['tokens']); + } + $tokens["$type-$id-$tag"] = $toks; + } + + return $tokens["$type-$id-$tag"]; +} diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.info sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.info --- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.info 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.info 2009-12-02 19:13:25.000000000 +1100 @@ -0,0 +1,12 @@ +; $Id: custom.info,v 1.1.2.2.2.1 2008/12/25 05:42:10 jeremy Exp $ +name = Spam Custom filter +description = Allows the creation of custom spam filter rules. 
+package = Spam +dependencies[] = spam +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.install sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.install --- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.install 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.install 2009-12-09 14:37:34.000000000 +1100 @@ -0,0 +1,60 @@ + array( + 'cid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'), + 'filter' => array('type' => 'varchar', 'length' => '255', 'not null' => TRUE, 'default' => ''), + 'style' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'status' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '2'), + 'scan' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'action' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'matches' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'), + 'last' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'), + 'weight' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + ), + 'primary key' => array('cid'), + 'indexes' => array( + 'filter' => array('filter'), + 'last' => array('last'), + 'matches' => array('matches'), + 'weight' => array('weight'), + ), + ); + + return $schema; +} + +function spam_filter_custom_install() { + // Create my tables. 
+ drupal_install_schema('spam_filter_custom'); +} + +function spam_filter_custom_uninstall() { + // Remove tables. + drupal_uninstall_schema('spam_filter_custom'); + drupal_set_message('The spam_filter_custom table has been dropped.'); +} + +/** + * Fix variable namespacing. + */ +function spam_filter_custom_update_6101() { + $ret = array(); + $old_vars = array('custom_probably', 'custom_probablynot', 'custom_probablynot_value', 'custom_probably_value'); + foreach ( $old_vars as $var ) { + $test = variable_get($var, null); + if ( !empty($test) ) { + $ret[] = update_sql('UPDATE {variables} SET name = %s WHERE name = %s', 'spam_filter_'.$var, $var); + } + } + return $ret; + } + + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.install~ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.install~ --- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.install~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.install~ 2009-12-09 14:37:34.000000000 +1100 @@ -0,0 +1,60 @@ + array( + 'cid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'), + 'filter' => array('type' => 'varchar', 'length' => '255', 'not null' => TRUE, 'default' => ''), + 'style' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'status' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '2'), + 'scan' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'action' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + 'matches' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'), + 'last' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => 
'11'), + 'weight' => array('type' => 'int', 'not null' => FALSE, 'default' => 0, 'disp-width' => '3'), + ), + 'primary key' => array('cid'), + 'indexes' => array( + 'filter' => array('filter'), + 'last' => array('last'), + 'matches' => array('matches'), + 'weight' => array('weight'), + ), + ); + + return $schema; +} + +function spam_filter_custom_install() { + // Create my tables. + drupal_install_schema('spam_filter_custom'); +} + +function spam_filter_custom_uninstall() { + // Remove tables. + drupal_uninstall_schema('spam_filter_custom'); + drupal_set_message('The spam_filter_custom table has been dropped.'); +} + +/** + * Fix variable namespacing. + */ +function spam_filter_custom_update_6101() { + $ret = array(); + $old_vars = array('custom_probably', 'custom_probablynot', 'custom_probablynot_value', 'custom_probably_value'); + foreach ( $old_vars as $var ) { + $test = variable_get($var, null); + if ( !empty($test) ) { + $ret[] = update_sql('UPDATE {variables} SET name = %s WHERE name = %s', 'spam_filter_'.$var, $var); + } + } + return $ret; + } + + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.module sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.module --- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.module 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.module 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,584 @@ +. + * + * Allows manual definition of words and regular expressions to detect spam + * content. 
+ */ + +define(SPAM_FILTER_CUSTOM_STYLE_PLAIN, 0); +define(SPAM_FILTER_CUSTOM_STYLE_REGEX, 1); + +define(SPAM_FILTER_CUSTOM_STATUS_NOTSPAM, -2); +define(SPAM_FILTER_CUSTOM_STATUS_PROBABLYNOT, -1); +define(SPAM_FILTER_CUSTOM_STATUS_DISABLED, 0); +define(SPAM_FILTER_CUSTOM_STATUS_PROBABLY, 1); +define(SPAM_FILTER_CUSTOM_STATUS_SPAM, 2); + +define(SPAM_FILTER_CUSTOM_SCAN_CONTENT, 0x1); +define(SPAM_FILTER_CUSTOM_SCAN_REFERRER, 0x4); +define(SPAM_FILTER_CUSTOM_SCAN_USERAGENT, 0x8); + +// TODO: support actions +//define(SPAM_FILTER_CUSTOM_ACTION_DELETE, 0x1); +//define(SPAM_FILTER_CUSTOM_ACTION_MAIL, 0x2); + +/** + * Spam API Hook + */ +function spam_filter_custom_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_custom', $type, $content, $fields, $extra)) return; + return spam_filter_custom_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_custom'; + + case 'filter_info': + return array( + 'name' => t('Custom filter'), + 'module' => t('spam_filter_custom'), + 'description' => t('Custom spam filters.'), + 'help' => t('The custom spam filter module allows you to manually define custom spam filter rules.'), + ); + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 250, + 'weight' => -4, + ); + } +} + +/** + * Drupal _menu() hook. 
+ */ +function spam_filter_custom_menu() { + $items = array(); + + $items['admin/settings/spam/filters/custom'] = array( + 'title' => 'Custom', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_custom_admin_settings'), + 'access arguments' => array('administer spam'), + 'description' => 'Configure the custom spam filter module.', + 'type' => MENU_LOCAL_TASK, + ); + + $items['admin/settings/spam/filters/custom/list'] = array( + 'title' => 'List', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_custom_admin_settings'), + 'access arguments' => array('administer spam'), + 'description' => 'Configure the custom spam filter module.', + 'type' => MENU_DEFAULT_LOCAL_TASK, + ); + $items['admin/settings/spam/filters/custom/create'] = array( + 'title' => 'Create', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_custom_admin_filter'), + 'access arguments' => array('administer spam'), + 'description' => 'Create a custom spam filter.', + 'type' => MENU_LOCAL_TASK, + ); + $items["admin/settings/spam/filters/custom/%/edit"] = array( + 'title' => 'Create', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_custom_admin_filter', 5), + 'access arguments' => array('administer spam'), + 'description' => 'Edit a custom spam filter.', + 'type' => MENU_LOCAL_TASK, + ); + + return $items; +} + +/** + * Adminsitrative interface for configuring custom spam filter rules. + */ +function spam_filter_custom_admin_settings() { + $form = array(); + + $form['options'] = array( + '#type' => 'fieldset', + '#title' => t('Options'), + '#prefix' => 'plain text. If you would like to define a regular expression, your filter must be formatted as a Perl-compatible regular expression.'),
+ '#options' => array(SPAM_FILTER_CUSTOM_STYLE_PLAIN => t('Plain text'), SPAM_FILTER_CUSTOM_STYLE_REGEX => t('Regular expression')),
+ '#default_value' => $spam_filter_custom->cid ? $spam_filter_custom->style : SPAM_FILTER_CUSTOM_STYLE_PLAIN,
+ '#required' => TRUE,
+ );
+ $options = array(SPAM_FILTER_CUSTOM_SCAN_CONTENT => ('Content'), SPAM_FILTER_CUSTOM_SCAN_REFERRER => t('Referrer'), SPAM_FILTER_CUSTOM_SCAN_USERAGENT => t('User agent'));
+ $scan = array();
+ if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_CONTENT) {
+ $scan[] = SPAM_FILTER_CUSTOM_SCAN_CONTENT;
+ }
+ if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_REFERRER) {
+ $scan[] = SPAM_FILTER_CUSTOM_SCAN_REFERRER;
+ }
+ if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_USERAGENT) {
+ $scan[] = SPAM_FILTER_CUSTOM_SCAN_USERAGENT;
+ }
+ $form['scan'] = array(
+ '#type' => 'checkboxes',
+ '#title' => t('Scan'),
+ '#description' => t('Specify where you\'d like to apply your custom filter.'),
+ '#options' => $options,
+ '#required' => TRUE,
+ '#default_value' => !empty($scan) ? $scan : array(SPAM_FILTER_CUSTOM_SCAN_CONTENT),
+ );
+ $options = array();
+ $form['status'] = array(
+ '#type' => 'radios',
+ '#title' => t('Status'),
+ '#description' => t('Select the status to apply when your custom filter matches site content. Filters are tested in the order they are displayed above, thus if content matches a filter that says to mark it as spam, and another to mark it as not spam, the first to match will be the actual status applied.'),
+ '#options' => array(
+ SPAM_FILTER_CUSTOM_STATUS_DISABLED => t('Disabled'),
+ SPAM_FILTER_CUSTOM_STATUS_SPAM => t('Mark as spam'),
+ SPAM_FILTER_CUSTOM_STATUS_PROBABLY => t('Mark as probably spam'),
+ SPAM_FILTER_CUSTOM_STATUS_PROBABLYNOT => t('Mark as probably not spam'),
+ SPAM_FILTER_CUSTOM_STATUS_NOTSPAM => t('Mark as not spam')),
+ '#default_value' => $spam_filter_custom->cid ? $spam_filter_custom->status : SPAM_FILTER_CUSTOM_STATUS_SPAM,
+ '#required' => TRUE,
+ );
+ $form['weight'] = array(
+ '#type' => 'weight',
+ '#title' => t('Weight'),
+ '#description' => t('Give your custom filter a weight. "Lighter" filters with smaller weights will run before "heavier" filters with larger weights.'),
+ '#default_value' => $spam_filter_custom->weight,
+ );
+
+ $form['submit'] = array(
+ '#type' => 'submit',
+ '#value' => $spam_filter_custom->cid ? t('Update filter') : t('Create filter'),
+ );
+
+ if ($spam_filter_custom->cid) {
+ $form['cid'] = array(
+ '#type' => 'hidden',
+ '#value' => $spam_filter_custom->cid,
+ );
+ }
+
+ return $form;
+}
+
+/**
+ * Validation handler for the custom filter create/edit form.
+ *
+ * Checks two things:
+ * - when the filter style is "regular expression", that preg_match() can
+ *   evaluate the submitted pattern;
+ * - that no other row in {spam_filter_custom} already stores the same
+ *   filter text.
+ *
+ * @param $form
+ *   The form structure (not used here).
+ * @param $form_state
+ *   The form state. Reads the 'style', 'filter' and, when editing an
+ *   existing filter, 'cid' entries of $form_state['values'].
+ */
+function spam_filter_custom_admin_filter_validate($form, &$form_state) {
+  $values = $form_state['values'];
+
+  if ($values['style'] == SPAM_FILTER_CUSTOM_STYLE_REGEX) {
+    // preg_match() returns FALSE (as opposed to 0 for "no match") when the
+    // pattern cannot be used, which is what marks the expression invalid.
+    if (preg_match($values['filter'], 'test') === FALSE) {
+      form_set_error('filter', t('Failed to validate your filter\'s regular expression. It must be properly formatted as a Perl-compatible regular expression. Review the above error for details on the specific problem with your expression.'));
+    }
+  }
+
+  // Look for a row that already uses this filter text. When updating, the
+  // row being edited is excluded so that it does not collide with itself.
+  if (isset($values['cid'])) {
+    $cid = db_result(db_query("SELECT cid FROM {spam_filter_custom} WHERE filter = '%s' AND cid <> %d", $values['filter'], $values['cid']));
+  }
+  else {
+    $cid = db_result(db_query("SELECT cid FROM {spam_filter_custom} WHERE filter = '%s'", $values['filter']));
+  }
+
+  if ($cid) {
+    // The error belongs to the 'filter' form element. The id of the
+    // conflicting row is not a form element name and must not be used here.
+    form_set_error('filter', t('Custom filter %filter already exists', array('%filter' => $values['filter'])));
+  }
+}
+
+/**
+ * Submit handler for the custom filter create/edit form.
+ *
+ * Updates the existing {spam_filter_custom} row when a 'cid' value was
+ * submitted, inserts a new row otherwise, and then redirects to the custom
+ * filter list.
+ *
+ * @param $form
+ *   The form structure (not used here).
+ * @param $form_state
+ *   The form state. Reads 'filter', 'style', 'status', 'scan', 'weight' and
+ *   the optional 'cid' from $form_state['values']; sets
+ *   $form_state['redirect'].
+ */
+function spam_filter_custom_admin_filter_submit($form, &$form_state) {
+  $values = $form_state['values'];
+
+  // Fold the submitted 'scan' values into a single bitmask; the form reads
+  // it back with the SPAM_FILTER_CUSTOM_SCAN_* flags. For distinct flag bits
+  // this gives the same result as adding the values together.
+  $scan = 0;
+  if (is_array($values['scan'])) {
+    foreach ($values['scan'] as $flag) {
+      $scan |= (int) $flag;
+    }
+  }
+
+  if (isset($values['cid'])) {
+    db_query("UPDATE {spam_filter_custom} SET filter = '%s', style = %d, status = %d, scan = %d, weight = %d WHERE cid = %d", $values['filter'], $values['style'], $values['status'], $scan, $values['weight'], $values['cid']);
+    drupal_set_message(t('Custom filter %filter updated.', array('%filter' => $values['filter'])));
+  }
+  else {
+    db_query("INSERT INTO {spam_filter_custom} (filter, style, status, scan, weight) VALUES ('%s', %d, %d, %d, %d)", $values['filter'], $values['style'], $values['status'], $scan, $values['weight']);
+    drupal_set_message(t('Custom filter %filter created.', array('%filter' => $values['filter'])));
+  }
+
+  // Ask the Form API to redirect instead of calling drupal_goto() here, which
+  // would end the request before any further submit handlers could run.
+  $form_state['redirect'] = 'admin/settings/spam/filters/custom';
+}
+
+/**
+ * Submit handler for the custom filter list: runs a bulk operation.
+ *
+ * Gathers the ids of the selected filters, looks up the chosen operation
+ * among those returned by the 'spam_filter_custom_operations' hook, and calls
+ * the operation's callback once per selected filter id.
+ *
+ * @param $form
+ *   The form structure (not used here).
+ * @param $form_state
+ *   The form state. Reads 'spam_filter_custom' (filter id => selected) and
+ *   'operation' from $form_state['values'].
+ */
+function spam_filter_custom_admin_settings_submit($form, &$form_state) {
+  $values = $form_state['values'];
+
+  // Keep only the filter ids whose entry was selected.
+  $process = array();
+  if (is_array($values['spam_filter_custom'])) {
+    foreach ($values['spam_filter_custom'] as $cid => $selected) {
+      if ($selected) {
+        $process[] = $cid;
+      }
+    }
+  }
+  if (empty($process)) {
+    return;
+  }
+
+  $options = module_invoke_all('spam_filter_custom_operations');
+  $operation = $values['operation'];
+  if (!isset($options[$operation])) {
+    return;
+  }
+
+  $function = $options[$operation]['callback'];
+  // An operation may come without 'callback arguments'; fall back to an empty
+  // list so array_merge() below always receives arrays.
+  $arguments = isset($options[$operation]['callback arguments']) ? (array) $options[$operation]['callback arguments'] : array();
+
+  //TODO: Why is order different than spam.module, and why 1 at a time?
+  // The filter id is appended after the operation's own arguments, and the
+  // callback is invoked separately for every selected filter.
+  foreach ($process as $cid) {
+    call_user_func_array($function, array_merge($arguments, array($cid)));
+  }
+}
+
+/**
+ * Carry out a single operation on one custom filter.
+ * TODO: Confirmation would be nice.
+ *
+ * @param $op
+ *   The operation to perform: 'delete' removes the filter row, 'disable'
+ *   sets its status to SPAM_FILTER_CUSTOM_STATUS_DISABLED. Any other value
+ *   does nothing.
+ * @param $cid
+ *   The id of the row in {spam_filter_custom} to act on.
+ */
+function spam_filter_custom_spam_filter_operations($op, $cid) {
+  $rule = db_fetch_object(db_query('SELECT cid, status, filter FROM {spam_filter_custom} WHERE cid = %d', $cid));
+
+  if ($op == 'delete') {
+    // Only act when the lookup actually found the filter.
+    if ($rule->cid) {
+      db_query('DELETE FROM {spam_filter_custom} WHERE cid = %d', $cid);
+      drupal_set_message(t('Deleted custom filter %filter.', array('%filter' => $rule->filter)));
+    }
+  }
+  elseif ($op == 'disable') {
+    // Filters that are already disabled are left alone, without a message.
+    if ($rule->cid && $rule->status != SPAM_FILTER_CUSTOM_STATUS_DISABLED) {
+      db_query('UPDATE {spam_filter_custom} SET status = %d WHERE cid = %d', SPAM_FILTER_CUSTOM_STATUS_DISABLED, $cid);
+      drupal_set_message(t('Disabled custom filter %filter.', array('%filter' => $rule->filter)));
+    }
+  }
+}
+
+/**
+ * Apply enabled custom filter rules against content.
+ *
+ * Every non-disabled rule is run, in ascending weight order, against the text
+ * selected by its scan flags (content, referrer and/or user agent). A matching
+ * "spam" or "not spam" rule decides the result immediately; "probably" and
+ * "probably not" matches are tallied, netted against each other and compared
+ * with the configured thresholds.
+ *
+ * @param $content
+ *   The content to scan, as an object or an array.
+ * @param $type
+ *   The content type, passed through to the Spam API helpers.
+ * @param $fields
+ *   The fields of $content to extract text from.
+ * @param $extra
+ *   Optional extra data passed through to the Spam API helpers.
+ * @param $filter_test
+ *   Not used by this function.
+ *
+ * @return
+ *   An array whose 'total' key holds the score: 99 for spam, 1 for not spam,
+ *   a configured intermediate value, or 0 when no rule matched. When rules
+ *   matched, 'spam_filter_custom' lists the details of each matching rule.
+ */
+function spam_filter_custom_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
+  $action = array();
+  $probably = $probably_not = 0;
+  $id = spam_invoke_module($type, 'content_id', $content, $extra);
+  $result = db_query('SELECT cid, filter, style, status, scan, action FROM {spam_filter_custom} WHERE status <> %d ORDER BY weight ASC', SPAM_FILTER_CUSTOM_STATUS_DISABLED);
+  while ($spam_filter_custom = db_fetch_object($result)) {
+    $scan = '';
+    if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_CONTENT) {
+      // scan content
+      if (is_object($content)) {
+        $content = (array)$content;
+      }
+      $scan .= spam_get_text($content, $type, $fields, $extra);
+      spam_log(SPAM_DEBUG, 'spam_filter_custom_spam_filter', t('scanning content with %filter.', array('%filter' => $spam_filter_custom->filter)), $type, $id);
+    }
+    if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_REFERRER) {
+      // scan referrer
+      // TODO: Determine if this is a live scan. If not, don't scan referrer.
+      // The header is optional, so guard against it being absent.
+      $scan .= isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
+      spam_log(SPAM_DEBUG, 'spam_filter_custom_spam_filter', t('scanning referrer with %filter.', array('%filter' => $spam_filter_custom->filter)), $type, $id);
+    }
+    if ($spam_filter_custom->scan & SPAM_FILTER_CUSTOM_SCAN_USERAGENT) {
+      // scan user agent
+      // TODO: Determine if this is a live scan. If not, don't scan user agent.
+      $scan .= isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
+      spam_log(SPAM_DEBUG, 'spam_filter_custom_spam_filter', t('scanning user agent with %filter.', array('%filter' => $spam_filter_custom->filter)), $type, $id);
+    }
+
+    // Reset per rule so that a rule with an unrecognised style cannot inherit
+    // the match count of the previous rule.
+    $match = 0;
+    $matches = array();
+    switch ($spam_filter_custom->style) {
+      case SPAM_FILTER_CUSTOM_STYLE_PLAIN:
+        // Plain-text rules are literal strings: escape regex metacharacters
+        // and the delimiter before building the pattern.
+        $match = preg_match_all('/' . preg_quote($spam_filter_custom->filter, '/') . '/', $scan, $matches);
+        break;
+      case SPAM_FILTER_CUSTOM_STYLE_REGEX:
+        $match = preg_match_all($spam_filter_custom->filter, $scan, $matches);
+        break;
+    }
+    if ($match) {
+      // Record that we've had one or more matches.
+      db_query('UPDATE {spam_filter_custom} SET matches = matches + %d, last = %d WHERE cid = %d', $match, time(), $spam_filter_custom->cid);
+      spam_log(SPAM_VERBOSE, 'spam_filter_custom_spam_filter', t('matched with %filter.', array('%filter' => $spam_filter_custom->filter)), $type, $id);
+
+      $action['spam_filter_custom'][] = array(
+        'filter' => $spam_filter_custom->filter,
+        'status' => $spam_filter_custom->status,
+        'style' => $spam_filter_custom->style,
+        'scan' => $spam_filter_custom->scan,
+        // NOTE(review): the query above does not select an 'extra' column, so
+        // this is NULL unless the query is extended - confirm the intent.
+        'extra' => isset($spam_filter_custom->extra) ? $spam_filter_custom->extra : NULL,
+      );
+
+      switch ($spam_filter_custom->status) {
+
+        case SPAM_FILTER_CUSTOM_STATUS_SPAM:
+          spam_log(SPAM_VERBOSE, 'spam_filter_custom_spam_filter', t('content is spam.'), $type, $id);
+          // no need to scan any more, we've found spam
+          $action['total'] = 99;
+          return $action;
+
+        case SPAM_FILTER_CUSTOM_STATUS_NOTSPAM:
+          spam_log(SPAM_VERBOSE, 'spam_filter_custom_spam_filter', t('content is not spam.'), $type, $id);
+          // no need to scan any more, we've found non-spam
+          $action['total'] = 1;
+          return $action;
+
+        case SPAM_FILTER_CUSTOM_STATUS_PROBABLYNOT:
+          spam_log(SPAM_DEBUG, 'spam_filter_custom_spam_filter', t('content is probably not spam.'), $type, $id);
+          // maintain internal counter that this is probably not spam
+          $probably_not += $match;
+          break;
+
+        case SPAM_FILTER_CUSTOM_STATUS_PROBABLY:
+          spam_log(SPAM_DEBUG, 'spam_filter_custom_spam_filter', t('content is probably spam.'), $type, $id);
+          // maintain internal counter that this is probably spam
+          $probably += $match;
+          break;
+      }
+    }
+  }
+
+  // Net the two tallies against each other so only the surplus counts.
+  if ($probably && $probably_not) {
+    if ($probably >= $probably_not) {
+      $probably -= $probably_not;
+      $probably_not = 0;
+    }
+    else {
+      $probably_not -= $probably;
+      $probably = 0;
+    }
+  }
+  if ($probably) {
+    spam_log(SPAM_VERBOSE, 'spam_filter_custom_spam_filter', t('matched adjusted total of !number probably spam rule(s).', array('!number' => $probably)), $type, $id);
+    if ($probably >= variable_get('spam_filter_custom_probably', 3)) {
+      $action['total'] = 99;
+    }
+    else {
+      $action['total'] = variable_get('spam_filter_custom_probably_value', variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD));
+    }
+  }
+  else if ($probably_not) {
+    spam_log(SPAM_VERBOSE, 'spam_filter_custom_spam_filter', t('matched adjusted total of !number probably-not spam rule(s).', array('!number' => $probably_not)), $type, $id);
+    if ($probably_not >= variable_get('spam_filter_custom_probablynot', 3)) {
+      $action['total'] = 1;
+    }
+    else {
+      $action['total'] = variable_get('spam_filter_custom_probablynot_value', 40);
+    }
+  }
+  else {
+    // No matched filters, so don't change the overall spam score.
+    $action['total'] = 0;
+  }
+  return $action;
+}
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.module~ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.module~
--- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom.module~ 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom.module~ 2009-12-09 14:32:27.000000000 +1100
@@ -0,0 +1,584 @@
+.
+ *
+ * Allows manual definition of words and regular expressions to detect spam
+ * content.
+ */
+
+// How a filter string is interpreted. Constant names must be quoted strings;
+// a bareword is itself looked up as a constant.
+define('SPAM_CUSTOM_STYLE_PLAIN', 0);
+define('SPAM_CUSTOM_STYLE_REGEX', 1);
+
+// Status applied when a filter matches.
+define('SPAM_CUSTOM_STATUS_NOTSPAM', -2);
+define('SPAM_CUSTOM_STATUS_PROBABLYNOT', -1);
+define('SPAM_CUSTOM_STATUS_DISABLED', 0);
+define('SPAM_CUSTOM_STATUS_PROBABLY', 1);
+define('SPAM_CUSTOM_STATUS_SPAM', 2);
+
+// Bit flags selecting what a filter scans.
+define('SPAM_CUSTOM_SCAN_CONTENT', 0x1);
+define('SPAM_CUSTOM_SCAN_REFERRER', 0x4);
+define('SPAM_CUSTOM_SCAN_USERAGENT', 0x8);
+
+// TODO: support actions
+//define(SPAM_CUSTOM_ACTION_DELETE, 0x1);
+//define(SPAM_CUSTOM_ACTION_MAIL, 0x2);
+
+/**
+ * Spam API hook for the custom filter.
+ *
+ * Handles the 'filter', 'filter_module', 'filter_info' and 'filter_install'
+ * operations; any other operation returns nothing.
+ */
+function spam_custom_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) {
+  if ($op == 'filter') {
+    // Only run the rules when the spam module reports this filter as enabled.
+    $enabled = module_invoke('spam', 'filter_enabled', 'spam_custom', $type, $content, $fields, $extra);
+    if (!$enabled) {
+      return;
+    }
+    return spam_custom_spam_filter($content, $type, $fields, $extra);
+  }
+
+  if ($op == 'filter_module') {
+    return 'spam_custom';
+  }
+
+  if ($op == 'filter_info') {
+    $info = array();
+    $info['name'] = t('Custom filter');
+    $info['module'] = t('spam_custom');
+    $info['description'] = t('Custom spam filters.');
+    $info['help'] = t('The custom spam filter module allows you to manually define custom spam filter rules.');
+    return $info;
+  }
+
+  if ($op == 'filter_install') {
+    $defaults = array();
+    $defaults['status'] = SPAM_FILTER_ENABLED;
+    $defaults['gain'] = 250;
+    $defaults['weight'] = -4;
+    return $defaults;
+  }
+}
+
+/**
+ * Drupal _menu() hook.
+ *
+ * Registers the custom filter administration pages: the filter list (also
+ * the default tab), the create form and the per-filter edit form.
+ *
+ * @return
+ *   An array of menu items keyed by path.
+ */
+function spam_custom_menu() {
+  $items = array();
+
+  $items['admin/settings/spam/filters/custom'] = array(
+    'title' => 'Custom',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('spam_custom_admin_settings'),
+    'access arguments' => array('administer spam'),
+    'description' => 'Configure the custom spam filter module.',
+    'type' => MENU_LOCAL_TASK,
+  );
+
+  $items['admin/settings/spam/filters/custom/list'] = array(
+    'title' => 'List',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('spam_custom_admin_settings'),
+    'access arguments' => array('administer spam'),
+    'description' => 'Configure the custom spam filter module.',
+    'type' => MENU_DEFAULT_LOCAL_TASK,
+  );
+  $items['admin/settings/spam/filters/custom/create'] = array(
+    'title' => 'Create',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('spam_custom_admin_filter'),
+    'access arguments' => array('administer spam'),
+    'description' => 'Create a custom spam filter.',
+    'type' => MENU_LOCAL_TASK,
+  );
+  // Path component 5 (the wildcard) is handed to the form builder.
+  $items['admin/settings/spam/filters/custom/%/edit'] = array(
+    // Was 'Create', copied from the item above; this route edits a filter.
+    'title' => 'Edit',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('spam_custom_admin_filter', 5),
+    'access arguments' => array('administer spam'),
+    'description' => 'Edit a custom spam filter.',
+    'type' => MENU_LOCAL_TASK,
+  );
+
+  return $items;
+}
+
+/**
+ * Administrative interface for configuring custom spam filter rules.
+ */
+function spam_custom_admin_settings() {
+ $form = array();
+
+ $form['options'] = array(
+ '#type' => 'fieldset',
+ '#title' => t('Options'),
+ '#prefix' => 'plain text. If you would like to define a regular expression, your filter must be formatted as a Perl-compatible regular expression.'),
+ '#options' => array(SPAM_CUSTOM_STYLE_PLAIN => t('Plain text'), SPAM_CUSTOM_STYLE_REGEX => t('Regular expression')),
+ '#default_value' => $spam_custom->cid ? $spam_custom->style : SPAM_CUSTOM_STYLE_PLAIN,
+ '#required' => TRUE,
+ );
+ $options = array(SPAM_CUSTOM_SCAN_CONTENT => ('Content'), SPAM_CUSTOM_SCAN_REFERRER => t('Referrer'), SPAM_CUSTOM_SCAN_USERAGENT => t('User agent'));
+ $scan = array();
+ if ($spam_custom->scan & SPAM_CUSTOM_SCAN_CONTENT) {
+ $scan[] = SPAM_CUSTOM_SCAN_CONTENT;
+ }
+ if ($spam_custom->scan & SPAM_CUSTOM_SCAN_REFERRER) {
+ $scan[] = SPAM_CUSTOM_SCAN_REFERRER;
+ }
+ if ($spam_custom->scan & SPAM_CUSTOM_SCAN_USERAGENT) {
+ $scan[] = SPAM_CUSTOM_SCAN_USERAGENT;
+ }
+ $form['scan'] = array(
+ '#type' => 'checkboxes',
+ '#title' => t('Scan'),
+ '#description' => t('Specify where you\'d like to apply your custom filter.'),
+ '#options' => $options,
+ '#required' => TRUE,
+ '#default_value' => !empty($scan) ? $scan : array(SPAM_CUSTOM_SCAN_CONTENT),
+ );
+ $options = array();
+ $form['status'] = array(
+ '#type' => 'radios',
+ '#title' => t('Status'),
+ '#description' => t('Select the status to apply when your custom filter matches site content. Filters are tested in the order they are displayed above, thus if content matches a filter that says to mark it as spam, and another to mark it as not spam, the first to match will be the actual status applied.'),
+ '#options' => array(
+ SPAM_CUSTOM_STATUS_DISABLED => t('Disabled'),
+ SPAM_CUSTOM_STATUS_SPAM => t('Mark as spam'),
+ SPAM_CUSTOM_STATUS_PROBABLY => t('Mark as probably spam'),
+ SPAM_CUSTOM_STATUS_PROBABLYNOT => t('Mark as probably not spam'),
+ SPAM_CUSTOM_STATUS_NOTSPAM => t('Mark as not spam')),
+ '#default_value' => $spam_custom->cid ? $spam_custom->status : SPAM_CUSTOM_STATUS_SPAM,
+ '#required' => TRUE,
+ );
+ $form['weight'] = array(
+ '#type' => 'weight',
+ '#title' => t('Weight'),
+ '#description' => t('Give your custom filter a weight. "Lighter" filters with smaller weights will run before "heavier" filters with larger weights.'),
+ '#default_value' => $spam_custom->weight,
+ );
+
+ $form['submit'] = array(
+ '#type' => 'submit',
+ '#value' => $spam_custom->cid ? t('Update filter') : t('Create filter'),
+ );
+
+ if ($spam_custom->cid) {
+ $form['cid'] = array(
+ '#type' => 'hidden',
+ '#value' => $spam_custom->cid,
+ );
+ }
+
+ return $form;
+}
+
+/**
+ * Be sure that the custom filter is valid.
+ *
+ * A regular-expression filter must be accepted by preg_match(), and the
+ * filter text must not duplicate another row in {spam_custom}.
+ *
+ * @param $form
+ *   The form structure.
+ * @param $form_state
+ *   The form state; 'values' holds 'filter', 'style' and, on update, 'cid'.
+ */
+function spam_custom_admin_filter_validate($form, &$form_state) {
+  $values = $form_state['values'];
+
+  if ($values['style'] == SPAM_CUSTOM_STYLE_REGEX) {
+    // preg_match() returns FALSE (and raises a warning describing the
+    // problem) when the pattern cannot be compiled.
+    if (preg_match($values['filter'], 'test') === FALSE) {
+      form_set_error('filter', t('Failed to validate your filter\'s regular expression. It must be properly formatted as a Perl-compatible regular expression. Review the above error for details on the specific problem with your expression.'));
+    }
+  }
+
+  if (isset($values['cid'])) {
+    // update: ignore the row being edited when looking for duplicates
+    $cid = db_result(db_query("SELECT cid FROM {spam_custom} WHERE filter = '%s' AND cid <> %d", $values['filter'], $values['cid']));
+  }
+  else {
+    // create
+    $cid = db_result(db_query("SELECT cid FROM {spam_custom} WHERE filter = '%s'", $values['filter']));
+  }
+  if ($cid) {
+    // The first argument is the form element to flag, not the conflicting
+    // row's cid.
+    form_set_error('filter', t('Custom filter %filter already exists', array('%filter' => $values['filter'])));
+  }
+}
+
+/**
+ * Create/update custom filter.
+ *
+ * Updates the existing row when the form carries a 'cid' value, otherwise
+ * inserts a new row, then returns to the custom filter overview.
+ *
+ * @param $form
+ *   The form structure.
+ * @param $form_state
+ *   The form state; 'values' holds the submitted filter definition.
+ */
+function spam_custom_admin_filter_submit($form, &$form_state) {
+  $values = $form_state['values'];
+
+  // Fold the selected scan checkboxes into a single bitmask. Unchecked boxes
+  // submit 0 and therefore contribute nothing.
+  $scan = 0;
+  if (is_array($values['scan'])) {
+    foreach ($values['scan'] as $flag) {
+      $scan |= (int) $flag;
+    }
+  }
+
+  if (isset($values['cid'])) {
+    db_query("UPDATE {spam_custom} SET filter = '%s', style = %d, status = %d, scan = %d, weight = %d WHERE cid = %d", $values['filter'], $values['style'], $values['status'], $scan, $values['weight'], $values['cid']);
+    drupal_set_message(t('Custom filter %filter updated.', array('%filter' => $values['filter'])));
+  }
+  else {
+    db_query("INSERT INTO {spam_custom} (filter, style, status, scan, weight) VALUES ('%s', %d, %d, %d, %d)", $values['filter'], $values['style'], $values['status'], $scan, $values['weight']);
+    drupal_set_message(t('Custom filter %filter created.', array('%filter' => $values['filter'])));
+  }
+
+  // Let the Form API perform the redirect once all submit handlers have run,
+  // rather than ending the request here with drupal_goto().
+  $form_state['redirect'] = 'admin/settings/spam/filters/custom';
+}
+
+/**
+ * Perform bulk operations on the filters.
+ *
+ * Collects the selected filter ids, looks up the chosen operation among those
+ * returned by hook_spam_custom_operations() and invokes its callback once per
+ * selected filter, with the filter's cid appended to the callback arguments.
+ *
+ * @param $form
+ *   The form structure.
+ * @param $form_state
+ *   The form state; 'values' holds 'spam_custom' (cid => selected) and
+ *   'operation'.
+ */
+function spam_custom_admin_settings_submit($form, &$form_state) {
+  $values = $form_state['values'];
+
+  // Keep only the cids whose checkbox was ticked.
+  $process = array();
+  if (isset($values['spam_custom']) && is_array($values['spam_custom'])) {
+    $process = array_keys(array_filter($values['spam_custom']));
+  }
+  if (empty($process) || !isset($values['operation'])) {
+    return;
+  }
+
+  $options = module_invoke_all('spam_custom_operations');
+  $operation = $values['operation'];
+  if (!isset($options[$operation]['callback'])) {
+    return;
+  }
+
+  $function = $options[$operation]['callback'];
+  // 'callback arguments' is optional; array_merge() needs an array.
+  $arguments = isset($options[$operation]['callback arguments']) ? (array) $options[$operation]['callback arguments'] : array();
+  if (!is_callable($function)) {
+    return;
+  }
+
+  //TODO: Why is order different than spam.module, and why 1 at a time?
+  foreach ($process as $cid) {
+    call_user_func_array($function, array_merge($arguments, array($cid)));
+  }
+}
+
+/**
+ * Perform a bulk operation on a single custom filter.
+ *
+ * TODO: Confirmation would be nice.
+ *
+ * @param $op
+ *   The operation to perform: 'delete' or 'disable'. Any other value is
+ *   ignored.
+ * @param $cid
+ *   The cid of the row in {spam_custom} to operate on.
+ */
+function spam_custom_spam_filter_operations($op, $cid) {
+  $filter = db_fetch_object(db_query('SELECT cid, status, filter FROM {spam_custom} WHERE cid = %d', $cid));
+  // db_fetch_object() returns FALSE when no row matches; stop here instead of
+  // dereferencing a non-object below.
+  if (empty($filter) || !$filter->cid) {
+    return;
+  }
+  switch ($op) {
+    case 'delete':
+      db_query('DELETE FROM {spam_custom} WHERE cid = %d', $cid);
+      drupal_set_message(t('Deleted custom filter %filter.', array('%filter' => $filter->filter)));
+      break;
+    case 'disable':
+      // Only touch (and report on) filters that are not already disabled.
+      if ($filter->status != SPAM_CUSTOM_STATUS_DISABLED) {
+        db_query('UPDATE {spam_custom} SET status = %d WHERE cid = %d', SPAM_CUSTOM_STATUS_DISABLED, $cid);
+        drupal_set_message(t('Disabled custom filter %filter.', array('%filter' => $filter->filter)));
+      }
+      break;
+  }
+}
+
+/**
+ * Apply enabled custom filter rules against content.
+ *
+ * Every non-disabled rule is run, in ascending weight order, against the text
+ * selected by its scan flags (content, referrer and/or user agent). A matching
+ * "spam" or "not spam" rule decides the result immediately; "probably" and
+ * "probably not" matches are tallied, netted against each other and compared
+ * with the configured thresholds.
+ *
+ * @param $content
+ *   The content to scan, as an object or an array.
+ * @param $type
+ *   The content type, passed through to the Spam API helpers.
+ * @param $fields
+ *   The fields of $content to extract text from.
+ * @param $extra
+ *   Optional extra data passed through to the Spam API helpers.
+ * @param $filter_test
+ *   Not used by this function.
+ *
+ * @return
+ *   An array whose 'total' key holds the score: 99 for spam, 1 for not spam,
+ *   a configured intermediate value, or 0 when no rule matched. When rules
+ *   matched, 'spam_custom' lists the details of each matching rule.
+ */
+function spam_custom_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) {
+  $action = array();
+  $probably = $probably_not = 0;
+  $id = spam_invoke_module($type, 'content_id', $content, $extra);
+  $result = db_query('SELECT cid, filter, style, status, scan, action FROM {spam_custom} WHERE status <> %d ORDER BY weight ASC', SPAM_CUSTOM_STATUS_DISABLED);
+  while ($spam_custom = db_fetch_object($result)) {
+    $scan = '';
+    if ($spam_custom->scan & SPAM_CUSTOM_SCAN_CONTENT) {
+      // scan content
+      if (is_object($content)) {
+        $content = (array)$content;
+      }
+      $scan .= spam_get_text($content, $type, $fields, $extra);
+      spam_log(SPAM_DEBUG, 'spam_custom_spam_filter', t('scanning content with %filter.', array('%filter' => $spam_custom->filter)), $type, $id);
+    }
+    if ($spam_custom->scan & SPAM_CUSTOM_SCAN_REFERRER) {
+      // scan referrer
+      // TODO: Determine if this is a live scan. If not, don't scan referrer.
+      // The header is optional, so guard against it being absent.
+      $scan .= isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
+      spam_log(SPAM_DEBUG, 'spam_custom_spam_filter', t('scanning referrer with %filter.', array('%filter' => $spam_custom->filter)), $type, $id);
+    }
+    if ($spam_custom->scan & SPAM_CUSTOM_SCAN_USERAGENT) {
+      // scan user agent
+      // TODO: Determine if this is a live scan. If not, don't scan user agent.
+      $scan .= isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
+      spam_log(SPAM_DEBUG, 'spam_custom_spam_filter', t('scanning user agent with %filter.', array('%filter' => $spam_custom->filter)), $type, $id);
+    }
+
+    // Reset per rule so that a rule with an unrecognised style cannot inherit
+    // the match count of the previous rule.
+    $match = 0;
+    $matches = array();
+    switch ($spam_custom->style) {
+      case SPAM_CUSTOM_STYLE_PLAIN:
+        // Plain-text rules are literal strings: escape regex metacharacters
+        // and the delimiter before building the pattern.
+        $match = preg_match_all('/' . preg_quote($spam_custom->filter, '/') . '/', $scan, $matches);
+        break;
+      case SPAM_CUSTOM_STYLE_REGEX:
+        $match = preg_match_all($spam_custom->filter, $scan, $matches);
+        break;
+    }
+    if ($match) {
+      // Record that we've had one or more matches.
+      db_query('UPDATE {spam_custom} SET matches = matches + %d, last = %d WHERE cid = %d', $match, time(), $spam_custom->cid);
+      spam_log(SPAM_VERBOSE, 'spam_custom_spam_filter', t('matched with %filter.', array('%filter' => $spam_custom->filter)), $type, $id);
+
+      $action['spam_custom'][] = array(
+        'filter' => $spam_custom->filter,
+        'status' => $spam_custom->status,
+        'style' => $spam_custom->style,
+        'scan' => $spam_custom->scan,
+        // NOTE(review): the query above does not select an 'extra' column, so
+        // this is NULL unless the query is extended - confirm the intent.
+        'extra' => isset($spam_custom->extra) ? $spam_custom->extra : NULL,
+      );
+
+      switch ($spam_custom->status) {
+
+        case SPAM_CUSTOM_STATUS_SPAM:
+          spam_log(SPAM_VERBOSE, 'spam_custom_spam_filter', t('content is spam.'), $type, $id);
+          // no need to scan any more, we've found spam
+          $action['total'] = 99;
+          return $action;
+
+        case SPAM_CUSTOM_STATUS_NOTSPAM:
+          spam_log(SPAM_VERBOSE, 'spam_custom_spam_filter', t('content is not spam.'), $type, $id);
+          // no need to scan any more, we've found non-spam
+          $action['total'] = 1;
+          return $action;
+
+        case SPAM_CUSTOM_STATUS_PROBABLYNOT:
+          spam_log(SPAM_DEBUG, 'spam_custom_spam_filter', t('content is probably not spam.'), $type, $id);
+          // maintain internal counter that this is probably not spam
+          $probably_not += $match;
+          break;
+
+        case SPAM_CUSTOM_STATUS_PROBABLY:
+          spam_log(SPAM_DEBUG, 'spam_custom_spam_filter', t('content is probably spam.'), $type, $id);
+          // maintain internal counter that this is probably spam
+          $probably += $match;
+          break;
+      }
+    }
+  }
+
+  // Net the two tallies against each other so only the surplus counts.
+  if ($probably && $probably_not) {
+    if ($probably >= $probably_not) {
+      $probably -= $probably_not;
+      $probably_not = 0;
+    }
+    else {
+      $probably_not -= $probably;
+      $probably = 0;
+    }
+  }
+  if ($probably) {
+    spam_log(SPAM_VERBOSE, 'spam_custom_spam_filter', t('matched adjusted total of !number probably spam rule(s).', array('!number' => $probably)), $type, $id);
+    if ($probably >= variable_get('spam_custom_probably', 3)) {
+      $action['total'] = 99;
+    }
+    else {
+      $action['total'] = variable_get('spam_custom_probably_value', variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD));
+    }
+  }
+  else if ($probably_not) {
+    spam_log(SPAM_VERBOSE, 'spam_custom_spam_filter', t('matched adjusted total of !number probably-not spam rule(s).', array('!number' => $probably_not)), $type, $id);
+    if ($probably_not >= variable_get('spam_custom_probablynot', 3)) {
+      $action['total'] = 1;
+    }
+    else {
+      $action['total'] = variable_get('spam_custom_probablynot_value', 40);
+    }
+  }
+  else {
+    // No matched filters, so don't change the overall spam score.
+    $action['total'] = 0;
+  }
+  return $action;
+}
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc
--- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc 2009-12-09 14:39:37.000000000 +1100
@@ -0,0 +1,72 @@
+style) {
+ case SPAM_FILTER_CUSTOM_PLAIN:
+ case SPAM_FILTER_CUSTOM_URL:
+ $style = SPAM_FILTER_CUSTOM_STYLE_PLAIN;
+ break;
+ case SPAM_FILTER_CUSTOM_REGEX:
+ $style = SPAM_FILTER_CUSTOM_STYLE_REGEX;
+ break;
+ }
+ switch ($old->effect) {
+ case SPAM_FILTER_CUSTOM_DISABLED:
+ default:
+ $status = SPAM_FILTER_CUSTOM_STATUS_DISABLED;
+ break;
+ case SPAM_FILTER_CUSTOM_MAYBE_SPAM:
+ case SPAM_FILTER_CUSTOM_USUALLY_SPAM:
+ $status = SPAM_FILTER_CUSTOM_STATUS_PROBABLY;
+ break;
+ case SPAM_FILTER_CUSTOM_USUALLY_NOTSPAM:
+ case SPAM_FILTER_CUSTOM_MAYBE_NOTSPAM:
+ $status = SPAM_FILTER_CUSTOM_STATUS_PROBABLYNOT;
+ break;
+ case SPAM_FILTER_CUSTOM_NEVER_SPAM:
+ $status = SPAM_FILTER_CUSTOM_STATUS_NOTSPAM;
+ break;
+ case SPAM_FILTER_CUSTOM_ALWAYS_SPAM:
+ $status = SPAM_FILTER_CUSTOM_STATUS_SPAM;
+ break;
+ }
+ if ($old->action & SPAM_FILTER_CUSTOM_ACTION_HEADER || $old->action & SPAM_FILTER_CUSTOM_ACTION_BODY) {
+ $scan = SPAM_FILTER_CUSTOM_SCAN_CONTENT;
+ }
+ else {
+ $scan = SPAM_FILTER_CUSTOM_SCAN_CONTENT;
+ $status = SPAM_FILTER_CUSTOM_STATUS_DISABLED;
+ }
+ db_query("INSERT INTO {spam_filter_custom} (filter, style, status, scan, matches, last) VALUES('%s', %d, %d, %d, %d, %d)", $old->filter, $style, $status, $scan, $old->matches, $old->last);
+ }
+ // Done with upgrade, drop old table.
+ $ret = array();
+ db_drop_table($ret, 'old_spam_custom');
+ }
+}
+
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc~ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc~
--- /home/files/coding/drupal/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc~ 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_custom/spam_filter_custom-upgrade.inc~ 2009-12-09 14:39:37.000000000 +1100
@@ -0,0 +1,72 @@
+style) {
+ case SPAM_CUSTOM_PLAIN:
+ case SPAM_CUSTOM_URL:
+ $style = SPAM_CUSTOM_STYLE_PLAIN;
+ break;
+ case SPAM_CUSTOM_REGEX:
+ $style = SPAM_CUSTOM_STYLE_REGEX;
+ break;
+ }
+ switch ($old->effect) {
+ case SPAM_CUSTOM_DISABLED:
+ default:
+ $status = SPAM_CUSTOM_STATUS_DISABLED;
+ break;
+ case SPAM_CUSTOM_MAYBE_SPAM:
+ case SPAM_CUSTOM_USUALLY_SPAM:
+ $status = SPAM_CUSTOM_STATUS_PROBABLY;
+ break;
+ case SPAM_CUSTOM_USUALLY_NOTSPAM:
+ case SPAM_CUSTOM_MAYBE_NOTSPAM:
+ $status = SPAM_CUSTOM_STATUS_PROBABLYNOT;
+ break;
+ case SPAM_CUSTOM_NEVER_SPAM:
+ $status = SPAM_CUSTOM_STATUS_NOTSPAM;
+ break;
+ case SPAM_CUSTOM_ALWAYS_SPAM:
+ $status = SPAM_CUSTOM_STATUS_SPAM;
+ break;
+ }
+ if ($old->action & SPAM_CUSTOM_ACTION_HEADER || $old->action & SPAM_CUSTOM_ACTION_BODY) {
+ $scan = SPAM_CUSTOM_SCAN_CONTENT;
+ }
+ else {
+ $scan = SPAM_CUSTOM_SCAN_CONTENT;
+ $status = SPAM_CUSTOM_STATUS_DISABLED;
+ }
+ db_query("INSERT INTO {spam_custom} (filter, style, status, scan, matches, last) VALUES('%s', %d, %d, %d, %d, %d)", $old->filter, $style, $status, $scan, $old->matches, $old->last);
+ }
+ // Done with upgrade, drop old table.
+ $ret = array();
+ db_drop_table($ret, 'old_spam_custom');
+ }
+}
+
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.info sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.info
--- /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.info 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.info 2009-12-02 19:13:25.000000000 +1100
@@ -0,0 +1,12 @@
+; $Id: duplicate.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $
+name = Spam Duplicate filter
+description = A duplication detecting spam filter.
+package = Spam
+dependencies[] = spam
+core = 6.x
+; Information added by drupal.org packaging script on 2009-09-01
+version = "6.x-1.x-dev"
+core = "6.x"
+project = "spam"
+datestamp = "1251764880"
+
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install
--- /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install 2009-12-09 14:32:27.000000000 +1100
@@ -0,0 +1,60 @@
+ t('The base table for the Duplicate submodule'),
+ 'fields' => array(
+ 'iid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'),
+ 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'content_hash' => array('type' => 'char', 'length' => '32', 'not null' => TRUE, 'default' => ''),
+ 'hostname' => array('type' => 'varchar', 'length' => '15', 'not null' => TRUE, 'default' => ''),
+ 'duplicate_hash' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'duplicate_ip' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'spam' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
+ 'expired' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
+ 'timestamp' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'),
+ ),
+ 'primary key' => array('iid'),
+ 'indexes' => array(
+ 'content_hash' => array('content_hash'),
+ 'hostname' => array('hostname'),
+ 'sid' => array('sid'),
+ 'spam' => array('spam'),
+ 'timestamp' => array('timestamp'),
+ ),
+ );
+
+ return $schema;
+}
+
+/**
+ * Install the spam_filter_duplicate module schema.
+ */
+function spam_filter_duplicate_install() {
+  // Create the tables declared by this module's schema.
+  $module = 'spam_filter_duplicate';
+  drupal_install_schema($module);
+}
+
+/**
+ * Uninstall the spam_filter_duplicate module schema.
+ */
+function spam_filter_duplicate_uninstall() {
+  // Remove tables.
+  drupal_uninstall_schema('spam_filter_duplicate');
+  // User-facing text goes through t() so it can be translated.
+  drupal_set_message(t('The spam_filter_duplicate table has been dropped.'));
+}
+
+/**
+ * Fix variable namespacing.
+ *
+ * Copies each legacy "duplicate_*" variable to its "spam_filter_duplicate_*"
+ * name and removes the old one.
+ *
+ * @return
+ *   An array of update results, one entry per renamed variable.
+ */
+function spam_filter_duplicate_update_6101() {
+  $ret = array();
+  $old_vars = array('duplicate_blacklist', 'duplicate_blacklist_action', 'duplicate_blacklist_message', 'duplicate_post_message', 'duplicate_threshold');
+  foreach ($old_vars as $var) {
+    // Compare against NULL rather than using empty(): 0 is a legitimate
+    // stored value (for example the "silent" blacklist action).
+    $value = variable_get($var, NULL);
+    if ($value !== NULL) {
+      // update_sql() takes a finished SQL string and performs no placeholder
+      // substitution, so rename through the variable API instead.
+      $new_var = 'spam_filter_' . $var;
+      variable_set($new_var, $value);
+      variable_del($var);
+      $ret[] = array('success' => TRUE, 'query' => 'Renamed variable ' . $var . ' to ' . $new_var . '.');
+    }
+  }
+  return $ret;
+}
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install~ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install~
--- /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install~ 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.install~ 2009-12-09 14:32:27.000000000 +1100
@@ -0,0 +1,60 @@
+ t('The base table for the Duplicate submodule'),
+ 'fields' => array(
+ 'iid' => array('type' => 'serial', 'unsigned' => TRUE, 'not null' => TRUE, 'disp-width' => '11'),
+ 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'content_hash' => array('type' => 'char', 'length' => '32', 'not null' => TRUE, 'default' => ''),
+ 'hostname' => array('type' => 'varchar', 'length' => '15', 'not null' => TRUE, 'default' => ''),
+ 'duplicate_hash' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'duplicate_ip' => array('type' => 'int', 'not null' => TRUE, 'default' => 0, 'disp-width' => '11'),
+ 'spam' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
+ 'expired' => array('type' => 'int', 'size' => 'tiny', 'not null' => TRUE, 'default' => 0, 'disp-width' => '4'),
+ 'timestamp' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => FALSE, 'default' => 0, 'disp-width' => '11'),
+ ),
+ 'primary key' => array('iid'),
+ 'indexes' => array(
+ 'content_hash' => array('content_hash'),
+ 'hostname' => array('hostname'),
+ 'sid' => array('sid'),
+ 'spam' => array('spam'),
+ 'timestamp' => array('timestamp'),
+ ),
+ );
+
+ return $schema;
+}
+
+/**
+ * Install the spam_filter_duplicate module schema.
+ */
+function spam_filter_duplicate_install() {
+  // Create the tables declared by this module's schema.
+  $module = 'spam_filter_duplicate';
+  drupal_install_schema($module);
+}
+
+/**
+ * Uninstall the spam_filter_duplicate module schema.
+ */
+function spam_filter_duplicate_uninstall() {
+  // Remove tables.
+  drupal_uninstall_schema('spam_filter_duplicate');
+  // User-facing text goes through t() so it can be translated.
+  drupal_set_message(t('The spam_filter_duplicate table has been dropped.'));
+}
+
+/**
+ * Fix variable namespacing.
+ *
+ * Copies each legacy "duplicate_*" variable to its "spam_duplicate_*" name
+ * and removes the old one.
+ *
+ * @return
+ *   An array of update results, one entry per renamed variable.
+ */
+function spam_filter_duplicate_update_6101() {
+  $ret = array();
+  $old_vars = array('duplicate_blacklist', 'duplicate_blacklist_action', 'duplicate_blacklist_message', 'duplicate_post_message', 'duplicate_threshold');
+  foreach ($old_vars as $var) {
+    // Compare against NULL rather than using empty(): 0 is a legitimate
+    // stored value (for example the "silent" blacklist action).
+    $value = variable_get($var, NULL);
+    if ($value !== NULL) {
+      // update_sql() takes a finished SQL string and performs no placeholder
+      // substitution, so rename through the variable API instead.
+      $new_var = 'spam_' . $var;
+      variable_set($new_var, $value);
+      variable_del($var);
+      $ret[] = array('success' => TRUE, 'query' => 'Renamed variable ' . $var . ' to ' . $new_var . '.');
+    }
+  }
+  return $ret;
+}
diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module
--- /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module 1970-01-01 10:00:00.000000000 +1000
+++ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module 2009-12-09 15:46:20.000000000 +1100
@@ -0,0 +1,400 @@
+. All rights reserved.
+ *
+ * Detects spam by looking for duplication of content, or posting IP.
+ */
+
+define('SPAM_FILTER_DUPLICATE_BLACKLIST_SILENT', 0);
+define('SPAM_FILTER_DUPLICATE_BLACKLIST_NOTIFY', 1);
+define('SPAM_FILTER_DUPLICATE_BLACKLIST_BLOCK', 2);
+
+define('SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD', 2);
+define('SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST', 3);
+
+define('SPAM_FILTER_DUPLICATE_NOT_SPAM', 0);
+define('SPAM_FILTER_DUPLICATE_SPAM', 1);
+
+/**
+ * Drupal _menu() hook.
+ *
+ * Declares the duplicate filter settings tab, the two "denied" callback
+ * pages and the blocked-IP report tab.
+ */
+function spam_filter_duplicate_menu() {
+  $admin_only = array('administer spam');
+
+  $settings = array(
+    'title' => 'Duplicate',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('spam_filter_duplicate_admin_settings'),
+    'access arguments' => $admin_only,
+    'description' => 'Configure the spam duplicate filter.',
+    'type' => MENU_LOCAL_TASK,
+  );
+
+  // The denial pages are reachable by everyone.
+  $denied_ip = array(
+    'page callback' => 'spam_filter_duplicate_denied_ip',
+    'type' => MENU_CALLBACK,
+    'access callback' => TRUE,
+  );
+  $denied_post = array(
+    'page callback' => 'spam_filter_duplicate_denied_post',
+    'type' => MENU_CALLBACK,
+    'access callback' => TRUE,
+  );
+
+  $blocked_report = array(
+    'title' => 'Blocked IPs',
+    'access arguments' => $admin_only,
+    'page callback' => 'spam_logs_blocked_ip',
+    'type' => MENU_LOCAL_TASK,
+  );
+
+  return array(
+    'admin/settings/spam/filters/duplicate' => $settings,
+    'duplicate/denied/ip' => $denied_ip,
+    'duplicate/denied/post' => $denied_post,
+    'admin/reports/spam/blocked_ip' => $blocked_report,
+  );
+}
+
+/**
+ * The arg() function may not be available early in the bootstrap process,
+ * so we reimplement it here.
+ *
+ * @param $index
+ *   Zero-based position of the wanted component of $_GET['q']. When omitted,
+ *   all components are returned.
+ *
+ * @return
+ *   The path component at $index, NULL when there is no such component, or
+ *   the array of all components when $index is NULL.
+ */
+function _spam_filter_duplicate_arg($index = NULL) {
+  static $arguments, $q;
+
+  // Re-split only when the requested path changed since the last call.
+  $path = isset($_GET['q']) ? $_GET['q'] : '';
+  if (empty($arguments) || $q != $path) {
+    $arguments = explode('/', $path);
+    $q = $path;
+  }
+
+  if (!isset($index)) {
+    return $arguments;
+  }
+  if (isset($arguments[$index])) {
+    return $arguments[$index];
+  }
+}
+
+
+/**
+ * If IP blacklisting and IP blocking are both enabled, perform a database
+ * query on each page load to see if the current visitor has been blacklisted.
+ *
+ * Visitors whose IP has at least the configured number of spam entries are
+ * redirected to duplicate/denied/ip, unless they may bypass filters.
+ */
+function spam_filter_duplicate_init() {
+  global $user;
+
+  // Allow notification to blacklisted IP, if enabled.
+  if (_spam_filter_duplicate_arg(0) == 'duplicate' && _spam_filter_duplicate_arg(1) == 'denied' && _spam_filter_duplicate_arg(2) == 'ip') return;
+
+  // Only perform database queries if functionality is enabled.
+  $blacklist = variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST);
+  $blocking = variable_get('spam_filter_duplicate_blacklist_action', SPAM_FILTER_DUPLICATE_BLACKLIST_NOTIFY) == SPAM_FILTER_DUPLICATE_BLACKLIST_BLOCK;
+  if (!$blocking || !($blacklist > -1)) {
+    return;
+  }
+
+  // Blacklisting and IP blocking enabled.
+  $ip = ip_address();
+  // db_query() returns a result resource; db_result() reads the COUNT value.
+  $spam_filter_duplicate_ip = (int) db_result(db_query("SELECT COUNT(iid) FROM {spam_filter_duplicate} WHERE hostname = '%s' AND spam = %d", $ip, SPAM_FILTER_DUPLICATE_SPAM));
+  if ($spam_filter_duplicate_ip < $blacklist) {
+    return;
+  }
+
+  if (user_access('bypass filters')) {
+    // No content is being processed here, so there is no type or id to log.
+    spam_log(SPAM_DEBUG, 'spam_filter_duplicate_init', t('Found !count spam for IP !ip, ignoring because user !user (uid !uid) is configured to bypass filters', array('!count' => $spam_filter_duplicate_ip, '!ip' => $ip, '!user' => $user->name, '!uid' => $user->uid)), NULL, NULL);
+    return;
+  }
+  drupal_goto("duplicate/denied/ip");
+}
+
+/**
+ * Spam API hook for the duplicate filter.
+ *
+ * Dispatches on $op:
+ * - 'filter': when the filter is enabled, runs
+ *   spam_filter_duplicate_spam_filter() and returns its result.
+ * - 'filter_module': returns this filter's module name.
+ * - 'filter_info': returns the name/module/description/help array.
+ * - 'filter_install': returns the default status and weight.
+ * - 'insert': records the content hash and posting host for $extra['sid'],
+ *   then redirects if the filter registered a redirect for this request.
+ * - 'update': same as 'insert', but updates the existing row first and only
+ *   inserts when no row was affected.
+ * - 'delete': removes the row recorded for $extra['sid'].
+ * - 'mark_as_spam' / 'mark_as_not_spam': sets the spam flag on the row for
+ *   $extra['sid'], creating the row (with that flag) if none was affected.
+ *
+ * @param $op
+ *   The Spam API operation being performed.
+ * @param $type
+ *   Content type identifier; passed through to spam_invoke_module() and the
+ *   'filter_enabled' check.
+ * @param $content
+ *   The content being processed.
+ * @param $fields
+ *   Filter field definitions; used to build the content hash.
+ * @param $extra
+ *   Optional array. This hook reads its 'sid', 'host', 'id' and 'content'
+ *   keys, depending on $op.
+ *
+ * @return
+ *   For 'filter', the filter result; for 'filter_module', 'filter_info' and
+ *   'filter_install', the values described above; for 'mark_as_spam', the
+ *   registered redirect path if there is one. Nothing otherwise.
+ */
+function spam_filter_duplicate_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) {
+
+  switch ($op) {
+
+    case 'filter':
+      if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return;
+      return spam_filter_duplicate_spam_filter($content, $type, $fields, $extra);
+
+    case 'filter_module':
+      return 'spam_filter_duplicate';
+
+    case 'insert':
+      if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return;
+      if (is_array($extra) && $extra['sid'] && $extra['host'] &&
+          !empty($content) && !empty($fields)) {
+        $hash = _spam_filter_duplicate_content_hash($content, $fields);
+        db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
+        // The filter step may have registered a redirect for this request.
+        $action = _spam_filter_duplicate_action();
+        if (is_array($action) && isset($action['redirect'])) {
+          drupal_goto($action['redirect']);
+        }
+      }
+      break;
+
+    case 'update':
+      if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return;
+      if (is_array($extra) && $extra['sid'] && $extra['host'] &&
+          !empty($content) && !empty($fields)) {
+        $hash = _spam_filter_duplicate_content_hash($content, $fields);
+        db_query("UPDATE {spam_filter_duplicate} SET content_hash = '%s', hostname = '%s', timestamp = %d WHERE sid = %d", $hash, $extra['host'], time(), $extra['sid']);
+        if (!db_affected_rows()) {
+          // No row was updated, so this content has not been recorded yet.
+          db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time());
+        }
+        // The filter step may have registered a redirect for this request.
+        $action = _spam_filter_duplicate_action();
+        if (is_array($action) && isset($action['redirect'])) {
+          drupal_goto($action['redirect']);
+        }
+      }
+      break;
+
+    case 'delete':
+      if (is_array($extra) && $extra['sid'] && !empty($content) && !empty($fields)) {
+        db_query("DELETE FROM {spam_filter_duplicate} WHERE sid = %d", $extra['sid']);
+      }
+      break;
+
+    case 'filter_info':
+      // NOTE(review): 'module' is passed through t(); if callers treat this
+      // value as a machine name, a translation could alter it -- confirm.
+      return array(
+        'name' => t('Duplicate filter'),
+        'module' => t('spam_filter_duplicate'),
+        'description' => t('A duplication spam filter.'),
+        'help' => t('The duplicate filter detects spam by detecting content duplication.'),
+      );
+
+    case 'filter_install':
+      return array(
+        'status' => SPAM_FILTER_ENABLED,
+        'weight' => -8,
+      );
+
+    case 'mark_as_spam':
+      if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return;
+      db_query('UPDATE {spam_filter_duplicate} SET spam = %d WHERE sid = %d', SPAM_FILTER_DUPLICATE_SPAM, $extra['sid']);
+      if (!db_affected_rows() && $extra['id'] && $extra['sid']) {
+        // Marking content that we've not filtered before. Load everything
+        // needed to add it to the spam_filter_duplicate table.
+        $content = spam_invoke_module($type, 'load', $extra['id']);
+        $fields = spam_invoke_module($type, 'filter_fields', $content);
+        $hash = _spam_filter_duplicate_content_hash($content, $fields);
+        $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
+        // Store the spam flag with the new row; otherwise the row is created
+        // with the column default and is not counted as spam for this host.
+        db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, spam, timestamp) VALUES(%d, '%s', '%s', %d, %d)", $extra['sid'], $hash, $hostname, SPAM_FILTER_DUPLICATE_SPAM, time());
+      }
+      // Hand any registered redirect back to the caller instead of
+      // redirecting here.
+      $action = _spam_filter_duplicate_action();
+      if (is_array($action) && isset($action['redirect'])) {
+        return $action['redirect'];
+      }
+      break;
+
+    case 'mark_as_not_spam':
+      if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return;
+      db_query('UPDATE {spam_filter_duplicate} SET spam = %d WHERE sid = %d', SPAM_FILTER_DUPLICATE_NOT_SPAM, $extra['sid']);
+      if (!db_affected_rows() && $extra['id'] && $extra['sid']) {
+        // Updating content that we've not filtered before. Retrieve all the
+        // data we need to add it to the spam_filter_duplicate table.
+        $fields = spam_invoke_module($type, 'filter_fields', $extra['content']);
+        $hash = _spam_filter_duplicate_content_hash($extra['content'], $fields);
+        $hostname = spam_invoke_module($type, 'hostname', $extra['id']);
+        // Set the flag explicitly rather than relying on the column default.
+        db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, spam, timestamp) VALUES(%d, '%s', '%s', %d, %d)", $extra['sid'], $hash, $hostname, SPAM_FILTER_DUPLICATE_NOT_SPAM, time());
+      }
+      break;
+
+  }
+}
+
+/**
+ *
+ */
+function spam_filter_duplicate_admin_settings() {
+ $form['content'] = array(
+ '#type' => 'fieldset',
+ '#title' => t('Content'),
+ '#collapsible' => TRUE,
+ );
+ $limits = drupal_map_assoc(range(2, 15));
+ $limits[-1] = t('unlimited');
+ $form['content']['spam_filter_duplicate_threshold'] = array(
+ '#type' => 'select',
+ '#title' => t('Duplication threshold'),
+ '#default_value' => variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD),
+ '#options' => $limits,
+ '#description' => t('Specify how many times the same identical content can be posted before it will be considered spam. When tuning this filter, note that users may accidentally submit the same content multiple times causing an otherwise acceptible posting to be duplicated.'),
+ );
+ $form['content']['spam_filter_duplicate_post_message'] = array(
+ '#type' => 'textarea',
+ '#title' => t('Duplicate post message'),
+ '#default_value' => variable_get('spam_filter_duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), + '#description' => t('Message to show visitors when their content has been blocked because it was posted multiple times. The text "%IP" will be replaced by the visitors actual IP address.'), + ); + + $form['ip'] = array( + '#type' => 'fieldset', + '#title' => t('IP'), + '#collapsible' => TRUE, + ); + $limits = drupal_map_assoc(range(1, 15)); + $limits[-1] = t('unlimited'); + $form['ip']['spam_filter_duplicate_blacklist'] = array( + '#type' => 'select', + '#title' => t('IP blacklist threshold'), + '#default_value' => variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST), + '#options' => $limits, + '#description' => t('Specify how many times a given IP address is allowed to post possible spam content before the IP address is blacklisted and prevented from posting any additional content.'), + ); + $form['ip']['spam_filter_duplicate_blacklist_action'] = array( + '#type' => 'radios', + '#title' => t('IP blacklist action'), + '#options' => array(t('Silently prevent visitor from posting'), t('Notify blacklisted visitor when posting, prevent from posting'), t('Notify blacklisted visitor, prevent from visiting site')), + '#default_value' => variable_get('spam_filter_duplicate_blackZ>&!ô/„XSƒ»(´7 0/´&6€R _%DóíATE_BLACKLIST_NOTIFY), + '#description' => t('Select an action from the above options. If notification is enabled, the user will be redirected to a custom page displaying the "Blacklisted IP message" defined below. If you only prevent users from posting, they will be able to view all site content. 
If you prevent a user from visiting your site, they will only ever see the "Blacklisted IP message".'), + ); + $form['ip']['spam_filter_duplicate_blacklist_message'] = array( + '#type' => 'textarea', + '#title' => t('Blacklisted IP message'), + '#default_value' => variable_get('spam_filter_duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), + '#description' => t('Message to show visitors when their IP has been blacklisted. The text "%IP" will be replaced by the visitors actual IP address.') + ); + + return system_settings_form($form); +} + +/** + * Save the configuration. + */ +function spam_filter_duplicate_admin_settings_submit($form, &$form_state) { +/* TODO The 'op' element in the form values is deprecated. + Each button can have #validate and #submit functions associated with it. + Thus, there should be one button that submits the form and which invokes + the normal form_id_validate and form_id_submit handlers. Any additional + buttons which need to invoke different validate or submit functionality + should have button-specific functions. */ + if ($form_state['values']['op'] == t('Reset to defaults')) { + variable_del('spam_filter_duplicate_threshold'); + variable_del('spam_filter_duplicate_post_message'); + variable_del('spam_filter_duplicate_blacklist'); + variable_del('spam_filter_duplicate_blacklist_action'); + variable_del('spam_filter_duplicate_blacklist_message'); + drupal_set_message('Configuration reset to defaults.'); + } + else { + variable_set('spam_filter_duplicate_threshold', $form_state['values']['spam_filter_duplicate_threshold']); + variable_set('spam_filter_duplicate_post_message', $form_state['values']['spam_filter_duplicate_post_message']); + variable_set('spam_filter_duplicate_blacklist', $form_state['values']['spam_filter_duplicate_blacklist']); + variable_set('spam_filter_duplicate_blacklist_action', $form_state['values']['spam_filter_duplicate_blacklist_action']); + variable_set('spam_filter_duplicate_blacklist_message', $form_state['values']['spam_filter_duplicate_blacklist_message']); + drupal_set_message('Configuration saved.'); + } +} + +/** + * Get and md5 hash of all content truncated together. 
+ */ +function _spam_filter_duplicate_content_hash($content, $fields) { + if (is_object($content)) { + $content = (array)$content; + } + $hash = ''; + foreach ($fields['main'] as $field) { + $hash .= $content[$field]; + } + return md5($hash); +} + +/** + * Determine whether or not the content is spam. + */ +function spam_filter_duplicate_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $score = 0; + $action = array(); + $hash = _spam_filter_duplicate_content_hash($content, $fields); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam_filter_duplicate_hash = db_result(db_query("SELECT COUNT(d.iid) FROM {spam_filter_duplicate} d LEFT JOIN {spam_tracker} t ON d.sid = t.sid WHERE content_hash = '%s' AND content_id <> %d", $hash, $id)) + 1; + if ($spam_filter_duplicate_hash >= variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD)) { + $sids = db_query("SELECT sid FROM {spam_filter_duplicate} WHERE content_hash = '%s'", $hash); + if (!$filter_test) { + while ($sid = db_result($sids)) { + $unpublish = db_fetch_object(db_query('SELECT content_type, content_id, score FROM {spam_tracker} WHERE sid = %d', $sid)); + spam_mark_as_spam($unpublish->content_type, $unpublish->content_id, array('score' => 99)); + } + // Update counter tracking that we've blocked a duplicate posting of this + // content. (It will actually increment the counter on + // "duplicate_threshold" rows.) 
+ db_query("UPDATE {spam_filter_duplicate} SET spam_filter_duplicate_hash = spam_filter_duplicate_hash + 1 WHERE content_hash = '%s'", $hash); + } + $action['hash'] = array( + 'score' => 99, + 'description' => t('Content is identical to %count other existing posts.', array('%count' => variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD))), + ); + $action['total'] = 99; + $action['redirect'] = 'duplicate/denied/post'; + _spam_filter_duplicate_action($action); + return $action; + } + + $spam_filter_duplicate_ip = db_result(db_query("SELECT COUNT(iid) FROM {spam_filter_duplicate} WHERE hostname = '%s' AND spam = %d", ip_address(), SPAM_FILTER_DUPLICATE_SPAM)); + if ($spam_filter_duplicate_ip >= variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST) && (variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST) > -1)) { + $action['ip'] = array( + 'score' => 99, + 'description' => t('Content was posted by the same IP address used to post %count other spam posts.', array('%count' => variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST))), + ); + $action['total'] = 99; + $action['redirect'] = 'duplicate/denied/ip'; + } + + return $action; +} + +function _spam_filter_duplicate_action($register = array()) { + static $action = array(); + + if (!empty($register)) { + $action = $register; + } + + return $action; +} + +/** + * + */ +function spam_filter_duplicate_denied_ip() { + $message = strtr(variable_get('spam_filter_duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); + spam_denied_page($message, t('Your IP address has been blocked by our spam filter.')); +} + +/** + * + */ +function spam_filter_duplicate_denied_post() { + $message = strtr(variable_get('spam_filter_duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); + spam_denied_page($message, t('You have attempted to post the same content multiple times.')); +} + +function spam_logs_blocked_ip() { + drupal_set_title(t('Spam Module Blocked IPs')); + + $header = array( + array('data' => t('IP Address'), 'field' => 'hostname'), + array('data' => t('Last Seen'), 'field' => 'timestamp', 'sort' => 'desc'), + array('data' => t('Counter'), 'field' => 'count'), + ); + + // This SQL is *nasty*, so if you think you can do better, please be my guest! + // This unfortunately has to be SQL because the pager module can't be told + // how many rows we've got (so we can't do our own processing in PHP and + // still get paging to work properly). + $sql = "SELECT * FROM (SELECT DISTINCT x.hostname, x.timestamp, COUNT(x.hostname) AS count FROM (SELECT timestamp, hostname FROM {spam_tracker} WHERE score > %d ORDER BY timestamp DESC) AS x GROUP BY x.hostname) AS y WHERE y.count >= %d"; + $arguments = array(variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD), variable_get('spam_blacklist_ip', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST)); + + $count_sql = preg_replace('/^SELECT \* /', 'SELECT count(hostname) ', $sql); + + $result = pager_query($sql . tablesort_sql($header), 50, 0, $count_sql, $arguments); + + while ($log = db_fetch_object($result)) { + $rows[] = array('data' => array( + $log->hostname, + format_date($log->timestamp, 'small'), + $log->count + ) + ); + } + + if (!$rows) { + $rows[] = array(array('data' => t('No log messages available.'), 'colspan' => 6)); + } + + return theme('table', $header, $rows) . 
theme('pager', NULL, 50, 0); +} diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module~ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module~ --- /home/files/coding/drupal/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_duplicate/spam_filter_duplicate.module~ 2009-12-09 15:46:20.000000000 +1100 @@ -0,0 +1,400 @@ +. All rights reserved. + * + * Detects spam by looking for duplication of content, or posting IP. + */ + +define('SPAM_FILTER_DUPLICATE_BLACKLIST_SILENT', 0); +define('SPAM_FILTER_DUPLICATE_BLACKLIST_NOTIFY', 1); +define('SPAM_FILTER_DUPLICATE_BLACKLIST_BLOCK', 2); + +define('SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD', 2); +define('SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST', 3); + +define('SPAM_FILTER_DUPLICATE_NOT_SPAM', 0); +define('SPAM_FILTER_DUPLICATE_SPAM', 1); + +/** + * Drupal _menu() hook. + */ +function spam_filter_duplicate_menu() { + $items = array(); + + $items['admin/settings/spam/filters/duplicate'] = array( + 'title' => 'Duplicate', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_duplicate_admin_settings'), + 'access arguments' => array('administer spam'), + 'description' => 'Configure the spam duplicate filter.', + 'type' => MENU_LOCAL_TASK, + ); + $items['duplicate/denied/ip'] = array( + 'page callback' => 'spam_filter_duplicate_denied_ip', + 'type' => MENU_CALLBACK, + 'access callback' => TRUE, + ); + $items['duplicate/denied/post'] = array( + 'page callback' => 'spam_filter_duplicate_denied_post', + 'type' => MENU_CALLBACK, + 'access callback' => TRUE, + ); + + $items['admin/reports/spam/blocked_ip'] = array( + 'title' => 'Blocked IPs', + 'access arguments' => array('administer spam'), + 'page callback' => 'spam_logs_blocked_ip', + 'type' => MENU_LOCAL_TASK, + ); + + return $items; +} + +/** + * The arg() function may not be availble 
early in the bootstrap process, + * so we reimplement it here. + */ +function _spam_filter_duplicate_arg() { + static $arguments, $q; + + if (empty($arguments) || $q != $_GET['q']) { + $arguments = explode('/', $_GET['q']); + $q = $_GET['q']; + } + + if (isset($arguments[$index])) { + return $arguments[$index]; + } +} + + +/** + * If IP blacklisting and IP blocking are both enabled, perform a database + * query on each page load to see if the current visitor has been blacklisted. + */ +function spam_filter_duplicate_init() { + // Allow notification to blacklisted IP, if enabled. + if (_spam_filter_duplicate_arg(0) == 'duplicate' && _spam_filter_duplicate_arg(1) == 'denied' && _spam_filter_duplicate_arg(2) == 'ip') return; + + // Only perform database queries if functionality is enabled. + if ((variable_get('spam_filter_duplicate_blacklist_action', SPAM_FILTER_DUPLICATE_BLACKLIST_NOTIFY) == SPAM_FILTER_DUPLICATE_BLACKLIST_BLOCK) && (variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST) > -1)) { + // Blacklisting and IP blocking enabled. 
+ $spam_filter_duplicate_ip = (int)db_query("SELECT COUNT(iid) FROM {spam_filter_duplicate} WHERE hostname = '%s' AND spam = %d", ip_address(), SPAM_FILTER_DUPLICATE_SPAM); + if ($spam_filter_duplicate_ip >= variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST)) { + if (user_access('bypass filters')) { + spam_log(SPAM_DEBUG, 'spam_filter_duplicate_init', t('Found !count spam for IP !ip, ignoring because user !user (uid !uid) is configured to bypass filters', array('!count' => $spam_filter_duplicate_ip, '!ip' => ip_address(), '!user' => $user->name, 'uid' => $user->uid)), $type, $id); + return; + } + drupal_goto("duplicate/denied/ip"); + } + } +} + +/** + * Spam API Hook + */ +function spam_filter_duplicate_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + + switch ($op) { + + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return; + return spam_filter_duplicate_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'duplicate'; + + case 'insert': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return; + if (is_array($extra) && $extra['sid'] && $extra['host'] && + !empty($content) && !empty($fields)) { + $hash = _spam_filter_duplicate_content_hash($content, $fields); + db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time()); + $action = _spam_filter_duplicate_action(); + if (is_array($action) && !empty($action)) { + if (isset($action['redirect'])) { + drupal_goto($action['redirect']); + } + } + } + break; + + case 'update': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return; + if (is_array($extra) && $extra['sid'] && $extra['host'] && + !empty($content) && !empty($fields)) { 
+ $hash = _spam_filter_duplicate_content_hash($content, $fields); + db_query("UPDATE {spam_filter_duplicate} SET content_hash = '%s', hostname = '%s', timestamp = %d WHERE sid = %d", $hash, $extra['host'], time(), $extra['sid']); + if (!db_affected_rows()) { + db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $extra['host'], time()); + } + $action = _spam_filter_duplicate_action(); + if (is_array($action) && !empty($action)) { + if (isset($action['redirect'])) { + drupal_goto($action['redirect']); + } + } + } + break; + + case 'delete': + if (is_array($extra) && $extra['sid'] && !empty($content) && !empty($fields)) { + db_query("DELETE FROM {spam_filter_duplicate} WHERE sid = %d", $extra['sid']); + } + break; + + case 'filter_info': + return array( + 'name' => t('Duplicate filter'), + 'module' => t('spam_filter_duplicate'), + 'description' => t('A duplication spam filter.'), + 'help' => t('The duplicate filter detects spam by detecting content duplication.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'weight' => -8, + ); + + case 'mark_as_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return; + db_query('UPDATE {spam_filter_duplicate} SET spam = %d WHERE sid = %d', SPAM_FILTER_DUPLICATE_SPAM, $extra['sid']); + if (!db_affected_rows() && $extra['id'] && $extra['sid']) { + $content = spam_invoke_module($type, 'load', $extra['id']); + $fields = spam_invoke_module($type, 'filter_fields', $content); + $hash = _spam_filter_duplicate_content_hash($content, $fields); + $hostname = spam_invoke_module($type, 'hostname', $extra['id']); + db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time()); + } + $action = _spam_filter_duplicate_action(); + if (is_array($action) && 
isset($action['redirect'])) { + return $action['redirect']; + } + break; + + case 'mark_as_not_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_duplicate', $type, $content, $fields, $extra)) return; + db_query('UPDATE {spam_filter_duplicate} SET spam = %d WHERE sid = %d', SPAM_FILTER_DUPLICATE_NOT_SPAM, $extra['sid']); + if (!db_affected_rows() && $extra['id'] && $extra['sid']) { + // Updating content that we've not filtered before. Retrive all the + // data we need to add it to the spam_filter_duplicate table. + $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); + $hash = _spam_filter_duplicate_content_hash($extra['content'], $fields); + $hostname = spam_invoke_module($type, 'hostname', $extra['id']); + db_query("INSERT INTO {spam_filter_duplicate} (sid, content_hash, hostname, timestamp) VALUES(%d, '%s', '%s', %d)", $extra['sid'], $hash, $hostname, time()); + } + break; + + } +} + +/** + * + */ +function spam_filter_duplicate_admin_settings() { + $form['content'] = array( + '#type' => 'fieldset', + '#title' => t('Content'), + '#collapsible' => TRUE, + ); + $limits = drupal_map_assoc(range(2, 15)); + $limits[-1] = t('unlimited'); + $form['content']['spam_filter_duplicate_threshold'] = array( + '#type' => 'select', + '#title' => t('Duplication threshold'), + '#default_value' => variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD), + '#options' => $limits, + '#description' => t('Specify how many times the same identical content can be posted before it will be considered spam. 
When tuning this filter, note that users may accidentally submit the same content multiple times causing an otherwise acceptible posting to be duplicated.'), + ); + $form['content']['spam_filter_duplicate_post_message'] = array( + '#type' => 'textarea', + '#title' => t('Duplicate post message'), + '#default_value' => variable_get('spam_filter_duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), + '#description' => t('Message to show visitors when their content has been blocked because it was posted multiple times. The text "%IP" will be replaced by the visitors actual IP address.'), + ); + + $form['ip'] = array( + '#type' => 'fieldset', + '#title' => t('IP'), + '#collapsible' => TRUE, + ); + $limits = drupal_map_assoc(range(1, 15)); + $limits[-1] = t('unlimited'); + $form['ip']['spam_filter_duplicate_blacklist'] = array( + '#type' => 'select', + '#title' => t('IP blacklist threshold'), + '#default_value' => variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST), + '#options' => $limits, + '#description' => t('Specify how many times a given IP address is allowed to post possible spam content before the IP address is blacklisted and prevented from posting any additional content.'), + ); + $form['ip']['spam_filter_duplicate_blacklist_action'] = array( + '#type' => 'radios', + '#title' => t('IP blacklist action'), + '#options' => array(t('Silently prevent visitor from posting'), t('Notify blacklisted visitor when posting, prevent from posting'), t('Notify blacklisted visitor, prevent from visiting site')), + '#default_value' => variable_get('spam_filter_duplicate_blacklist_action', SPAM_FILTER_DUPLICATE_BLACKLIST_NOTIFY), + '#description' => t('Select an action from the above options. If notification is enabled, the user will be redirected to a custom page displaying the "Blacklisted IP message" defined below. If you only prevent users from posting, they will be able to view all site content. 
If you prevent a user from visiting your site, they will only ever see the "Blacklisted IP message".'), + ); + $form['ip']['spam_filter_duplicate_blacklist_message'] = array( + '#type' => 'textarea', + '#title' => t('Blacklisted IP message'), + '#default_value' => variable_get('spam_filter_duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
', array('@site' => variable_get('site_name', 'Drupal')))), + '#description' => t('Message to show visitors when their IP has been blacklisted. The text "%IP" will be replaced by the visitors actual IP address.') + ); + + return system_settings_form($form); +} + +/** + * Save the configuration. + */ +function spam_filter_duplicate_admin_settings_submit($form, &$form_state) { +/* TODO The 'op' element in the form values is deprecated. + Each button can have #validate and #submit functions associated with it. + Thus, there should be one button that submits the form and which invokes + the normal form_id_validate and form_id_submit handlers. Any additional + buttons which need to invoke different validate or submit functionality + should have button-specific functions. */ + if ($form_state['values']['op'] == t('Reset to defaults')) { + variable_del('spam_filter_duplicate_threshold'); + variable_del('spam_filter_duplicate_post_message'); + variable_del('spam_filter_duplicate_blacklist'); + variable_del('spam_filter_duplicate_blacklist_action'); + variable_del('spam_filter_duplicate_blacklist_message'); + drupal_set_message('Configuration reset to defaults.'); + } + else { + variable_set('spam_filter_duplicate_threshold', $form_state['values']['spam_filter_duplicate_threshold']); + variable_set('spam_filter_duplicate_post_message', $form_state['values']['spam_filter_duplicate_post_message']); + variable_set('spam_filter_duplicate_blacklist', $form_state['values']['spam_filter_duplicate_blacklist']); + variable_set('spam_filter_duplicate_blacklist_action', $form_state['values']['spam_filter_duplicate_blacklist_action']); + variable_set('spam_filter_duplicate_blacklist_message', $form_state['values']['spam_filter_duplicate_blacklist_message']); + drupal_set_message('Configuration saved.'); + } +} + +/** + * Get and md5 hash of all content truncated together. 
+ */ +function _spam_filter_duplicate_content_hash($content, $fields) { + if (is_object($content)) { + $content = (array)$content; + } + $hash = ''; + foreach ($fields['main'] as $field) { + $hash .= $content[$field]; + } + return md5($hash); +} + +/** + * Determine whether or not the content is spam. + */ +function spam_filter_duplicate_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $score = 0; + $action = array(); + $hash = _spam_filter_duplicate_content_hash($content, $fields); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam_filter_duplicate_hash = db_result(db_query("SELECT COUNT(d.iid) FROM {spam_filter_duplicate} d LEFT JOIN {spam_tracker} t ON d.sid = t.sid WHERE content_hash = '%s' AND content_id <> %d", $hash, $id)) + 1; + if ($spam_filter_duplicate_hash >= variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD)) { + $sids = db_query("SELECT sid FROM {spam_filter_duplicate} WHERE content_hash = '%s'", $hash); + if (!$filter_test) { + while ($sid = db_result($sids)) { + $unpublish = db_fetch_object(db_query('SELECT content_type, content_id, score FROM {spam_tracker} WHERE sid = %d', $sid)); + spam_mark_as_spam($unpublish->content_type, $unpublish->content_id, array('score' => 99)); + } + // Update counter tracking that we've blocked a duplicate posting of this + // content. (It will actually increment the counter on + // "duplicate_threshold" rows.) 
+ db_query("UPDATE {spam_filter_duplicate} SET spam_filter_duplicate_hash = spam_filter_duplicate_hash + 1 WHERE content_hash = '%s'", $hash); + } + $action['hash'] = array( + 'score' => 99, + 'description' => t('Content is identical to %count other existing posts.', array('%count' => variable_get('spam_filter_duplicate_threshold', SPAM_FILTER_DUPLICATE_DEFAULT_THRESHOLD))), + ); + $action['total'] = 99; + $action['redirect'] = 'duplicate/denied/post'; + _spam_filter_duplicate_action($action); + return $action; + } + + $spam_filter_duplicate_ip = db_result(db_query("SELECT COUNT(iid) FROM {spam_filter_duplicate} WHERE hostname = '%s' AND spam = %d", ip_address(), SPAM_FILTER_DUPLICATE_SPAM)); + if ($spam_filter_duplicate_ip >= variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST) && (variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST) > -1)) { + $action['ip'] = array( + 'score' => 99, + 'description' => t('Content was posted by the same IP address used to post %count other spam posts.', array('%count' => variable_get('spam_filter_duplicate_blacklist', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST))), + ); + $action['total'] = 99; + $action['redirect'] = 'duplicate/denied/ip'; + } + + return $action; +} + +function _spam_filter_duplicate_action($register = array()) { + static $action = array(); + + if (!empty($register)) { + $action = $register; + } + + return $action; +} + +/** + * + */ +function spam_filter_duplicate_denied_ip() { + $message = strtr(variable_get('spam_filter_duplicate_blacklist_message', t('You are currently not allowed to post content to @site, as previous content posted by your IP address (%IP) has been flagged as potential spam.
If you have not posted spam to @site, please report this error along with your IP address to a site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); + spam_denied_page($message, t('Your IP address has been blocked by our spam filter.')); +} + +/** + * + */ +function spam_filter_duplicate_denied_post() { + $message = strtr(variable_get('spam_filter_duplicate_post_message', t('You have attempted to post the same identical content multiple times, causing your posts to be flagged as potential spam. If this has happened in error, please report this error along with your IP address (%IP) to a @site site administrator. We apologize for any inconvenience.
')), array('@site' => variable_get('site_name', 'Drupal'), '%IP' => ip_address())); + spam_denied_page($message, t('You have attempted to post the same content multiple times.')); +} + +function spam_logs_blocked_ip() { + drupal_set_title(t('Spam Module Blocked IPs')); + + $header = array( + array('data' => t('IP Address'), 'field' => 'hostname'), + array('data' => t('Last Seen'), 'field' => 'timestamp', 'sort' => 'desc'), + array('data' => t('Counter'), 'field' => 'count'), + ); + + // This SQL is *nasty*, so if you think you can do better, please be my guest! + // This unfortunately has to be SQL because the pager module can't be told + // how many rows we've got (so we can't do our own processing in PHP and + // still get paging to work properly). + $sql = "SELECT * FROM (SELECT DISTINCT x.hostname, x.timestamp, COUNT(x.hostname) AS count FROM (SELECT timestamp, hostname FROM {spam_tracker} WHERE score > %d ORDER BY timestamp DESC) AS x GROUP BY x.hostname) AS y WHERE y.count >= %d"; + $arguments = array(variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD), variable_get('spam_blacklist_ip', SPAM_FILTER_DUPLICATE_DEFAULT_BLACKLIST)); + + $count_sql = preg_replace('/^SELECT \* /', 'SELECT count(hostname) ', $sql); + + $result = pager_query($sql . tablesort_sql($header), 50, 0, $count_sql, $arguments); + + while ($log = db_fetch_object($result)) { + $rows[] = array('data' => array( + $log->hostname, + format_date($log->timestamp, 'small'), + $log->count + ) + ); + } + + if (!$rows) { + $rows[] = array(array('data' => t('No log messages available.'), 'colspan' => 6)); + } + + return theme('table', $header, $rows) . 
theme('pager', NULL, 50, 0); +} diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.info sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.info --- /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.info 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.info 2009-12-09 14:46:50.000000000 +1100 @@ -0,0 +1,13 @@ +; $Id: node_age.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ +name = Spam node age filter +description = A node-age comment filter plug-in for the spam module. +package = Spam +dependencies[] = spam +dependencies[] = comment +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.info~ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.info~ --- /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.info~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.info~ 2009-12-09 14:46:50.000000000 +1100 @@ -0,0 +1,13 @@ +; $Id: node_age.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ +name = Spam node age filter +description = A node-age comment filter plug-in for the spam module. 
+package = Spam +dependencies[] = spam +dependencies[] = comment +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.install sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.install --- /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.install 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.install 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,25 @@ +. All rights reserved. + * + */ + +/** + * Drupal _menu() hook. + */ +function spam_filter_node_age_menu() { + $items = array(); + + $items['admin/settings/spam/filters/node-age'] = array( + 'title' => 'Node age', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_filter_node_age_admin_settings'), + 'access arguments' => array('administer spam'), + 'description' => 'Configure the node age filter.', + 'type' => MENU_LOCAL_TASK, + ); + + return $items; +} + +function spam_filter_node_age_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_node_age', $type, $content, $fields, $extra)) return; + return spam_filter_node_age_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_node_age'; + + case 'filter_info': + return array( + 'name' => t('Node age'), + 'module' => t('spam_filter_node_age'), + 'description' => t('A node-age comment spam filter.'), + 'help' => t('The node-age filter assigns a higher spam probability to comments made against older nodes.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 150, + 'weight' => -2, + ); + + } +} + +/** + * Determine if 
a comment is being posted against too old of a node. + */ +function spam_filter_node_age_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + if ($type == 'comment') { + $action = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + if (arg(0) == 'comment' && arg(1) == 'reply' && is_numeric(arg(2))) { + $nid = arg(2); + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('retrieved nid (@nid) from url', array('@nid' => $nid)), $type, $id); + } + else { + $nid = db_result(db_query('SELECT nid FROM {comments} WHERE cid = %d', $id)); + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('retrieved nid (@nid) from database', array('@nid' => $nid)), $type, $id); + } + + if (!$nid) { + spam_log(SPAM_LOG, 'spam_filter_node_age_spam_filter', t('warning: nid not found for comment (@cid), skipping', array('@cid' => $id)), $type, $id); + $action['total'] = 0; + return $action; + } + + $node = spam_invoke_module('node', 'load', $nid); + if (is_object($node)) { + $timestamp_field = variable_get('spam_filter_node_age_filter_on', 'created'); + if ($node->$timestamp_field < (time() - variable_get('spam_filter_node_age_limit_long', 4838400))) { + $action['total'] = variable_get('spam_filter_node_age_weight_long', 99); + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('node (@nid) older than long limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); + } + else if ($node->$timestamp_field < (time() - variable_get('spam_filter_node_age_limit_short', 2419200))) { + $action['total'] = variable_get('spam_filter_node_age_weight_short', 85); + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('node (@nid) older than short limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); + } + else { + $action['total'] = 0; + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('node (@nid) is recent.', 
array('@nid' => $nid)), $type, $id); + } + } else { + spam_log(SPAM_LOG, 'spam_filter_node_age_spam_filter', t('warning: nid (@nid) does not map to node, skipping', array('@nid' => $nid)), $type, $id); + $action['total'] = 0; + } + } + else { + spam_log(SPAM_DEBUG, 'spam_filter_node_age_spam_filter', t('content type is not comment, skipping'), $type, $id); + $action['total'] = 0; + } + return $action; +} + +/** + * Module administrative configuration options. + */ +function spam_filter_node_age_admin_settings() { + $form = array(); + $form['short'] = array( + '#type' => 'fieldset', + '#title' => 'Old content', + '#collapsible' => TRUE, + '#collapsed' => TRUE, + ); + $limits = drupal_map_assoc(spam_range(604800, 14515200, 604800), 'format_interval'); + $weights = drupal_map_assoc(array(60, 65, 70, 75, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99)); + $form['short']['spam_filter_node_age_limit_short'] = array( + '#type' => 'select', + '#title' => t('Old content is content not published within the past'), + '#options' => $limits, + '#required' => TRUE, + '#default_value' => variable_get('spam_filter_node_age_limit_short', 2419200), + ); + $form['short']['spam_filter_node_age_weight_short'] = array( + '#type' => 'select', + '#title' => t('Probability that comments posted to old content is spam'), + '#options' => $weights, + '#required' => TRUE, + '#description' => t('Probability that comments posted to old content are spam, as a percentage.'), + '#default_value' => variable_get('spam_filter_node_age_weight_short', 85), + ); + + $form['long'] = array( + '#type' => 'fieldset', + '#title' => 'Really old content', + '#collapsible' => TRUE, + '#collapsed' => TRUE, + ); + $form['long']['spam_filter_node_age_limit_long'] = array( + '#type' => 'select', + '#title' => t('Really old content is content not published within the past'), + '#options' => $limits, + '#required' => TRUE, + '#default_value' => 
variable_get('spam_filter_node_age_limit_long', 4838400), + ); + $form['long']['spam_filter_node_age_weight_long'] = array( + '#type' => 'select', + '#title' => t('Probability that comments posted to really old content is spam'), + '#options' => $weights, + '#required' => TRUE, + '#description' => t('Probability that comments posted to really old content are spam, as a percentage.'), + '#default_value' => variable_get('spam_filter_node_age_weight_long', 99), + ); + return system_settings_form($form); +} + +/** + * Validate the configuration. + */ +function spam_filter_node_age_admin_settings_validate($form, &$form_state) { + $limit_short = $form_state['values']['spam_filter_node_age_limit_short']; + $limit_long = $form_state['values']['spam_filter_node_age_limit_long']; + if ($limit_short >= $limit_long) { + form_set_error('spam_filter_node_age_limit_long', t('Really old content has to be older than old content.')); + } +} + +/** + * Save the configuration. + */ +function spam_filter_node_age_admin_settings_submit($form, &$form_state) { +/* TODO The 'op' element in the form values is deprecated. + Each button can have #validate and #submit functions associated with it. + Thus, there should be one button that submits the form and which invokes + the normal form_id_validate and form_id_submit handlers. Any additional + buttons which need to invoke different validate or submit functionality + should have button-specific functions. 
*/ + if ($form_state['values']['op'] == t('Reset to defaults')) { + variable_del('spam_filter_node_age_limit_short'); + variable_del('spam_filter_node_age_weight_short'); + variable_del('spam_filter_node_age_limit_long'); + variable_del('spam_filter_node_age_weight_long'); + drupal_set_message('Configuration reset to defaults.'); + } + else { + variable_set('spam_filter_node_age_limit_short', $form_state['values']['spam_filter_node_age_limit_short']); + variable_set('spam_filter_node_age_weight_short', $form_state['values']['spam_filter_node_age_weight_short']); + variable_set('spam_filter_node_age_limit_long', $form_state['values']['spam_filter_node_age_limit_long']); + variable_set('spam_filter_node_age_weight_long', $form_state['values']['spam_filter_node_age_weight_long']); + drupal_set_message('Configuration saved.'); + } +} \ No newline at end of file diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.module~ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.module~ --- /home/files/coding/drupal/spam/filters/spam_filter_node_age/spam_filter_node_age.module~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_node_age/spam_filter_node_age.module~ 2009-12-09 14:46:52.000000000 +1100 @@ -0,0 +1,196 @@ +. All rights reserved. + * + */ + +/** + * Drupal _menu() hook. 
+ */ +function spam_node_age_menu() { + $items = array(); + + $items['admin/settings/spam/filters/node-age'] = array( + 'title' => 'Node age', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('spam_node_age_admin_settings'), + 'access arguments' => array('administer spam'), + 'description' => 'Configure the node age filter.', + 'type' => MENU_LOCAL_TASK, + ); + + return $items; +} + +function spam_node_age_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_node_age', $type, $content, $fields, $extra)) return; + return spam_node_age_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_node_age'; + + case 'filter_info': + return array( + 'name' => t('Node age'), + 'module' => t('spam_node_age'), + 'description' => t('A node-age comment spam filter.'), + 'help' => t('The node-age filter assigns a higher spam probability to comments made against older nodes.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 150, + 'weight' => -2, + ); + + } +} + +/** + * Determine if a comment is being posted against too old of a node. 
+ */ +function spam_node_age_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + if ($type == 'comment') { + $action = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + if (arg(0) == 'comment' && arg(1) == 'reply' && is_numeric(arg(2))) { + $nid = arg(2); + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('retrieved nid (@nid) from url', array('@nid' => $nid)), $type, $id); + } + else { + $nid = db_result(db_query('SELECT nid FROM {comments} WHERE cid = %d', $id)); + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('retrieved nid (@nid) from database', array('@nid' => $nid)), $type, $id); + } + + if (!$nid) { + spam_log(SPAM_LOG, 'spam_node_age_spam_filter', t('warning: nid not found for comment (@cid), skipping', array('@cid' => $id)), $type, $id); + $action['total'] = 0; + return $action; + } + + $node = spam_invoke_module('node', 'load', $nid); + if (is_object($node)) { + $timestamp_field = variable_get('spam_node_age_filter_on', 'created'); + if ($node->$timestamp_field < (time() - variable_get('spam_node_age_limit_long', 4838400))) { + $action['total'] = variable_get('spam_node_age_weight_long', 99); + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('node (@nid) older than long limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); + } + else if ($node->$timestamp_field < (time() - variable_get('spam_node_age_limit_short', 2419200))) { + $action['total'] = variable_get('spam_node_age_weight_short', 85); + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('node (@nid) older than short limit, spam probability(@weight)', array('@nid' => $nid, '@weight' => $action['total'])), $type, $id); + } + else { + $action['total'] = 0; + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('node (@nid) is recent.', array('@nid' => $nid)), $type, $id); + } + } else { + spam_log(SPAM_LOG, 'spam_node_age_spam_filter', t('warning: nid (@nid) does not map to node, 
skipping', array('@nid' => $nid)), $type, $id); + $action['total'] = 0; + } + } + else { + spam_log(SPAM_DEBUG, 'spam_node_age_spam_filter', t('content type is not comment, skipping'), $type, $id); + $action['total'] = 0; + } + return $action; +} + +/** + * Module administrative configuration options. + */ +function spam_node_age_admin_settings() { + $form = array(); + $form['short'] = array( + '#type' => 'fieldset', + '#title' => 'Old content', + '#collapsible' => TRUE, + '#collapsed' => TRUE, + ); + $limits = drupal_map_assoc(spam_range(604800, 14515200, 604800), 'format_interval'); + $weights = drupal_map_assoc(array(60, 65, 70, 75, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99)); + $form['short']['spam_node_age_limit_short'] = array( + '#type' => 'select', + '#title' => t('Old content is content not published within the past'), + '#options' => $limits, + '#required' => TRUE, + '#default_value' => variable_get('spam_node_age_limit_short', 2419200), + ); + $form['short']['spam_node_age_weight_short'] = array( + '#type' => 'select', + '#title' => t('Probability that comments posted to old content is spam'), + '#options' => $weights, + '#required' => TRUE, + '#description' => t('Probability that comments posted to old content are spam, as a percentage.'), + '#default_value' => variable_get('spam_node_age_weight_short', 85), + ); + + $form['long'] = array( + '#type' => 'fieldset', + '#title' => 'Really old content', + '#collapsible' => TRUE, + '#collapsed' => TRUE, + ); + $form['long']['spam_node_age_limit_long'] = array( + '#type' => 'select', + '#title' => t('Really old content is content not published within the past'), + '#options' => $limits, + '#required' => TRUE, + '#default_value' => variable_get('spam_node_age_limit_long', 4838400), + ); + $form['long']['spam_node_age_weight_long'] = array( + '#type' => 'select', + '#title' => t('Probability that comments posted to really old content is spam'), + '#options' => $weights, + 
'#required' => TRUE, + '#description' => t('Probability that comments posted to really old content are spam, as a percentage.'), + '#default_value' => variable_get('spam_node_age_weight_long', 99), + ); + return system_settings_form($form); +} + +/** + * Validate the configuration. + */ +function spam_node_age_admin_settings_validate($form, &$form_state) { + $limit_short = $form_state['values']['spam_node_age_limit_short']; + $limit_long = $form_state['values']['spam_node_age_limit_long']; + if ($limit_short >= $limit_long) { + form_set_error('spam_node_age_limit_long', t('Really old content has to be older than old content.')); + } +} + +/** + * Save the configuration. + */ +function spam_node_age_admin_settings_submit($form, &$form_state) { +/* TODO The 'op' element in the form values is deprecated. + Each button can have #validate and #submit functions associated with it. + Thus, there should be one button that submits the form and which invokes + the normal form_id_validate and form_id_submit handlers. Any additional + buttons which need to invoke different validate or submit functionality + should have button-specific functions. 
*/ + if ($form_state['values']['op'] == t('Reset to defaults')) { + variable_del('spam_node_age_limit_short'); + variable_del('spam_node_age_weight_short'); + variable_del('spam_node_age_limit_long'); + variable_del('spam_node_age_weight_long'); + drupal_set_message('Configuration reset to defaults.'); + } + else { + variable_set('spam_node_age_limit_short', $form_state['values']['spam_node_age_limit_short']); + variable_set('spam_node_age_weight_short', $form_state['values']['spam_node_age_weight_short']); + variable_set('spam_node_age_limit_long', $form_state['values']['spam_node_age_limit_long']); + variable_set('spam_node_age_weight_long', $form_state['values']['spam_node_age_weight_long']); + drupal_set_message('Configuration saved.'); + } +} \ No newline at end of file diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.info sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.info --- /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.info 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.info 2009-09-01 10:28:00.000000000 +1000 @@ -0,0 +1,12 @@ +; $Id: surbl.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ +name = Spam Surbl filter +description = A Surbl filter plug-in for the spam module. +package = Spam +dependencies[] = spam +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.module sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.module --- /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.module 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.module 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,151 @@ +. 
+ * + */ + +/** + * http://www.surbl.org/lists.html#multi bitmap + */ +define('SPAM_FILTER_SURBL_SC', 2); +define('SPAM_FILTER_SURBL_WS', 4); +define('SPAM_FILTER_SURBL_PH', 8); +define('SPAM_FILTER_SURBL_OB', 16); +define('SPAM_FILTER_SURBL_AB', 32); +define('SPAM_FILTER_SURBL_JP', 64); + +/** + * Spam hook_spamapi implementation. + */ +function spam_filter_surbl_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_surbl', $type, $content, $fields, $extra)) return; + return spam_filter_surbl_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_surbl'; + + case 'filter_info': + return array( + 'name' => t('Surbl filter'), + 'module' => t('spam_filter_surbl'), + 'description' => t('A spam url filter.'), + 'help' => t('Look up URLs in SURBL to determine if is spam.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 250, + 'weight' => -7, + ); + + } +} + +/** + * Extract URLs from content. + */ +function _spam_filter_surbl_url_extract($content, $type, $fields, $extra = array()) { + static $urls = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($urls["$type-$id"])) { + $string = ''; + foreach ($fields['main'] as $field) { + $string .= $content["$field"] .' '; + } + if (is_array($fields['other'])) { + foreach ($fields['other'] as $field) { + $string .= $content["$field"] .' '; + } + } + + // TODO: Improve this matching. We don't actually extract mailto: urls. + $URI = "(http://|https://|ftp://|mailto:)"; + // Find all urls in content. + preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); + $u = array(); + foreach ($matches[2] as $url) { + $url = preg_replace("'$URI'", '', $url); + // get full domain (ie www.sample.com) + preg_match("/^()?([^\/\"\']+)/i", $url, $domain); + // get root domain (ie sample.com) + preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); + $u[md5($root[0])] = htmlspecialchars(drupal_strtolower($root[0])); + } + $urls["$type-$id"] = $u; + } + + return $urls["$type-$id"]; +} + +/** + * Search for known spam urls in content. + */ +function spam_filter_surbl_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $action = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam = FALSE; + + $urls = _spam_filter_surbl_url_extract($content, $type, $fields, $extra); + + if (is_array($urls) && !empty($urls)) { + foreach ($urls as $url) { + $lookup = "$url.multi.surbl.org"; + $ip = gethostbyname($lookup); + if ($ip != $lookup) { + // this domain was in a SURBL, process accordingly + preg_match("/[^\.\/]+$/", $ip, $code); + if ($code[0] & SPAM_FILTER_SURBL_SC) { + spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('SpamCop message-body URI domains'))), $type, $id); + } + if ($code[0] & SPAM_FILTER_SURBL_WS) { + spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('sa-blacklist domains'))), $type, $id); + } + if ($code[0] & SPAM_FILTER_SURBL_PH) { + spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Phishing data source'))), $type, $id); + } + if ($code[0] & SPAM_FILTER_SURBL_OB) { + spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Outblaze URI blacklist'))), $type, $id); + } + if ($code[0] & SPAM_FILTER_SURBL_AB) { + 
spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('AbuseButler spamvertised sites'))), $type, $id); + } + if ($code[0] & SPAM_FILTER_SURBL_JP) { + spam_log(SPAM_IMPORTANT, 'spam_filter_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('jwSpamSpy + Prolocation data source'))), $type, $id); + } + $action['spam_filter_surbl'][] = array( + 'url' => $url, + 'probability' => 99, + ); + $spam = TRUE; + } + else { + spam_log(SPAM_DEBUG, 'spam_filter_surbl_spam_filter', t('not spam url(@url)', array('@url' => $url)), $type, $id); + } + } + } + + if ($spam) { + $action['total'] = 99; + } + else { + $action['total'] = 0; + } + + return $action; +} + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.module~ sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.module~ --- /home/files/coding/drupal/spam/filters/spam_filter_surbl/spam_filter_surbl.module~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_surbl/spam_filter_surbl.module~ 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,151 @@ +. + * + */ + +/** + * http://www.surbl.org/lists.html#multi bitmap + */ +define('SPAM_SURBL_SC', 2); +define('SPAM_SURBL_WS', 4); +define('SPAM_SURBL_PH', 8); +define('SPAM_SURBL_OB', 16); +define('SPAM_SURBL_AB', 32); +define('SPAM_SURBL_JP', 64); + +/** + * Spam hook_spamapi implementation. 
+ */ +function spam_surbl_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_surbl', $type, $content, $fields, $extra)) return; + return spam_surbl_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_surbl'; + + case 'filter_info': + return array( + 'name' => t('Surbl filter'), + 'module' => t('spam_surbl'), + 'description' => t('A spam url filter.'), + 'help' => t('Look up URLs in SURBL to determine if is spam.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 250, + 'weight' => -7, + ); + + } +} + +/** + * Extract URLs from content. + */ +function _spam_surbl_url_extract($content, $type, $fields, $extra = array()) { + static $urls = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($urls["$type-$id"])) { + $string = ''; + foreach ($fields['main'] as $field) { + $string .= $content["$field"] .' '; + } + if (is_array($fields['other'])) { + foreach ($fields['other'] as $field) { + $string .= $content["$field"] .' '; + } + } + + // TODO: Improve this matching. We don't actually extract mailto: urls. + $URI = "(http://|https://|ftp://|mailto:)"; + // Find all urls in content. + preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); + $u = array(); + foreach ($matches[2] as $url) { + $url = preg_replace("'$URI'", '', $url); + // get full domain (ie www.sample.com) + preg_match("/^()?([^\/\"\']+)/i", $url, $domain); + // get root domain (ie sample.com) + preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); + $u[md5($root[0])] = htmlspecialchars(drupal_strtolower($root[0])); + } + $urls["$type-$id"] = $u; + } + + return $urls["$type-$id"]; +} + +/** + * Search for known spam urls in content. + */ +function spam_surbl_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $action = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam = FALSE; + + $urls = _spam_surbl_url_extract($content, $type, $fields, $extra); + + if (is_array($urls) && !empty($urls)) { + foreach ($urls as $url) { + $lookup = "$url.multi.surbl.org"; + $ip = gethostbyname($lookup); + if ($ip != $lookup) { + // this domain was in a SURBL, process accordingly + preg_match("/[^\.\/]+$/", $ip, $code); + if ($code[0] & SPAM_SURBL_SC) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('SpamCop message-body URI domains'))), $type, $id); + } + if ($code[0] & SPAM_SURBL_WS) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('sa-blacklist domains'))), $type, $id); + } + if ($code[0] & SPAM_SURBL_PH) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Phishing data source'))), $type, $id); + } + if ($code[0] & SPAM_SURBL_OB) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Outblaze URI blacklist'))), $type, $id); + } + if ($code[0] & SPAM_SURBL_AB) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' 
=> $url, '@surbl' => t('AbuseButler spamvertised sites'))), $type, $id); + } + if ($code[0] & SPAM_SURBL_JP) { + spam_log(SPAM_IMPORTANT, 'spam_surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('jwSpamSpy + Prolocation data source'))), $type, $id); + } + $action['spam_surbl'][] = array( + 'url' => $url, + 'probability' => 99, + ); + $spam = TRUE; + } + else { + spam_log(SPAM_DEBUG, 'spam_surbl_spam_filter', t('not spam url(@url)', array('@url' => $url)), $type, $id); + } + } + } + + if ($spam) { + $action['total'] = 99; + } + else { + $action['total'] = 0; + } + + return $action; +} + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.info sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.info --- /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.info 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.info 2009-12-09 14:06:14.000000000 +1100 @@ -0,0 +1,13 @@ +; $Id: url.info,v 1.1.2.1.2.1 2008/12/25 05:42:11 jeremy Exp $ +name = Spam URL filter +description = A URL filter plug-in for the spam module. +package = Spam +dependencies[] = spam +dependencies[] = spam_filter_bayesian +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.info~ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.info~ --- /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.info~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.info~ 2009-12-09 14:06:14.000000000 +1100 @@ -0,0 +1,13 @@ +; $Id: url.info,v 1.1.2.1.2.1 2008/12/25 05:42:11 jeremy Exp $ +name = Spam URL filter +description = A URL filter plug-in for the spam module. 
+package = Spam +dependencies[] = spam +dependencies[] = spam_bayesian +core = 6.x +; Information added by drupal.org packaging script on 2009-09-01 +version = "6.x-1.x-dev" +core = "6.x" +project = "spam" +datestamp = "1251764880" + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.install sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.install --- /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.install 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.install 2009-12-09 14:32:27.000000000 +1100 @@ -0,0 +1,26 @@ +. All rights reserved. + * + */ + +function spam_filter_url_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + // Don't bother with this hook unless the filter is actually enabled. + + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_url', $type, $content, $fields, $extra)) return; + return spam_filter_url_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_url'; + + case 'filter_info': + return array( + 'name' => t('URL filter'), + 'module' => t('spam_filter_url'), + 'description' => t('A spam url filter.'), + 'help' => t('The url filter blocks posts containing spam-URLs, automatically learned with the bayesian filter module.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 250, + 'weight' => -6, + ); + + case 'mark_as_spam': + case 'mark_as_not_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_url', $type, $content, $fields, $extra)) return; + spam_log(SPAM_DEBUG, 'spam_filter_url_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); + $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); + $spam_filter_urls = _spam_filter_url_extract($extra['content'], $type, $fields, $extra); + 
spam_filter_url_update($spam_filter_urls, ($op == 'mark_as_spam' ? TRUE : FALSE), $type, $extra['id']); + break; + } +} + +function _spam_filter_url_extract($content, $type, $fields, $extra = array()) { + static $spam_filter_urls = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($spam_filter_urls["$type-$id"])) { + $string = ''; + foreach ($fields['main'] as $field) { + $string .= $content["$field"] .' '; + } + if (is_array($fields['other'])) { + foreach ($fields['other'] as $field) { + $string .= $content["$field"] .' '; + } + } + + // TODO: Improve this matching. We don't actually extract mailto: urls. + $URI = "(http://|https://|ftp://|mailto:)"; + // Find all urls in content. + preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); + foreach ($matches[2] as $spam_filter_url) { + $spam_filter_url = preg_replace("'$URI'", '', $spam_filter_url); + // get full domain (ie www.sample.com) + preg_match("/^()?([^\/\"\']+)/i", $spam_filter_url, $domain); + // get root domain (ie sample.com) + preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); + $spam_filter_url = htmlspecialchars(drupal_strtolower($root[0])); + _spam_filter_url_count($spam_filter_url); + $u[] = $spam_filter_url; + } + $spam_filter_urls["$type-$id"] = $u; + } + + return $spam_filter_urls["$type-$id"]; +} + +/** + * Search for known spam urls in content. + */ +function spam_filter_url_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $action = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam = FALSE; + + $spam_filter_urls = _spam_filter_url_extract($content, $type, $fields, $extra); + + if (is_array($spam_filter_urls) && !empty($spam_filter_urls)) { + $count = _spam_filter_url_count(); + + $limit = variable_get('spam_filter_url_limit_total', 10); + if ($limit > -1 && $count['total'] > $limit) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('total urls(@total) > spam_filter_url_limit_total(@limit)', array('@total' => $count['total'], '@limit' => variable_get('spam_filter_url_limit_total', 10))), $type, $id); + $action['spam_filter_url'][] = array( + 'limit' => 'total', + 'total' => $count['total'], + ); + $action['total'] = 99; + return $action; + } + $limit = variable_get('spam_filter_url_limit_repeat', 5); + if ($limit > -1) { + // Sort urls from most repeated to least repeated. 
+ asort($count); + + // skip count['total'] + array_pop($count); + + $max = array_pop($count); + if ($max > $limit) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('repeated urls(@total) > spam_filter_url_limit_repeat(@limit)', array('@total' => $max, '@limit' => variable_get('spam_filter_url_limit_repeat', 5))), $type, $id); + $action['spam_filter_url'][] = array( + 'limit' => 'repeat', + 'total' => $max, + ); + $action['total'] = 99; + } + } + + foreach ($spam_filter_urls as $spam_filter_url) { + $p = db_fetch_object(db_query("SELECT probability FROM {spam_filter_bayesian_tokens} WHERE class = 'spam_filter_url' AND token = '%s'", $spam_filter_url)); + $action['spam_filter_url'][] = array( + 'spam_filter_url' => $spam_filter_url, + 'probability' => $p->probability, + ); + if ($p->probability >= variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD)) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('found spam url(@url) probability(@probability)', array('@url' => $spam_filter_url, '@probability' => $p->probability)), $type, $id); + $spam = TRUE; + break; + } + spam_log(SPAM_DEBUG, 'spam_filter_url_spam_filter', t('not spam url(@url) probability(@probability)', array('@url' => $spam_filter_url, '@probability' => $p->probability)), $type, $id); + } + } + + if ($spam) { + $action['total'] = 99; + } + else { + $action['total'] = 0; + } + + return $action; +} + +/** + * Update url probabilities in database. + */ +function spam_filter_url_update($spam_filter_urls, $yes, $type, $id) { + module_invoke('spam_filter_bayesian', 'tokens_update', 'spam_filter_url', $spam_filter_urls, $yes, $type, $id); +} + +/** + * Keep track of the total number of URLs found in the current content. + * + * @param $spam_filter_url A URL to be added to a static array. + * @return Array of URLs showing how many times each URL is present, and + * the total number of arrays. 
+ */ +function _spam_filter_url_count($spam_filter_url = NULL) { + // build up an array of all URLs seen in current content + static $spam_filter_urls = array(); + + if ($spam_filter_url != NULL) { + $spam_filter_urls["$spam_filter_url"]++; + $spam_filter_urls['total']++; + } + + return $spam_filter_urls; +} + diff -uprN /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.module~ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.module~ --- /home/files/coding/drupal/spam/filters/spam_filter_url/spam_filter_url.module~ 1970-01-01 10:00:00.000000000 +1000 +++ sites/all/modules/spam/filters/spam_filter_url/spam_filter_url.module~ 2009-12-09 14:49:00.000000000 +1100 @@ -0,0 +1,182 @@ +. All rights reserved. + * + */ + +function spam_filter_url_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { + // Don't both with this hook unless the filter is actually enabled. + + switch ($op) { + case 'filter': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_url', $type, $content, $fields, $extra)) return; + return spam_filter_url_spam_filter($content, $type, $fields, $extra); + + case 'filter_module': + return 'spam_filter_url'; + + case 'filter_info': + return array( + 'name' => t('URL filter'), + 'module' => t('spam_filter_url'), + 'description' => t('A spam url filter.'), + 'help' => t('The url filter blocks posts containing spam-URLs, automatically learned with the bayesian filter module.'), + ); + break; + + case 'filter_install': + return array( + 'status' => SPAM_FILTER_ENABLED, + 'gain' => 250, + 'weight' => -6, + ); + + case 'mark_as_spam': + case 'mark_as_not_spam': + if (!module_invoke('spam', 'filter_enabled', 'spam_filter_url', $type, $content, $fields, $extra)) return; + spam_log(SPAM_DEBUG, 'spam_filter_url_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); + $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); + $spam_filter_urls = 
_spam_filter_url_extract($extra['content'], $type, $fields, $extra); + spam_filter_url_update($spam_filter_urls, ($op == 'mark_as_spam' ? TRUE : FALSE), $type, $extra['id']); + break; + } +} + +function _spam_filter_url_extract($content, $type, $fields, $extra = array()) { + static $spam_filter_urls = array(); + $id = spam_invoke_module($type, 'content_id', $content, $extra); + + if (is_object($content)) { + $content = (array)$content; + } + + if (!isset($spam_filter_urls["$type-$id"])) { + $string = ''; + foreach ($fields['main'] as $field) { + $string .= $content["$field"] .' '; + } + if (is_array($fields['other'])) { + foreach ($fields['other'] as $field) { + $string .= $content["$field"] .' '; + } + } + + // TODO: Improve this matching. We don't actually extract mailto: urls. + $URI = "(http://|https://|ftp://|mailto:)"; + // Find all urls in content. + preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); + foreach ($matches[2] as $spam_filter_url) { + $spam_filter_url = preg_replace("'$URI'", '', $spam_filter_url); + // get full domain (ie www.sample.com) + preg_match("/^()?([^\/\"\']+)/i", $spam_filter_url, $domain); + // get root domain (ie sample.com) + preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); + $spam_filter_url = htmlspecialchars(drupal_strtolower($root[0])); + _spam_filter_url_count($spam_filter_url); + $u[] = $spam_filter_url; + } + $spam_filter_urls["$type-$id"] = $u; + } + + return $spam_filter_urls["$type-$id"]; +} + +/** + * Search for known spam urls in content. + */ +function spam_filter_url_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { + $action = array(); + + $id = spam_invoke_module($type, 'content_id', $content, $extra); + $spam = FALSE; + + $spam_filter_urls = _spam_filter_url_extract($content, $type, $fields, $extra); + + if (is_array($spam_filter_urls) && !empty($spam_filter_urls)) { + $count = _spam_filter_url_count(); + + $limit = variable_get('spam_filter_url_limit_total', 10); + if ($limit > -1 && $count['total'] > $limit) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('total urls(@total) > spam_filter_url_limit_total(@limit)', array('@total' => $count['total'], '@limit' => variable_get('spam_filter_url_limit_total', 10))), $type, $id); + $action['spam_filter_url'][] = array( + 'limit' => 'total', + 'total' => $count['total'], + ); + $action['total'] = 99; + return $action; + } + $limit = variable_get('spam_filter_url_limit_repeat', 5); + if ($limit > -1) { + // Sort urls from most repeated to least repeated. 
+ asort($count); + + // skip count['total'] + array_pop($count); + + $max = array_pop($count); + if ($max > $limit) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('repeated urls(@total) > spam_filter_url_limit_repeat(@limit)', array('@total' => $max, '@limit' => variable_get('spam_filter_url_limit_repeat', 5))), $type, $id); + $action['spam_filter_url'][] = array( + 'limit' => 'repeat', + 'total' => $max, + ); + $action['total'] = 99; + } + } + + foreach ($spam_filter_urls as $spam_filter_url) { + $p = db_fetch_object(db_query("SELECT probability FROM {spam_bayesian_tokens} WHERE class = 'spam_filter_url' AND token = '%s'", $spam_filter_url)); + $action['spam_filter_url'][] = array( + 'spam_filter_url' => $spam_filter_url, + 'probability' => $p->probability, + ); + if ($p->probability >= variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD)) { + spam_log(SPAM_VERBOSE, 'spam_filter_url_spam_filter', t('found spam url(@url) probability(@probability)', array('@url' => $spam_filter_url, '@probability' => $p->probability)), $type, $id); + $spam = TRUE; + break; + } + spam_log(SPAM_DEBUG, 'spam_filter_url_spam_filter', t('not spam url(@url) probability(@probability)', array('@url' => $spam_filter_url, '@probability' => $p->probability)), $type, $id); + } + } + + if ($spam) { + $action['total'] = 99; + } + else { + $action['total'] = 0; + } + + return $action; +} + +/** + * Update url probabilities in database. + */ +function spam_filter_url_update($spam_filter_urls, $yes, $type, $id) { + module_invoke('spam_bayesian', 'tokens_update', 'spam_filter_url', $spam_filter_urls, $yes, $type, $id); +} + +/** + * Keep track of the total number of URLs found in the current content. + * + * @param $spam_filter_url A URL to be added to a static array. + * @return Array of URLs showing how many times each URL is present, and + * the total number of arrays. 
+ */ +function _spam_filter_url_count($spam_filter_url = NULL) { + // build up an array of all URLs seen in current content + static $spam_filter_urls = array(); + + if ($spam_filter_url != NULL) { + $spam_filter_urls["$spam_filter_url"]++; + $spam_filter_urls['total']++; + } + + return $spam_filter_urls; +} + diff -uprN /home/files/coding/drupal/spam/filters/surbl/surbl.info sites/all/modules/spam/filters/surbl/surbl.info --- /home/files/coding/drupal/spam/filters/surbl/surbl.info 2009-09-01 10:28:00.000000000 +1000 +++ sites/all/modules/spam/filters/surbl/surbl.info 1970-01-01 10:00:00.000000000 +1000 @@ -1,12 +0,0 @@ -; $Id: surbl.info,v 1.1.2.1.2.1 2008/12/25 05:42:10 jeremy Exp $ -name = Spam Surbl filter -description = A Surbl filter plug-in for the spam module. -package = Spam -dependencies[] = spam -core = 6.x -; Information added by drupal.org packaging script on 2009-09-01 -version = "6.x-1.x-dev" -core = "6.x" -project = "spam" -datestamp = "1251764880" - diff -uprN /home/files/coding/drupal/spam/filters/surbl/surbl.module sites/all/modules/spam/filters/surbl/surbl.module --- /home/files/coding/drupal/spam/filters/surbl/surbl.module 2008-12-25 16:42:10.000000000 +1100 +++ sites/all/modules/spam/filters/surbl/surbl.module 1970-01-01 10:00:00.000000000 +1000 @@ -1,151 +0,0 @@ -. - * - */ - -/** - * http://www.surbl.org/lists.html#multi bitmap - */ -define('SURBL_SC', 2); -define('SURBL_WS', 4); -define('SURBL_PH', 8); -define('SURBL_OB', 16); -define('SURBL_AB', 32); -define('SURBL_JP', 64); - -/** - * Spam hook_spamapi implementation. 
- */ -function surbl_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { - switch ($op) { - case 'filter': - if (!module_invoke('spam', 'filter_enabled', 'surbl', $type, $content, $fields, $extra)) return; - return surbl_spam_filter($content, $type, $fields, $extra); - - case 'filter_module': - return 'surbl'; - - case 'filter_info': - return array( - 'name' => t('Surbl filter'), - 'module' => t('surbl'), - 'description' => t('A spam url filter.'), - 'help' => t('Look up URLs in SURBL to determine if is spam.'), - ); - break; - - case 'filter_install': - return array( - 'status' => SPAM_FILTER_ENABLED, - 'gain' => 250, - 'weight' => -7, - ); - - } -} - -/** - * Extract URLs from content. - */ -function _surbl_url_extract($content, $type, $fields, $extra = array()) { - static $urls = array(); - $id = spam_invoke_module($type, 'content_id', $content, $extra); - - if (is_object($content)) { - $content = (array)$content; - } - - if (!isset($urls["$type-$id"])) { - $string = ''; - foreach ($fields['main'] as $field) { - $string .= $content["$field"] .' '; - } - if (is_array($fields['other'])) { - foreach ($fields['other'] as $field) { - $string .= $content["$field"] .' '; - } - } - - // TODO: Improve this matching. We don't actually extract mailto: urls. - $URI = "(http://|https://|ftp://|mailto:)"; - // Find all urls in content. - preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); - $u = array(); - foreach ($matches[2] as $url) { - $url = preg_replace("'$URI'", '', $url); - // get full domain (ie www.sample.com) - preg_match("/^()?([^\/\"\']+)/i", $url, $domain); - // get root domain (ie sample.com) - preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); - $u[md5($root[0])] = htmlspecialchars(drupal_strtolower($root[0])); - } - $urls["$type-$id"] = $u; - } - - return $urls["$type-$id"]; -} - -/** - * Search for known spam urls in content. - */ -function surbl_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { - $action = array(); - - $id = spam_invoke_module($type, 'content_id', $content, $extra); - $spam = FALSE; - - $urls = _surbl_url_extract($content, $type, $fields, $extra); - - if (is_array($urls) && !empty($urls)) { - foreach ($urls as $url) { - $lookup = "$url.multi.surbl.org"; - $ip = gethostbyname($lookup); - if ($ip != $lookup) { - // this domain was in a SURBL, process accordingly - preg_match("/[^\.\/]+$/", $ip, $code); - if ($code[0] & SURBL_SC) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('SpamCop message-body URI domains'))), $type, $id); - } - if ($code[0] & SURBL_WS) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('sa-blacklist domains'))), $type, $id); - } - if ($code[0] & SURBL_PH) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Phishing data source'))), $type, $id); - } - if ($code[0] & SURBL_OB) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('Outblaze URI blacklist'))), $type, $id); - } - if ($code[0] & SURBL_AB) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('AbuseButler spamvertised sites'))), 
$type, $id); - } - if ($code[0] & SURBL_JP) { - spam_log(SPAM_IMPORTANT, 'surbl_spam_filter', t('found spam url(@url) @surbl', array('@url' => $url, '@surbl' => t('jwSpamSpy + Prolocation data source'))), $type, $id); - } - $action['surbl'][] = array( - 'url' => $url, - 'probability' => 99, - ); - $spam = TRUE; - } - else { - spam_log(SPAM_DEBUG, 'surbl_spam_filter', t('not spam url(@url)', array('@url' => $url)), $type, $id); - } - } - } - - if ($spam) { - $action['total'] = 99; - } - else { - $action['total'] = 0; - } - - return $action; -} - diff -uprN /home/files/coding/drupal/spam/filters/url/url.info sites/all/modules/spam/filters/url/url.info --- /home/files/coding/drupal/spam/filters/url/url.info 2009-09-01 10:28:00.000000000 +1000 +++ sites/all/modules/spam/filters/url/url.info 1970-01-01 10:00:00.000000000 +1000 @@ -1,13 +0,0 @@ -; $Id: url.info,v 1.1.2.1.2.1 2008/12/25 05:42:11 jeremy Exp $ -name = Spam URL filter -description = A URL filter plug-in for the spam module. -package = Spam -dependencies[] = spam -dependencies[] = bayesian -core = 6.x -; Information added by drupal.org packaging script on 2009-09-01 -version = "6.x-1.x-dev" -core = "6.x" -project = "spam" -datestamp = "1251764880" - diff -uprN /home/files/coding/drupal/spam/filters/url/url.module sites/all/modules/spam/filters/url/url.module --- /home/files/coding/drupal/spam/filters/url/url.module 2008-12-25 16:42:11.000000000 +1100 +++ sites/all/modules/spam/filters/url/url.module 1970-01-01 10:00:00.000000000 +1000 @@ -1,182 +0,0 @@ -. All rights reserved. - * - */ - -function url_spamapi($op, $type = NULL, $content = array(), $fields = array(), $extra = NULL) { - // Don't both with this hook unless the filter is actually enabled. 
- - switch ($op) { - case 'filter': - if (!module_invoke('spam', 'filter_enabled', 'url', $type, $content, $fields, $extra)) return; - return url_spam_filter($content, $type, $fields, $extra); - - case 'filter_module': - return 'url'; - - case 'filter_info': - return array( - 'name' => t('URL filter'), - 'module' => t('url'), - 'description' => t('A spam url filter.'), - 'help' => t('The url filter blocks posts containing spam-URLs, automatically learned with the bayesian filter module.'), - ); - break; - - case 'filter_install': - return array( - 'status' => SPAM_FILTER_ENABLED, - 'gain' => 250, - 'weight' => -6, - ); - - case 'mark_as_spam': - case 'mark_as_not_spam': - if (!module_invoke('spam', 'filter_enabled', 'url', $type, $content, $fields, $extra)) return; - spam_log(SPAM_DEBUG, 'url_spamapi', t('@op', array('@op' => $op)), $type, $extra['id']); - $fields = spam_invoke_module($type, 'filter_fields', $extra['content']); - $urls = _url_extract($extra['content'], $type, $fields, $extra); - url_update($urls, ($op == 'mark_as_spam' ? TRUE : FALSE), $type, $extra['id']); - break; - } -} - -function _url_extract($content, $type, $fields, $extra = array()) { - static $urls = array(); - $id = spam_invoke_module($type, 'content_id', $content, $extra); - - if (is_object($content)) { - $content = (array)$content; - } - - if (!isset($urls["$type-$id"])) { - $string = ''; - foreach ($fields['main'] as $field) { - $string .= $content["$field"] .' '; - } - if (is_array($fields['other'])) { - foreach ($fields['other'] as $field) { - $string .= $content["$field"] .' '; - } - } - - // TODO: Improve this matching. We don't actually extract mailto: urls. - $URI = "(http://|https://|ftp://|mailto:)"; - // Find all urls in content. - preg_match_all("!(|[ \n\r\t\(]*)($URI([a-zA-Z0-9@:%_~#?&=.,/;-]*[a-zA-Z0-9@:%_~#&=/;-]))([.,?]?)(?=(
|[ \n\r\t\)]*))!i", $string, $matches); - foreach ($matches[2] as $url) { - $url = preg_replace("'$URI'", '', $url); - // get full domain (ie www.sample.com) - preg_match("/^()?([^\/\"\']+)/i", $url, $domain); - // get root domain (ie sample.com) - preg_match("/[^\.\/]+\.[^\.\/]+$/", $domain[2], $root); - $url = htmlspecialchars(drupal_strtolower($root[0])); - _url_count($url); - $u[] = $url; - } - $urls["$type-$id"] = $u; - } - - return $urls["$type-$id"]; -} - -/** - * Search for known spam urls in content. - */ -function url_spam_filter($content, $type, $fields, $extra = array(), $filter_test = FALSE) { - $action = array(); - - $id = spam_invoke_module($type, 'content_id', $content, $extra); - $spam = FALSE; - - $urls = _url_extract($content, $type, $fields, $extra); - - if (is_array($urls) && !empty($urls)) { - $count = _url_count(); - - $limit = variable_get('url_limit_total', 10); - if ($limit > -1 && $count['total'] > $limit) { - spam_log(SPAM_VERBOSE, 'url_spam_filter', t('total urls(@total) > url_limit_total(@limit)', array('@total' => $count['total'], '@limit' => variable_get('url_limit_total', 10))), $type, $id); - $action['url'][] = array( - 'limit' => 'total', - 'total' => $count['total'], - ); - $action['total'] = 99; - return $action; - } - $limit = variable_get('url_limit_repeat', 5); - if ($limit > -1) { - // Sort urls from most repeated to least repeated. 
- asort($count); - - // skip count['total'] - array_pop($count); - - $max = array_pop($count); - if ($max > $limit) { - spam_log(SPAM_VERBOSE, 'url_spam_filter', t('repeated urls(@total) > url_limit_repeat(@limit)', array('@total' => $max, '@limit' => variable_get('url_limit_repeat', 5))), $type, $id); - $action['url'][] = array( - 'limit' => 'repeat', - 'total' => $max, - ); - $action['total'] = 99; - } - } - - foreach ($urls as $url) { - $p = db_fetch_object(db_query("SELECT probability FROM {bayesian_tokens} WHERE class = 'url' AND token = '%s'", $url)); - $action['url'][] = array( - 'url' => $url, - 'probability' => $p->probability, - ); - if ($p->probability >= variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD)) { - spam_log(SPAM_VERBOSE, 'url_spam_filter', t('found spam url(@url) probability(@probability)', array('@url' => $url, '@probability' => $p->probability)), $type, $id); - $spam = TRUE; - break; - } - spam_log(SPAM_DEBUG, 'url_spam_filter', t('not spam url(@url) probability(@probability)', array('@url' => $url, '@probability' => $p->probability)), $type, $id); - } - } - - if ($spam) { - $action['total'] = 99; - } - else { - $action['total'] = 0; - } - - return $action; -} - -/** - * Update url probabilities in database. - */ -function url_update($urls, $yes, $type, $id) { - module_invoke('bayesian', 'tokens_update', 'url', $urls, $yes, $type, $id); -} - -/** - * Keep track of the total number of URLs found in the current content. - * - * @param $url A URL to be added to a static array. - * @return Array of URLs showing how many times each URL is present, and - * the total number of arrays. 
- */ -function _url_count($url = NULL) { - // build up an array of all URLs seen in current content - static $urls = array(); - - if ($url != NULL) { - $urls["$url"]++; - $urls['total']++; - } - - return $urls; -} - diff -uprN /home/files/coding/drupal/spam/modules/spam_comment.inc sites/all/modules/spam/modules/spam_comment.inc --- /home/files/coding/drupal/spam/modules/spam_comment.inc 2009-08-07 06:37:19.000000000 +1000 +++ sites/all/modules/spam/modules/spam_comment.inc 1970-01-01 10:00:00.000000000 +1000 @@ -1,387 +0,0 @@ -cid))) { - $comment->status = COMMENT_NOT_PUBLISHED; - } - break; - } -} - -/** - * Cache the comment id to be sure it's available when we need it. - */ -function _spam_comment_cid($id = NULL) { - static $cid = 0; - - if (isset($id) && is_numeric($id)) { - $cid = $id; - } - - return $cid; -} - -/** - * Spam module _spamapi() hook. - */ -function comment_spamapi($op, $arg1 = NULL, $arg2 = NULL, $arg3 = NULL) { - switch ($op) { - case 'content_module': - // Register with the spam api as a content type module. - return 'comment'; - - case 'content_id': - // Tell the spam module the id of a given comment. - if (is_object($arg1)) { - // The delete hook uses an object instead of an array. - $arg1 = (array)$arg1; - } - return _spam_comment_cid($arg1['cid']); - - case 'content_types': - // Register the "comment" content type with the spam module. - return array(array( - 'name' => t('comments'), - 'module' => t('comment'), - 'title' => t('Comments'), - 'description' => t('Check this box to filter comments for spam.'), - 'default_value' => 1, - )); - - case 'filter_content_type': - return (variable_get('spam_filter_comments', 1)); - - case 'filter_fields': - // Tell spam module which fields it should scan for spam. - $fields['main'] = array('subject', 'comment'); - if (is_object($arg1)) { - // The delete hook uses an object instead of an array. 
- $arg1 = (array)$arg1; - } - if (isset($arg1['author'])) { - $fields['other'][] = 'author'; - } - if (isset($arg1['name'])) { - $fields['other'][] = 'name'; - } - if (isset($arg1['mail'])) { - $fields['other'][] = 'mail'; - } - if (isset($arg1['homepage'])) { - $fields['other'][] = 'homepage'; - } - return $fields; - - case 'feedback_form': - $form = array(); - if (is_numeric($form['cid'])) { - $form['cid'] = array( - '#type' => 'textfield', - '#title' => t('Comment ID'), - '#value' => $arg1['cid'], - '#disabled' => TRUE, - ); - } - // fall through... - case 'error_form': - if (!is_array($form)) { - $form = array(); - } - $form['comment'] = array( - '#type' => 'fieldset', - '#title' => 'Comment', - ); - $form['comment']['title'] = array( - '#type' => 'textfield', - '#title' => t('Subject'), - '#value' => $arg1['subject'], - '#disabled' => TRUE, - ); - $form['comment']['body'] = array( - '#type' => 'textarea', - '#title' => t('Comment'), - '#value' => $arg1['comment'], - '#disabled' => TRUE, - ); - $form['comment']['author'] = array( - '#type' => 'markup', - '#prefix' => 'Your posting on @site from %IP has been automatically flagged by our spam filters as being inappropriate for this website.
At @site we work very hard behind the scenes to keep our web pages free of spam. Unfortunately, sometimes we accidentally block legitimate content. If you are attempting to post legitimate content to this website, you can help us to improve our spam filters by emailing the following information to a site administrator:
%LINK
', array('@site' => variable_get('site_name', 'Drupal')))), + '#description' => t('Message to show visitors when the spam filters block them from posting content. The text "%IP" will be replaced by the visitors actual IP address.') + ); + + // TODO: These options are for debugging the spam module. They should be + // disabled before the module is released. + $form['advanced'] = array( + '#type' => 'fieldset', + '#title' => t('Advanced configuration'), + '#collapsible' => TRUE, + '#collapsed' => TRUE, + ); + $options = drupal_map_assoc(spam_range(10, 40, 10)) + drupal_map_assoc(spam_range(45, 70, 5)) + drupal_map_assoc(spam_range(72, 88, 2)) + drupal_map_assoc(spam_range(90, 99)); + $form['advanced']['spam_threshold'] = array( + '#type' => 'select', + '#title' => t('Spam threshold'), + '#options' => $options, + '#default_value' => variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD), + '#description' => t('Each of filtered content will be assigned a single number from 1 to 99. This number signifies the percent of likelihood that the filtered content is spam. Any piece of content whose spam value is equal to or greater than this threshold will be considered spam. Any piece of content whose spam value is less than this threshold will be considered not spam.'), + ); + $form['advanced']['spam_log_level'] = array( + '#type' => 'select', + '#title' => t('Log level'), + '#options' => array(0 => t('Disabled'), SPAM_LOG => t('Important'), SPAM_VERBOSE => t('Verbose'), SPAM_DEBUG => t('Debug')), + '#default_value' => variable_get('spam_log_level', SPAM_LOG), + // TODO: Add informative description. 
+ '#description' => t('Logging level.'), + ); + $period = drupal_map_assoc(array(0, 3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800, 31536000), 'format_interval'); + $period[0] = t('never'); + $form['advanced']['spam_log_delete'] = array( + '#type' => 'select', + '#title' => t('Discard spam logs older than'), + '#default_value' => variable_get('spam_log_delete', 259200), + '#options' => $period, + '#description' => t('Older spam log entries will be automatically discarded. (Requires a correctly configured cron maintenance task.)', array('@cron' => url('admin/reports/status'))) + ); + + return system_settings_form($form); +} + +/** + * Determine if we should be filtering a given content type. + */ +function spam_filter_content_type($content, $type, $extra) { + $filter = spam_invoke_module($type, 'filter_content_type', $content, $extra); + if (!$filter) { + spam_log(SPAM_DEBUG, 'spam_filter_content_type', t('not configured to scan this content type'), $type, $id); + } + return $filter; +} + +/** + * Determine if a given filter is enabled. + */ +function spam_filter_enabled($filter, $type, $content, $fields, $extra) { + return db_result(db_query("SELECT status FROM {spam_filters} WHERE module = '%s'", $filter)); +} + +/** + * Check if any new spam filters are available for installation. + */ +function spam_init_filters() { + static $initialized = FALSE; + + if (!$initialized) { + $modules = spam_invoke_api('filter_module'); + dprint_r($modules); + foreach ($modules as $module) { + $filter = spam_invoke_module($module, 'filter_info'); + $fid = db_result(db_query_range("SELECT fid FROM {spam_filters} WHERE name = '%s' AND module = '%s'", $filter['name'], $filter['module'], 0, 1)); + if (!$fid) { + spam_install_filter($filter); + } + } + } +} + +/** + * Install the named spam filter, making it available for detecting spam + * content. It will be configured per any defaults defined by the filter. 
+ * + * @param $filter array + * array - must contain 'name' and 'module' elements + */ +function spam_install_filter($filter) { + // Typically we install a filter that's never been installed before. But + // it's also possible to use this function to restore a filter to its default + // settings. + db_query("DELETE FROM {spam_filters} WHERE name = '%s' AND module = '%s'", $filter['name'], $filter['module']); + $default['name'] = $filter['name']; + $default['module'] = $filter['module']; + $default['status'] = SPAM_FILTER_ENABLED; + $default['weight'] = 0; + $default['gain'] = 100; + // Allow module to override defaults. The module can also set other defaults + // when this hook is called. + $defaults = spam_invoke_module($filter['module'], 'filter_install', NULL, array(), array(), $default); + foreach ($defaults as $key => $value) { + $default[$key] = $value; + } + db_query("INSERT INTO {spam_filters} (name, module, status, weight, gain) VALUES('%s', '%s', %d, %d, %d)", $default['name'], $default['module'], $default['status'], $default['weight'], $default['gain']); +} + +/** + * As the spam module isn't a core Drupal module, many important modules won't + * utilize its API. We define the appropriate hooks for these modules in the + * modules/ subdirectory. For example, we define the spam api hooks for the + * node module in modules/spam_node.inc. + */ +function spam_init_api() { + static $initialized = FALSE; + + if (!$initialized) { + // We only need to module_load_include('initialized = TRUE', 'spam', ''); + $path = drupal_get_path('module', 'spam') .'/modules'; + // These files must be names spam_custom_*.inc, such as spam_node.inc. + $files = drupal_system_listing('spam_content_.*\.inc$', $path, 'name', 0); + foreach ($files as $file) { + $module = substr_replace($file->name, '', 0, 13); + if (module_exists($module)) { + require_once './' . $file->filename; + } + } + } +} + +/** + * Invoke spam API functions defined by other modules. 
+ */ +function spam_invoke_api() { + $args = func_get_args(); + array_unshift($args, 'spamapi'); + $contentblah = call_user_func_array('module_invoke_all', $args); + return $contentblah; +} + +/** + * Invoke spam API functions in a specific module. + */ +function spam_invoke_module() { + $args = func_get_args(); + $module = array_shift($args); + print_r($args,$module); + array_unshift($args, $module, 'spamapi'); + return call_user_func_array('module_invoke', $args); +} + +/** + * Manage spam content. + */ +function spam_admin_list() { + $output = drupal_get_form('spam_filter_form'); + $output .= drupal_get_form('spam_admin_overview'); + return $output; +} + +/** + * Spam feedback overview. + */ +function spam_admin_list_feedback() { + $header = array( + array('data' => t('Date'), 'field' => 'timestamp', 'sort' => 'desc'), + array('data' => t('Type'), 'field' => 'content_type'), + array('data' => t('From'), 'field' => 'hostname'), + array('data' => t('Preview')), + array('data' => t('Options'))); + $sql = 'SELECT * FROM {spam_filters_errors}'; + $sql .= tablesort_sql($header); + $result = pager_query($sql, 25); + + $rows = array(); + while ($feedback = db_fetch_object($result)) { + $row = array(); + $row[] = array('data' => format_date($feedback->timestamp, 'small')); + $row[] = array('data' => $feedback->content_type); + $row[] = array('data' => $feedback->hostname); + $row[] = array('data' => _spam_truncate($feedback->feedback, 32)); + $row[] = l(t('view'), "admin/content/spam/feedback/$feedback->bid"); + $rows[] = $row; + } + + $output = theme('table', $header, $rows); + $output .= theme('pager', NULL, 25, 0); + return $output; +} + +/** + * Spam feedback details. 
+ */ +function spam_admin_feedback_form($form_state, $bid) { + $form = array(); + + $feedback = db_fetch_object(db_query('SELECT * FROM {spam_filters_errors} WHERE bid = %d', $bid)); + + $form = spam_invoke_module($feedback->content_type, 'feedback_form', unserialize($feedback->content)); + if (!is_array($form)) { + $form = array(); + } + + $form['date'] = array( + '#type' => 'markup', + '#prefix' => ''. t('To filter on the Title or the Status, you must first filter on the Type.') .'
'; + } + + return $output; +} + +/** + * A filterable list of spam. + */ +function spam_admin_overview() { + $filter = spam_build_filter_query(); + $result = pager_query('SELECT t.* FROM {spam_tracker} t '. $filter['join'] .' '. $filter['where'] .' ORDER BY t.timestamp DESC', 50, 0, NULL, $filter['args']); + + $form['options'] = array('#type' => 'fieldset', + '#title' => t('Update options'), + '#prefix' => 'Your posting on @site from %IP has been automatically flagged by our spam filters as being inappropriate for this website.
At @site we work very hard to keep our web pages free of spam. Unfortunately, sometimes we accidentally block legitimate content. If you are attempting to post legitimate content to this website, you can help us to improve our spam filters and ensure that your post appears on our website by clicking this link:%LINK', array('@site' => variable_get('site_name', 'Drupal'), ))), array('%IP' => ip_address(), '%LINK' => _spam_error_link($_SESSION['content']))); + } + if (!$title) { + $title = t('Your posting was blocked by our spam filter.'); + } + drupal_set_title($title); + print theme('maintenance_page', filter_xss_admin($message)); +} + +/** + * Allow the user to report when their content was inapropriately marked as + * spam. + */ +function spam_denied_in_error_page() { + if ($_SESSION['content']) { + $content = unserialize($_SESSION['content']); + if (is_array($content)) { + $hash = md5($_SESSION['content']); + $exists = db_result(db_query("SELECT bid FROM {spam_filters_errors} WHERE content_hash = '%s'", $hash)); + if ($exists) { + $output = t('You have already reported this content as not spam. Please be patient; a site administrator will review it soon.'); + } + else { + return drupal_get_form('spam_error_page'); + } + } + } + return $output; +} + +/** + * Require user reporting non-spam to submit feedback. 
+ */ +// TODO: add captcha +function spam_error_page() { + $content = unserialize($_SESSION['content']); + $type = $_SESSION['type']; + $form = $_SESSION['spam_form']; + + $form = spam_invoke_module($type, 'error_form', $content); + if (!is_array($form)) { + $form = array(); + } + + $form['feedback'] = array( + '#type' => 'textarea', + '#title' => t('Feedback'), + '#required' => TRUE, + '#description' => t('Please offer some feedback to the site administrator, explaining how your content is relevant to this website.'), + ); + + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Send'), + ); + return $form; +} + +/** + * Store reported legitimate content in database. + */ +function spam_error_page_submit($form, &$form_state) { + global $user; + $content = unserialize($_SESSION['content']); + $type = $_SESSION['type']; + $id = spam_invoke_module($type, 'content_id', $content); + $hash = md5($_SESSION['content']); + if (is_array($_SESSION['spam_form'])) { + $spam_form = serialize($_SESSION['spam_form']); + } + else { + $spam_form = $_SESSION['spam_form']; + } + db_query("INSERT INTO {spam_filters_errors} (uid, content_type, content_id, content_hash, content, form, hostname, feedback, timestamp) VALUES(%d, '%s', %d, '%s', '%s', '%s', '%s', '%s', %d)", $user->uid, $type, $id, $hash, $_SESSION['content'], $spam_form, ip_address() , $form_state['values']['feedback'], time()); + $_SESSION['content'] = $_SESSION['type'] = $_SESSION['spam_form'] = ''; + drupal_set_message(t('Your feedback will be reviewed by a site administrator.')); + drupal_goto(''); +} + +/** + * Add the appropriate links to all content that is actively being filtered. 
+ */ +function spam_links($type, $id, $content) { + $links = array(); + if (spam_invoke_module($type, 'filter_content_type', $content)) { + if (user_access('administer spam')) { + $score = (int)db_result(db_query("SELECT score FROM {spam_tracker} WHERE content_type = '%s' AND content_id = %d", $type, $id)); + + if ($score >= variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD)) { + $links['spam'] = array('title' => t('spam (@score)', array('@score' => $score))); + $links['mark-as-not-spam'] = array('href' => "spam/$type/$id/not_spam", 'title' => t('mark as not spam')); + } + else { + $links['spam'] = array('title' => t('not spam (@score)', array('@score' => $score))); + $links['mark-as-spam'] = array('href' => "spam/$type/$id/spam", 'title' => t('mark as spam')); + } + } + } + return $links; +} + +/** + * Invoke appropriate actions for marking content as spam. + * TODO: Integrate with the Actions module, making actions fully configurable. + */ +function spam_mark_as_spam($type, $id, $extra = array()) { + // TODO: Fix this loop + static $loop = array(); + if (isset($loop[$id])) { + spam_log(SPAM_DEBUG, 'spam_mark_as_spam', t('FIX ME: looping'), $type, $id); + return; + } + $loop[$id] = TRUE; + + spam_update_statistics(t('@type marked as spam', array('@type' => $type))); + $extra['sid'] = db_result(db_query("SELECT sid FROM {spam_tracker} WHERE content_type = '%s' AND content_id = %d", $type, $id)); + if (!$extra['score']) { + $extra['score'] = 99; + } + spam_log(SPAM_VERBOSE, 'spam_mark_as_spam', t('marked as spam, score(@score)', array('@score' => $extra['score'])), $type, $id); + if ($extra['sid']) { + db_query('UPDATE {spam_tracker} SET score = %d WHERE sid = %d', $extra['score'], $extra['sid']); + $extra['content'] = spam_invoke_module($type, 'load', $id); + } + else { + $hostname = spam_invoke_module($type, 'hostname', $id); + db_query("INSERT INTO {spam_tracker} (content_type, content_id, score, hostname, timestamp) VALUES('%s', %d, %d, '%s', %d)", $type, 
$id, $extra['score'], $hostname, time()); + $extra['sid'] = db_result(db_query("SELECT sid FROM {spam_tracker} WHERE content_type = '%s' AND content_id = %d", $type, $id)); + $extra['content'] = spam_invoke_module($type, 'load', $id); + } + $extra['id'] = $id; + spam_invoke_api('mark_as_spam', $type, array(), array(), $extra); + + if ($id) { + // For now, we're hard coding the actions... + spam_unpublish($type, $id); + } + + if ($extra['redirect']) { + spam_invoke_module($type, 'redirect', $id); + } +} + +/** + * Invoke appropriate actions for marking content as not spam. + * TODO: Integrate with the Actions module, making actions fully configurable. + */ +function spam_mark_as_not_spam($type, $id, $extra = array()) { + // TODO: Fix this loop + static $loop = array(); + if (isset($loop[$id])) { + spam_log(SPAM_DEBUG, 'spam_mark_as_not_spam', t('FIX ME: looping'), $type, $id); + return; + } + $loop[$id] = TRUE; + + spam_update_statistics(t('@type marked as not spam', array('@type' => $type))); + $extra['sid'] = db_result(db_query("SELECT sid FROM {spam_tracker} WHERE content_type = '%s' AND content_id = %d", $type, $id)); + if (!$extra['score']) { + $extra['score'] = 1; + } + spam_log(SPAM_VERBOSE, 'spam_mark_as_not_spam', t('marked as not spam, score(@score)', array('@score' => $extra['score'])), $type, $id); + if ($extra['sid']) { + db_query('UPDATE {spam_tracker} SET score = %d WHERE sid = %d', $extra['score'], $extra['sid']); + } + else if ($id) { + $hostname = spam_invoke_module($type, 'hostname', $id); + db_query("INSERT INTO {spam_tracker} (content_type, content_id, score, hostname, timestamp) VALUES('%s', %d, %d, '%s', %d)", $type, $id, $extra['score'], $hostname, time()); + $extra['sid'] = db_result(db_query("SELECT sid FROM {spam_tracker} WHERE content_type = '%s' AND content_id = %d", $type, $id)); + } + if (!isset($extra['content'])) { + $extra['content'] = spam_invoke_module($type, 'load', $id); + } + + $extra['id'] = $id; + 
spam_invoke_api('mark_as_not_spam', $type, array(), array(), $extra); + + if ($id) { + // For now, we're hard coding the actions... + spam_publish($type, $id); + } + + if ($extra['redirect']) { + spam_invoke_module($type, 'redirect', $id); + } +} + +/** + * Extract text from content array. + */ +function spam_get_text($content, $type, $fields, $extra = array(), $full = TRUE) { + if (is_object($content)) { + $content = (array)$content; + } + + $text = ''; + + foreach ($fields['main'] as $field) { + $text .= $content[$field] .' '; + } + if ($full && is_array($fields['other'])) { + foreach ($fields['other'] as $field) { + $text .= $content[$field] .' '; + } + } + return $text; +} + +/** + * Write to the spam_log database table. + */ +function spam_log($level, $function, $message, $type = NULL, $id = NULL) { + global $user; + + $trid = _spam_log_trace($message, $type, $id); + + if (variable_get('spam_log_level', SPAM_LOG) >= $level) { + db_query("INSERT INTO {spam_log} (level, trid, content_type, content_id, uid, function, message, hostname, timestamp) VALUES(%d, %d, '%s', %d, %d, '%s', '%s', '%s', %d)", $level, $trid, $type, $id, $user->uid, $function, $message, ip_address(), time()); + } +} + +/** + * Maintain a "trace id", allowing easy tracing of all spam actions for each + * page load. Only active if logging is set to verbose or higher. + */ +function _spam_log_trace($message, $type, $id) { + global $user; + static $trid = NULL; + + if (!$trid && (variable_get('spam_log_level', SPAM_DEBUG) >= SPAM_VERBOSE)) { + $key = md5(microtime() . 
$message); + db_query("INSERT INTO {spam_log} (level, content_type, content_id, uid, function, message, hostname, timestamp) VALUES(%d, '%s', %d, %d, '%s', '%s', '%s', %d)", SPAM_VERBOSE, $type, $id, $user->uid, '_spam_log_trace', $key, ip_address(), time()); + $trid = db_result(db_query("SELECT lid FROM {spam_log} WHERE message = '%s'", $key)); + if ($trid) { + db_query("UPDATE {spam_log} SET trid = %d, message = '%s' WHERE lid = %d", $trid, t('--'), $trid); + } + else { + $trid = 1; + spam_log(SPAM_LOG, '_spam_log_trace', t('Failed to obtain a valid trid.')); + } + } + return $trid; +} + +/** + * Display statistics overview. + */ +function spam_logs_statistics() { + drupal_set_title("Spam statistics"); + + $statistics = array(); + + $stats = array( + array( + 'title' => 'scanned @module', + 'query' => 'scan %s', + ), + array( + 'title' => 'prevented @module spam', + 'query' => 'prevented spam %s', + ), + array( + 'title' => 'marked @module as spam', + 'query' => '%s marked as spam', + ), + array( + 'title' => 'manually marked @module as spam', + 'query' => '%s manually marked as spam', + ), + array( + 'title' => 'marked @module as not spam', + 'query' => '%s marked as not spam', + ), + array( + 'title' => 'manually marked @module as not spam', + 'query' => '%s manually marked as not spam', + ), + ); + + $header = array('', t('Action'), t('Count'), t('Last')); + $displayed = array(); + $modules = spam_invoke_api('content_module'); + foreach ($modules as $module) { + foreach ($stats as $stat) { + $query = str_replace('@name', $stat['query'], "SELECT * FROM {spam_statistics} WHERE name = '@name'"); + if ($result = db_fetch_object(db_query($query, $module))) { + $row = array(); + if (!isset($displayed[$module])) { + $displayed[$module] = TRUE; + $row[] = array('data' => "$module", 'colspan' => 4); + $rows[] = $row; + $row = array(); + } + $row[] = ''; + $row[] = array('data' => t($stat['title'], array('@module' => $module))); + $row[] = array('data' => 
number_format($result->count)); + $row[] = array('data' => t('@time ago', array('@time' => format_interval(time() - $result->timestamp)))); + $rows[] = $row; + } + } + } + + $output = theme('table', $header, $rows); + return $output; +} + +/** + * Display an overview of the latest spam_log entries. + */ +function spam_logs_overview($type = NULL, $id = NULL) { + drupal_set_title(t('Spam module logs')); + + $header = array( + array('data' => t('type'), 'field' => 'content_type'), + array('data' => t('id'), 'field' => 'content_id'), + array('data' => t('date'), 'field' => 'lid', 'sort' => 'desc'), + array('data' => t('message'), 'field' => 'message'), + array('data' => t('user'), 'field' => 'uid'), + array('data' => t('operations')), + ); + + if ($id) { + $sql = "SELECT * FROM {spam_log} WHERE content_type = '%s' AND content_id = %d"; + $arguments = array($type, $id); + } + else if ($type) { + $sql = "SELECT * FROM {spam_log} WHERE content_type = '%s'"; + $arguments = array($type); + } + else { + $sql = "SELECT * FROM {spam_log}"; + $arguments = array(); + } + + $result = pager_query($sql . tablesort_sql($header), 50, 0, NULL, $arguments); + + while ($log = db_fetch_object($result)) { + $options = ''; + if ($log->trid > 1) { + $options = l(t('trace'), "admin/reports/spam/$log->trid/trace") .' | '; + } + $options .= l(t('detail'), "admin/reports/spam/$log->lid/detail"); + $rows[] = array('data' => array( + t($log->content_type), + $log->content_id, + format_date($log->timestamp, 'small'), + truncate_utf8($log->message, 64) . (drupal_strlen($log->message) > 64 ? '...' : ''), + theme('username', user_load(array('uid' => $log->uid))), + $options, + ) + ); + } + + if (!$rows) { + $rows[] = array(array('data' => t('No log messages available.'), 'colspan' => 6)); + } + + return theme('table', $header, $rows) . theme('pager', NULL, 50, 0); +} + +/** + * Displays complete information about a single log entry. 
+ */ +function spam_logs_entry($id = NULL) { + if (!$id) { + return NULL; + } + + $breadcrumb[] = l(t('Home'), NULL); + $breadcrumb[] = l(t('Administer'), 'admin'); + $breadcrumb[] = l(t('Logs'), 'admin/reports'); + $breadcrumb[] = l(t('Spam'), 'admin/reports/spam'); + $breadcrumb[] = l(t('Spam module log entry'), 'admin/reports/spam/detail'); + drupal_set_breadcrumb($breadcrumb); + + $message = db_fetch_object(db_query('SELECT * FROM {spam_log} WHERE lid = %d', $id)); + + if ($message->content_type) { + $table[] = array( array('data' => t('Content type'), 'header' => TRUE), array('data' => l(t($message->content_type), "admin/reports/spam/$message->content_type")) ); + } + else { + $table[] = array( array('data' => t('Content type'), 'header' => TRUE), array('data' => t('unknown')) ); + } + if ($message->content_id) { + $table[] = array( array('data' => t('!type ID', array('!type' => drupal_ucfirst($message->content_type))), 'header' => TRUE), array('data' => l(t($message->content_id), "admin/reports/spam/$message->content_type/$message->content_id")) ); + } + $table[] = array( array('data' => t('Date'), 'header' => TRUE), array('data' => format_date($message->timestamp, 'large')) ); + $table[] = array( array('data' => t('User'), 'header' => TRUE), array('data' => theme('username', user_load(array('uid' => $message->uid)))) ); + $table[] = array( array('data' => t('Spam module function'), 'header' => TRUE), array('data' => $message->function) ); + $table[] = array( array('data' => t('Message'), 'header' => TRUE), array('data' => $message->message) ); + $table[] = array( array('data' => t('Hostname'), 'header' => TRUE), array('data' => $message->hostname) ); + $table[] = array( array('data' => t('Options'), 'header' => TRUE), array('data' => l(t('trace'), "admin/reports/spam/$message->trid/trace")) ); + return theme('table', NULL, $table); +} + +/** + * Trace all logs generated by the same page load. 
+ */ +function spam_logs_trace($trid = NULL) { + if (!$trid) return; + + drupal_set_title(t('Spam module logs trace')); + + $breadcrumb[] = l(t('Home'), NULL); + $breadcrumb[] = l(t('Administer'), 'admin'); + $breadcrumb[] = l(t('Logs'), 'admin/reports'); + $breadcrumb[] = l(t('Spam'), 'admin/reports/spam'); + $breadcrumb[] = l(t('Spam module log trace'), 'admin/reports/spam/trace'); + drupal_set_breadcrumb($breadcrumb); + + $header = array( + array('data' => t('type'), 'field' => 'content_type'), + array('data' => t('id'), 'field' => 'content_id'), + array('data' => t('date'), 'field' => 'lid', 'sort' => 'asc'), + array('data' => t('function'), 'field' => 'function'), + array('data' => t('message'), 'field' => 'message'), + array('data' => t('user'), 'field' => 'uid'), + array('data' => t('operations')), + ); + + $sql = "SELECT * FROM {spam_log} WHERE trid = %d"; + $arguments = array($trid); + + $result = pager_query($sql . tablesort_sql($header), 50, 0, NULL, $arguments); + + while ($log = db_fetch_object($result)) { + $options = l(t('detail'), "admin/reports/spam/$log->lid/detail"); + $rows[] = array('data' => array( + t($log->content_type), + $log->content_id, + format_date($log->timestamp, 'small'), + truncate_utf8($log->function, 20) . (drupal_strlen($log->function) > 20 ? '...' : ''), + truncate_utf8($log->message, 64) . (drupal_strlen($log->message) > 64 ? '...' : ''), + theme('username', user_load(array('uid' => $log->uid))), + $options, + ) + ); + } + + if (!$rows) { + $rows[] = array(array('data' => t('No log messages available.'), 'colspan' => 6)); + } + + return theme('table', $header, $rows) . theme('pager', NULL, 50, 0); +} + +function spam_score_is_spam($score) { + if ($score >= variable_get('spam_threshold', SPAM_DEFAULT_THRESHOLD)) { + return (TRUE); + } + else { + return FALSE; + } +} + +/** + * Support PHP4 which has no 'step' parameter in its range() function. 
+ */ +function spam_range($low, $high, $step = 1) { + if (version_compare(phpversion(), '5') < 0) { + // Emultate range with a step paramater for PHP4 users. + $rng = array(); + for ($i = $low; $i <= $high; $i+=$step) { + $rng[] = $i; + } + return $rng; + } + else { + return range($low, $high, $step); + } +} + +/** + * Invoke unpublish action for given content type. + * TODO: Integrate with the Actions module. + */ +function spam_unpublish($type, $id, $extra = array()) { + spam_log(SPAM_VERBOSE, 'spam_unpublish', t('unpublished'), $type, $id); + spam_invoke_module($type, 'unpublish', $id, $extra); + cache_clear_all(); + spam_update_statistics(t('unpublish @type', array('@type' => $type))); +} + +/** + * Invoke unpublish action for given content type. + * TODO: Integrate with the Actions module. + */ +function spam_publish($type, $id, $extra = array()) { + spam_log(SPAM_VERBOSE, 'spam_publish', t('published'), $type, $id); + spam_invoke_module($type, 'publish', $id, $extra); + cache_clear_all(); + spam_update_statistics(t('publish @type', array('@type' => $type))); +}