Index: modules/search/search.module =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.module,v retrieving revision 1.348 diff -u -p -r1.348 search.module --- modules/search/search.module 9 May 2010 19:46:11 -0000 1.348 +++ modules/search/search.module 31 May 2010 17:56:44 -0000 @@ -1081,8 +1081,11 @@ function search_excerpt($keys, $text) { preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches); $keys = array_merge($matches[2], $matches[3]); - // Prepare text - $text = ' ' . strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) . ' '; + // Prepare text by stripping HTML tags and decoding HTML entities. + $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)); + $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + + // Slash-escape quotes in the search keyword string. array_walk($keys, '_search_excerpt_replace'); $workkeys = $keys; @@ -1112,9 +1115,12 @@ function search_excerpt($keys, $text) { // $q) and behind it (position $s) if (preg_match('/' . $boundary . $key . $boundary . '/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) { $p = $match[0][1]; - if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) { - $end = substr($text, $p, 80); + if (($q = strpos(' ' . $text, ' ', max(0, $p - 61))) !== FALSE) { + $end = substr($text . ' ', $p, 80); if (($s = strrpos($end, ' ')) !== FALSE) { + // Account for the added spaces. + $q = max($q - 1, 0); + $s = min($s, drupal_strlen($end) - 1); $ranges[$q] = $p + $s; $length += $p + $s - $q; $included[$key] = $p + 1; @@ -1133,9 +1139,11 @@ function search_excerpt($keys, $text) { } } - // If we didn't find anything, return the beginning. if (count($ranges) == 0) { - return truncate_utf8($text, 256, TRUE, TRUE); + // We didn't find any keyword matches, so just return the first part of the + // text. We also need to re-encode any HTML special characters that we + // entity-decoded above. 
+ return htmlspecialchars(truncate_utf8($text, 256, TRUE, TRUE)); } // Sort the text ranges by starting position. @@ -1165,7 +1173,9 @@ function search_excerpt($keys, $text) { foreach ($newranges as $from => $to) { $out[] = substr($text, $from, $to - $from); } - $text = (isset($newranges[0]) ? '' : '... ') . implode(' ... ', $out) . ' ...'; + + $text = (isset($newranges[0]) ? '' : t('... ')) . implode(t(' ... '), $out) . t(' ...'); + $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8'); // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>'). $text = preg_replace('/' . $boundary . '(' . implode('|', $keys) . ')' . $boundary . '/iu', '\0', $text); @@ -1177,7 +1187,7 @@ function search_excerpt($keys, $text) { */ /** - * Helper function for array_walk in search_except. + * Helper function for array_walk() in search_excerpt. */ function _search_excerpt_replace(&$text) { $text = preg_quote($text, '/'); Index: modules/search/search.test =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.test,v retrieving revision 1.63 diff -u -p -r1.63 search.test --- modules/search/search.test 12 May 2010 15:53:43 -0000 1.63 +++ modules/search/search.test 31 May 2010 17:56:44 -0000 @@ -419,7 +419,7 @@ class SearchRankingTestCase extends Drup function testHTMLRankings() { // Login with sufficient privileges. $this->drupalLogin($this->drupalCreateUser(array('create page content'))); - + // Test HTML tags with different weights. $sorted_tags = array('h1', 'h2', 'h3', 'h4', 'a', 'h5', 'h6', 'notag'); $shuffled_tags = $sorted_tags; @@ -451,7 +451,7 @@ class SearchRankingTestCase extends Drup // Refresh variables after the treatment. $this->refreshVariables(); - + // Disable all other rankings. $node_ranks = array('sticky', 'promote', 'recent', 'comments', 'views'); foreach ($node_ranks as $node_rank) { @@ -489,7 +489,7 @@ class SearchRankingTestCase extends Drup // Assert the results. 
$this->assertEqual($set[0]['node']->nid, $node->nid, 'Search tag ranking for "<' . $tag . '>" order.'); - + // Delete node so it doesn't show up in subsequent search results. node_delete($node->nid); } @@ -506,8 +506,8 @@ class SearchRankingTestCase extends Drup // See testRankings() above - build a node that will rank high for sticky. $settings = array( - 'type' => 'page', - 'title' => array(LANGUAGE_NONE => array(array('value' => 'Drupal rocks'))), + 'type' => 'page', + 'title' => array(LANGUAGE_NONE => array(array('value' => 'Drupal rocks'))), 'body' => array(LANGUAGE_NONE => array(array('value' => "Drupal's search rocks"))), 'sticky' => 1, ); @@ -761,9 +761,9 @@ class SearchCommentTestCase extends Drup /** * Tests that comment count display toggles properly on comment status of node - * + * * Issue 537278 - * + * * - Nodes with comment status set to Open should always how comment counts * - Nodes with comment status set to Closed should show comment counts * only when there are comments @@ -772,7 +772,7 @@ class SearchCommentTestCase extends Drup class SearchCommentCountToggleTestCase extends DrupalWebTestCase { protected $searching_user; protected $searchable_nodes; - + public static function getInfo() { return array( 'name' => 'Comment count toggle', @@ -789,23 +789,23 @@ class SearchCommentCountToggleTestCase e // Create initial nodes. $node_params = array('type' => 'article', 'body' => array(LANGUAGE_NONE => array(array('value' => 'SearchCommentToggleTestCase')))); - + $this->searchable_nodes['1 comment'] = $this->drupalCreateNode($node_params); $this->searchable_nodes['0 comments'] = $this->drupalCreateNode($node_params); - + // Login with sufficient privileges. $this->drupalLogin($this->searching_user); - + // Create a comment array $edit_comment = array(); $edit_comment['subject'] = $this->randomName(); $edit_comment['comment_body[' . LANGUAGE_NONE . 
'][0][value]'] = $this->randomName(); $filtered_html_format_id = db_query_range('SELECT format FROM {filter_format} WHERE name = :name', 0, 1, array(':name' => 'Filtered HTML'))->fetchField(); $edit_comment['comment_body[' . LANGUAGE_NONE . '][0][format]'] = $filtered_html_format_id; - + // Post comment to the test node with comment $this->drupalPost('comment/reply/' . $this->searchable_nodes['1 comment']->nid, $edit_comment, t('Save')); - + // First update the index. This does the initial processing. node_update_index(); @@ -828,13 +828,13 @@ class SearchCommentCountToggleTestCase e $this->drupalPost('', $edit, t('Search')); $this->assertText(t('0 comments'), t('Empty comment count displays for nodes with comment status set to Open')); $this->assertText(t('1 comment'), t('Non-empty comment count displays for nodes with comment status set to Open')); - + // Test comment count display for nodes with comment status set to Closed $this->searchable_nodes['0 comments']->comment = COMMENT_NODE_CLOSED; node_save($this->searchable_nodes['0 comments']); $this->searchable_nodes['1 comment']->comment = COMMENT_NODE_CLOSED; node_save($this->searchable_nodes['1 comment']); - + $this->drupalPost('', $edit, t('Search')); $this->assertNoText(t('0 comments'), t('Empty comment count does not display for nodes with comment status set to Closed')); $this->assertText(t('1 comment'), t('Non-empty comment count displays for nodes with comment status set to Closed')); @@ -843,12 +843,12 @@ class SearchCommentCountToggleTestCase e $this->searchable_nodes['0 comments']->comment = COMMENT_NODE_HIDDEN; node_save($this->searchable_nodes['0 comments']); $this->searchable_nodes['1 comment']->comment = COMMENT_NODE_HIDDEN; - node_save($this->searchable_nodes['1 comment']); - + node_save($this->searchable_nodes['1 comment']); + $this->drupalPost('', $edit, t('Search')); $this->assertNoText(t('0 comments'), t('Empty comment count does not display for nodes with comment status set to Hidden')); 
$this->assertNoText(t('1 comment'), t('Non-empty comment count does not display for nodes with comment status set to Hidden')); - } + } } /**
@@ -933,6 +933,57 @@ class SearchConfigSettingsForm extends D
 }
 
 /**
+ * Tests the search_excerpt() function.
+ */
+class SearchExcerptTestCase extends DrupalUnitTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => 'Search excerpt extraction',
+      'description' => 'Tests that the search_excerpt() function works.',
+      'group' => 'Search',
+    );
+  }
+
+  /**
+   * Sets up the test case, passing 'search' on to the parent setUp().
+   */
+  function setUp() {
+    parent::setUp('search');
+  }
+
+  /**
+   * Tests search_excerpt() with several simulated search keywords.
+   *
+   * Passes keywords and a sample marked up string, "The quick
+   * brown fox & jumps over the lazy dog", and compares the result
+   * to the expected excerpt: tags stripped, entities re-encoded, and
+   * matched keywords highlighted. Runs of spaces are collapsed before
+   * comparing, since extra spaces do not matter in HTML output.
+   */
+  function testSearchExcerpt() {
+    // Make some text with entities and tags.
+    $text = 'The <strong>quick</strong> <em>brown</em> fox &amp; jumps <h2>over</h2> the <em>lazy</em> dog';
+    // Note: The search_excerpt() function adds some extra spaces -- not
+    // important for HTML formatting. Remove these for comparison.
+    $expected = 'The quick brown fox &amp; jumps over the lazy dog';
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $text));
+    $this->assertEqual($result, $expected, 'Entire string is returned when keyword is not found in short string');
+
+    $result = preg_replace('| +|', ' ', search_excerpt('fox', $text));
+    $this->assertEqual($result, 'The quick brown <strong>fox</strong> &amp; jumps over the lazy dog ...', 'Found keyword is highlighted');
+
+    $longtext = str_repeat($text . ' ', 10);
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $longtext));
+    $this->assertTrue(strpos($result, $expected) === 0, 'When keyword is not found in long string, return value starts as expected');
+
+    $entities = str_repeat('k&eacute;sz&iacute;t&eacute;se ', 20);
+    $result = preg_replace('| +|', ' ', search_excerpt('nothing', $entities));
+    $this->assertTrue(strpos($result, '&') === FALSE, 'Entities are not present in excerpt');
+    $this->assertTrue(strpos($result, 'í') !== FALSE, 'Entities are converted in excerpt');
+  }
+}
+
+/**
  * Test the CJK tokenizer.
  */