--- modules\filter\filter.module.org Sun Jul 01 20:41:14 2007
+++ modules\filter\filter.module Mon Sep 03 20:19:40 2007
@@ -1083,24 +1083,80 @@ function _filter_url_settings($format) {
* ftp links, etc.) into hyperlinks.
*/
function _filter_url($text, $format) {
+  // Tags whose content must never be converted (used as a regexp alternation).
+  $ignoretags = 'a|script|style|code';
+
// Pass length to regexp callback
_filter_url_trim(NULL, variable_get('filter_url_length_'. $format, 72));
-
-  $text = ' '. $text .' ';
-
-  // Match absolute URLs.
-  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
-
-  // Match e-mail addresses.
-  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
-
-  // Match www domains/addresses.
-  $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
-  $text = substr($text, 1, -1);
-
+
+  // Each replacement (see the switch inside the loops) runs as a separate pass.
+  // The text is joined and split again after every pass, because a pass inserts
+  // new <a> tags whose content the following passes must leave untouched.
+  for ($task = 1; $task <= 3; $task++) {
+    // Split at all tags, including tags that span several lines.
+    // This ensures that nothing that is a tag name or attribute is processed.
+    $chunks = preg_split('`(<[^>]+>)`', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+    // Note: the resulting array alternates text and tags and always begins and
+    // ends with text (preg_split() inserts empty strings as required).
+
+    // Name of the ignoretag that is currently open, or '' if there is none.
+    // It is cleared at the matching closing tag; until then nothing is replaced.
+    // Think of this as a stack that always has 0 or 1 items.
+    $opentag = '';
+    for ($i = 0, $n = count($chunks); $i < $n; $i++) {
+      // Even/odd = text/tag
+      if ($i % 2 === 0) { // Text
+        if ($opentag === '') {
+          // Only do replacements when there are no unclosed ignoretags.
+          switch ($task) {
+            case 1:
+              // Match absolute URLs.
+              $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
+              $urlpattern = "(?:$protocols)(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])";
+              $re = "`($urlpattern)([\.\,\?\!]*?)`i";
+              $chunks[$i] = preg_replace_callback($re, '_filter_url_parse_full_links', $chunks[$i]);
+              break;
+            case 2:
+              // Match e-mail addresses.
+              $urlpattern = '[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4}';
+              $re = "`($urlpattern)`i";
+              $chunks[$i] = preg_replace($re, '<a href="mailto:\1">\1</a>', $chunks[$i]);
+              break;
+            case 3:
+              // Match www domains/addresses.
+              $urlpattern = 'www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-]';
+              $re = "`($urlpattern)([\.\,\?\!]*?)`i";
+              $chunks[$i] = preg_replace_callback($re, '_filter_url_parse_partial_links', $chunks[$i]);
+              break;
+          }
+        }
+      }
+      else { // Tag
+        if ($opentag === '') {
+          // No open "ignoretag".
+          // Is this the opening tag of an ignoretag?
+          if (preg_match("`<($ignoretags)(?:\s|>)`i", $chunks[$i], $matches)) {
+            // Remember which tag has to be closed before replacing resumes.
+            $opentag = $matches[1];
+          }
+        }
+        else {
+          // An ignoretag is open.
+          // Nobody gets to do anything until we find the matching closing tag.
+          if (preg_match("`</$opentag>`i", $chunks[$i], $matches)) {
+            $opentag = '';
+          }
+        } // if ($opentag === '')
+      } // if ($i % 2 === 0)
+    } // for chunks
+
+    $text = implode($chunks);
+  } // for tasks 1..3
+
return $text;
}
+
/**
* Scan input and make sure that all HTML tags are properly closed and nested.
*/
@@ -1176,23 +1232,27 @@ function _filter_htmlcorrector($text) {
}
/**
- * Make links out of absolute URLs.
+ * Callback function. Make links out of absolute URLs.
*/
function _filter_url_parse_full_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_url($match[2]);
-  return $match[1] . '<a href="'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[5];
+  // Index of the capturing parenthesis in the regexp that contains the URL.
+  $i = 1;
+  $match[$i] = decode_entities($match[$i]);
+  $caption = check_plain(_filter_url_trim($match[$i]));
+  $match[$i] = check_url($match[$i]);
+  return '<a href="'. $match[$i] .'" title="'. $match[$i] .'">'. $caption .'</a>'. $match[$i+1];
}
/**
- * Make links out of domain names starting with "www."
+ * Callback function. Make links out of domain names starting with "www."
*/
function _filter_url_parse_partial_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_plain($match[2]);
-  return $match[1] . '<a href="http://'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[3];
+  // Index of the capturing parenthesis in the regexp that contains the URL.
+  $i = 1;
+  $match[$i] = decode_entities($match[$i]);
+  $caption = check_plain(_filter_url_trim($match[$i]));
+  $match[$i] = check_plain($match[$i]);
+  return '<a href="http://'. $match[$i] .'" title="'. $match[$i] .'">'. $caption .'</a>'. $match[$i+1];
}
/**