--- modules/aggregator/aggregator.module.orig 2009-05-13 10:23:20.000000000 -0700
+++ modules/aggregator/aggregator.module 2009-07-22 22:52:00.000000000 -0700
@@ -689,6 +689,56 @@
   }
 }
 
+/**
+ * preg_replace_callback() callback: escapes stray ampersands in feed data.
+ *
+ * Leaves syntactically well-formed character references (&#38; or &#x26;) and
+ * entity references (&name;) untouched, and rewrites the leading "&" of
+ * anything else to "&amp;" so that the XML parser does not reject the feed.
+ *
+ * This is a syntax check only: it does not verify that a named entity is
+ * actually declared. NOTE(review): the caller applies it to the whole
+ * document, so a bare "&" inside a CDATA section is rewritten too; confirm
+ * that this is acceptable.
+ *
+ * @param $matches
+ *   Match array from preg_replace_callback(); $matches[0] is the candidate
+ *   reference, starting with "&" and optionally ending with ";".
+ *
+ * @return
+ *   The candidate unchanged if it is well formed, otherwise the candidate
+ *   with its leading "&" replaced by "&amp;".
+ */
+function _aggregator_verify_entity($matches) {
+  $candidate = $matches[0];
+
+  // Character reference, decimal (&#38;) or hexadecimal (&#x26;).
+  if (preg_match('/^&#(?:[0-9]+|x[0-9A-Fa-f]+);$/D', $candidate)) {
+    return $candidate;
+  }
+
+  // Entity reference: "&" Name ";", with Name as defined by the XML spec:
+  // [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
+  //        | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
+  //        | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
+  //        | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD]
+  //        | [#x10000-#xEFFFF]
+  // [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7
+  //        | [#x0300-#x036F] | [#x203F-#x2040]
+  // [5]  Name ::= NameStartChar (NameChar)*
+  // The ranges are written as \x{...} escapes and matched with the /u
+  // modifier so that they are compared as code points, not as raw bytes.
+  $name_start = ':A-Z_a-z\x{C0}-\x{D6}\x{D8}-\x{F6}\x{F8}-\x{2FF}\x{370}-\x{37D}\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
+  $name_char = $name_start .'\-.0-9\x{B7}\x{300}-\x{36F}\x{203F}-\x{2040}';
+  if (preg_match('/^&['. $name_start .']['. $name_char .']*;$/Du', $candidate)) {
+    return $candidate;
+  }
+
+  // Not a well-formed reference (or not valid UTF-8, in which case
+  // preg_match() returns FALSE): escape the ampersand and keep the rest.
+  return '&amp;'. substr($candidate, 1);
+}
+
 /**
  * Parse a feed and store its items.
  *
@@ -709,6 +759,7 @@
   $channel = array();
 
   // parse the data:
+  $data = preg_replace_callback('/(&[^ ;<\'"&]*;?)/', '_aggregator_verify_entity', $data);
   $xml_parser = drupal_xml_parser_create($data);
   xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
   xml_set_character_data_handler($xml_parser, 'aggregator_element_data');