core/modules/filter/filter.module | 192 +++++++++++++++++++- .../lib/Drupal/filter/Tests/FilterAPITest.php | 143 +++++++++++++++ .../lib/Drupal/filter/Tests/FilterSecurityTest.php | 19 +- core/modules/php/php.module | 1 + core/profiles/standard/standard.install | 20 -- 5 files changed, 352 insertions(+), 23 deletions(-) diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module index 6c54fae..a0a265d 100644 --- a/core/modules/filter/filter.module +++ b/core/modules/filter/filter.module @@ -6,6 +6,77 @@ */ use Drupal\Core\Template\Attribute; + + + +/** + * HTML generator filters -- MUST generate HTML. + * + * Formats using filters of this type may not be able to use WYSIWYG editors. + * + * WYSIWYG use case: ability to detect non-HTML formats, such as Markdown, where + * no WYSIWYG editor should be used because it would be impossible to go back to + * the original text format. + */ +define('FILTER_TYPE_HTML_GENERATOR', 'html generator'); + +/** + * Security filters -- strip HTML tags that the user MAY NOT use. + * + * WYSIWYG use case: all filters of this type MUST be applied, the user MAY NOT + * be presented processed text without all filters of this type. Security + * exploits might otherwise occur. + */ +define('FILTER_TYPE_SECURITY', 'security'); + +/** + * DOM transformation filters -- DOM-based, reliably reversible transformations. + * + * Filters SHOULD NOT use regular expressions when they can use DOM manipulation + * instead. This makes filters as robust as possible. + * + * WYSIWYG use case: these filters MUST NOT be applied when feeding a piece of + * text into the WYSIWYG editor. Instead, they MAY be re-implemented in + * JavaScript for each supported WYSIWYG editor. + * E.g. `<img data-caption="Druplicon" />` may be (reversibly!) transformed to + * `<figure><img /><figcaption>Druplicon</figcaption></figure>`.
+ */ +define('FILTER_TYPE_TRANSFORM_DOM', 'DOM transformation'); + +/** + * Text transformation filters -- text-based, irreversible transformations. + * + * WYSIWYG use case: these filters MUST NOT be applied when feeding a piece of + * text into the WYSIWYG editor. Furthermore, they MUST NOT be re-implemented + * in JavaScript. + * E.g.: the Typogrify filter would transform `WYSIWYG` and `I said "foo"!` into + * `<span class="caps">WYSIWYG</span>` and `I said “foo”!`, respectively. Text + * link ad systems would transform `fancy car` into something like + * `<a href="...">fancy car</a>`. Neither of those text-based + * transformations makes sense when doing WYSIWYG editing, nor is it possible to + * reliably reverse them. + */ +define('FILTER_TYPE_TRANSFORM_TEXT', 'text transformation'); + +/** + * All of the above implies: + * - if a format uses >=1 filters of type FILTER_TYPE_HTML_GENERATOR, no WYSIWYG + * editor can be used. + * - if a format uses >=1 filters of type FILTER_TYPE_SECURITY, and a user saves + * modified text through their WYSIWYG editor, any disallowed tags will be lost. + * This seems like a minor annoyance and appears acceptable. + * - if a format uses >=1 filters of type FILTER_TYPE_TRANSFORM_TEXT, these + * transformations will not be visible while editing, but will be visible when + * viewing. + * - if a format uses >=1 filters of type FILTER_TYPE_TRANSFORM_DOM, these + * transformations may not be visible while editing (it is up to the filter to + * implement support for the WYSIWYG editor, by re-implementing the filtering + * in JavaScript), but will be visible when viewing. + */ + + + + /** * Implements hook_cache_flush(). */ @@ -546,6 +617,91 @@ function filter_default_format($account = NULL) { } /** + * Retrieves all filter types that are used in a given text format. + * + * @param string $format_id + * A text format ID. + * @return array + * All filter types used by filters of a given text format. 
+ */
+function filter_get_filter_types_by_format($format_id) {
+  $format_filters = filter_list_format($format_id);
+  $filters_metadata = filter_get_filters();
+
+  // Collect every distinct type once, in the order the filters are listed.
+  $filter_types = array();
+  foreach ($format_filters as $filter) {
+    // Ignore filters that are disabled.
+    if ($filter->status != 1) {
+      continue;
+    }
+
+    // @todo: Remove the fallback for when no filter type is defined. We don't
+    // fail, we just report the untyped filter and ignore it right now.
+    if (empty($filters_metadata[$filter->name]['type'])) {
+      drupal_set_message(t('Filter !filter has no type specified!', array('!filter' => $filter->name)), 'error');
+      continue;
+    }
+
+    $type = $filters_metadata[$filter->name]['type'];
+    if (!in_array($type, $filter_types, TRUE)) {
+      $filter_types[] = $type;
+    }
+  }
+
+  return $filter_types;
+}
+
+/**
+ * Retrieves all tags that are allowed by a given text format.
+ *
+ * Only enabled filters whose filter info declares an 'allowed tags setting'
+ * restrict the result. When several such filters are enabled, a tag is only
+ * reported if every one of them allows it.
+ *
+ * @param string $format_id
+ *   A text format ID.
+ *
+ * @return array|TRUE
+ *   An array of HTML tags (in "p", not "<p>" format) that are allowed by the
+ *   text format, indexed sequentially from 0. The empty array implies no tags
+ *   are allowed. TRUE implies all tags are allowed, i.e. no enabled filter of
+ *   the format restricts tags.
+ */
+function filter_get_allowed_tags_by_format($format_id) {
+  $filters = filter_list_format($format_id);
+  // Look up the filter definitions once rather than once per filter.
+  $filters_metadata = filter_get_filters();
+
+  // NULL means that no tag-restricting filter has been encountered yet.
+  $allowed_tags = NULL;
+
+  foreach ($filters as $filter) {
+    // Ignore filters that are disabled or don't have an "allowed tags" setting.
+    if ($filter->status == 0 || empty($filters_metadata[$filter->name]['allowed tags setting'])) {
+      continue;
+    }
+
+    // A filter that declares the setting but has no stored value for it allows
+    // no tags at all; avoid an undefined index notice in that case.
+    $setting_name = $filters_metadata[$filter->name]['allowed tags setting'];
+    $setting = isset($filter->settings[$setting_name]) ? $filter->settings[$setting_name] : '';
+    $filter_tags = preg_split('/\s+|<|>/', $setting, -1, PREG_SPLIT_NO_EMPTY);
+
+    if (is_null($allowed_tags)) {
+      $allowed_tags = $filter_tags;
+    }
+    else {
+      // array_intersect() preserves the keys of its first argument, so reindex
+      // to keep the result a plain sequential list.
+      $allowed_tags = array_values(array_intersect($allowed_tags, $filter_tags));
+    }
+  }
+
+  return is_null($allowed_tags) ? TRUE : $allowed_tags;
+}
+ +/** + * Returns the ID of the fallback text format that all users have access to. * * The fallback text format is a regular text format in every respect, except @@ -755,13 +911,18 @@ function filter_list_format($format_id) { * Boolean whether to cache the filtered output in the {cache_filter} table. * The caller may set this to FALSE when the output is already cached * elsewhere to avoid duplicate cache lookups and storage. + * @param array|FALSE $filter_types_to_skip + * An array of filter types to skip, or FALSE (default) to skip no filter + * types. All of the format's filters will be applied, except for filters of + * the types that are marked to be skipped. FILTER_TYPE_SECURITY is the only + * type that cannot be skipped. * * @return * The filtered text. 
* * @ingroup sanitization */ -function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) { +function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE, $filter_types_to_skip = FALSE) { if (!isset($format_id)) { $format_id = filter_fallback_format(); } @@ -771,6 +932,19 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) return ''; } + // Prevent FILTER_TYPE_SECURITY from being skipped. + if ($filter_types_to_skip && in_array(FILTER_TYPE_SECURITY, $filter_types_to_skip)) { + $filter_types_to_skip = array_diff($filter_types_to_skip, array(FILTER_TYPE_SECURITY)); + if (empty($filter_types_to_skip)) { + $filter_types_to_skip = FALSE; + } + } + + // When certain filters should be skipped, don't perform caching. + if ($filter_types_to_skip) { + $cache = FALSE; + } + // Check for a cached version of this piece of text. $cache = $cache && !empty($format->cache); $cache_id = ''; @@ -791,6 +965,10 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) // Give filters the chance to escape HTML-like data such as code or formulas. foreach ($filters as $name => $filter) { + // If necessary, skip filters of a certain type. + if ($filter_types_to_skip && in_array($filter_info[$name]['type'], $filter_types_to_skip)) { + continue; + } if ($filter->status && isset($filter_info[$name]['prepare callback'])) { $function = $filter_info[$name]['prepare callback']; $text = $function($text, $filter, $format, $langcode, $cache, $cache_id); @@ -799,6 +977,10 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) // Perform filtering. foreach ($filters as $name => $filter) { + // If necessary, skip filters of a certain type. 
+ if ($filter_types_to_skip && in_array($filter_info[$name]['type'], $filter_types_to_skip)) { + continue; + } if ($filter->status && isset($filter_info[$name]['process callback'])) { $function = $filter_info[$name]['process callback']; $text = $function($text, $filter, $format, $langcode, $cache, $cache_id); @@ -1221,10 +1403,12 @@ function theme_filter_guidelines($variables) { function filter_filter_info() { $filters['filter_html'] = array( 'title' => t('Limit allowed HTML tags'), + 'type' => FILTER_TYPE_SECURITY, + 'allowed tags setting' => 'allowed_html', 'process callback' => '_filter_html', 'settings callback' => '_filter_html_settings', 'default settings' => array( - 'allowed_html' => '