From e3d3f2f0af0696f66d8a06f508d8083f59df258d Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Fri, 16 Sep 2011 18:41:59 -0400
Subject: [PATCH] Issue #1165694: file_scan_directory() should include common
 version-control and temporary files in its default no-mask
 pattern.

---
 includes/file.inc                  |   33 +++++++++++++++++++--------
 modules/simpletest/tests/file.test |   42 ++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/includes/file.inc b/includes/file.inc
index 6e2e5cb2828c9f4622b9253feed8a296b463050d..3303c21c3f0d8a46db7287c10bf8063086408979 100644
--- a/includes/file.inc
+++ b/includes/file.inc
@@ -1981,18 +1981,18 @@ function file_download() {
 /**
  * Finds all files that match a given mask in a given directory.
  *
- * Directories and files beginning with a period are excluded; this
- * prevents hidden files and directories (such as SVN working directories)
- * from being scanned.
+ * The current '.' and parent '..' directories are always excluded, to
+ * avoid infinite recursion errors.
  *
  * @param $dir
  *   The base directory or URI to scan, without trailing slash.
  * @param $mask
- *   The preg_match() regular expression of the files to find.
+ *   The preg_match() regular expression for files to be included.
  * @param $options
  *   An associative array of additional options, with the following elements:
- *   - 'nomask': The preg_match() regular expression of the files to ignore.
- *     Defaults to '/(\.\.?|CVS)$/'.
+ *   - 'nomask': The preg_match() regular expression for files to be excluded.
+ *     The default regex excludes version-control files and directories,
+ *     temporary files, and Mac OS X resource fork directories.
  *   - 'callback': The callback function to call for each match. There is no
  *     default callback.
  *   - 'recurse': When TRUE, the directory scan will recurse the entire tree
@@ -2004,17 +2004,26 @@ function file_download() {
  *   - 'min_depth': Minimum depth of directories to return files from. Defaults
  *     to 0.
  * @param $depth
- *   Current depth of recursion. This parameter is only used internally and
+ *   The current depth of recursion. This parameter is only used internally and
  *   should not be passed in.
  *
  * @return
  *   An associative array (keyed on the chosen key) of objects with 'uri',
- *   'filename', and 'name' members corresponding to the matching files.
+ *   'filename', and 'name' properties corresponding to the matched files.
  */
 function file_scan_directory($dir, $mask, $options = array(), $depth = 0) {
   // Merge in defaults.
   $options += array(
-    'nomask' => '/(\.\.?|CVS)$/',
+    // The following is the default regex for files to be excluded.
+    'nomask' => '/
+      ^(?:
+      |CVS(?:\.adm)?                        # CVS directories.
+      |RCS(?:LOG)?|SCCS                     # RCS and SCCS directories.
+      |\.(?:bzr|darcs|git|hg|svn)           # Other version-control directories.
+      |(?:cvslog\.|\.?#|,|_\$|\.del-).*     # Temporary file prefixes.
+      |.*?(?:~|\$|\.(old|bak|BAK|orig|rej)) # Temporary file suffixes.
+      |__MACOSX                             # MacOSX resource fork directory.
+      )$/x',
     'callback' => 0,
     'recurse' => TRUE,
     'key' => 'uri',
@@ -2025,7 +2034,11 @@ function file_scan_directory($dir, $mask, $options = array(), $depth = 0) {
   $files = array();
   if (is_dir($dir) && $handle = opendir($dir)) {
     while (FALSE !== ($filename = readdir($handle))) {
-      if (!preg_match($options['nomask'], $filename) && $filename[0] != '.') {
+      // Always exclude the current and parent directories.
+      if ($filename === '.' || $filename === '..') {
+        continue;
+      }
+      if (!preg_match($options['nomask'], $filename)) {
         $uri = "$dir/$filename";
         $uri = file_stream_wrapper_uri_normalize($uri);
         if (is_dir($uri) && $options['recurse']) {
diff --git a/modules/simpletest/tests/file.test b/modules/simpletest/tests/file.test
index 3633bae11dd58e735ad5f33e60d3eb66a58e1f7f..22d39ed303d261d5c1ac74bdedb69431de788974 100644
--- a/modules/simpletest/tests/file.test
+++ b/modules/simpletest/tests/file.test
@@ -1099,19 +1099,57 @@ class FileScanDirectoryTest extends FileTestCase {
   }
 
   /**
-   * Check that the no-mask parameter is honored.
+   * Check that the nomask parameter is honored.
    */
   function testOptionNoMask() {
     // Grab a listing of all the JavaSscript files.
     $all_files = file_scan_directory($this->path, '/^javascript-/');
     $this->assertEqual(2, count($all_files), t('Found two, expected javascript files.'));
 
-    // Now use the nomast parameter to filter out the .script file.
+    // Now use the nomask parameter to filter out the .script file.
     $filtered_files = file_scan_directory($this->path, '/^javascript-/', array('nomask' => '/.script$/'));
     $this->assertEqual(1, count($filtered_files), t('Filtered correctly.'));
   }
 
   /**
+   * Check that the default nomask regex is working properly.
+   */
+  function testDefaultNoMask() {
+    // Create a new, randomly-named directory to hold the test files.
+    $testdir = 'public://' . $this->randomName(20);
+    $options = FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS;
+    // Build a list of test subdirectory filenames to create.
+    $testnames = array(
+      // Filenames that should be excluded.
+      'CVS', 'CVS.adm', 'RCS', 'RCSLOG', 'SCCS', '.svn', '.git', '.bzr',
+      'cvslog.FILE', '.#FILE', '#FILE', ',FILE', '_$FILE', '.del-FILE',
+      'FILE~', 'FILE$', 'FILE.old', 'FILE.bak', 'FILE.BAK', 'FILE.orig',
+      'FILE.rej', '__MACOSX',
+      // Filenames that should not be excluded.
+      'regular', '.hidden', 'xCVSx', 'xRCSx',
+    );
+    // For each test name, create a subdirectory containing a regular file.
+    foreach ($testnames as $name) {
+      $subdir = "$testdir/$name";
+      file_prepare_directory($subdir, $options);
+      file_put_contents("$subdir/FILE", 'test');
+    }
+    // Verify that the files can be found with a non-matching 'nomask'.
+    $unfiltered_files = file_scan_directory(
+      $testdir, '/.*/', array('nomask' => '/^$/')
+    );
+    $args = array(
+      '%found' => count($unfiltered_files),
+      '%expected' => count($testnames),
+    );
+    $this->assertEqual($args['%found'], $args['%expected'], t('Found %found files; expected %expected.', $args));
+    $this->verbose('<pre>' . check_plain(var_export($unfiltered_files, 1)) . '</pre>');
+    $filtered_files = file_scan_directory($testdir, '/FILE/');
+    $this->assertEqual(4, count($filtered_files), t('The default nomask regex is working properly.'));
+    $this->verbose('<pre>' . check_plain(var_export($filtered_files, 1)) . '</pre>');
+  }
+
+  /**
    * Check that key parameter sets the return value's key.
    */
   function testOptionKey() {
-- 
1.7.5.4

