Multisite cron without wget/curl

Last modified: July 7, 2009 - 21:00

#!/usr/bin/php5
<?php
/**
* This script scans the sites directory, and uses a regular expression to extract the sitenames.
* It then uses this sitename to execute the cronjob for these sites.
* You then only have to create one cronjob for this script.
* In this way, you can create and delete sites on the fly, but all their cronjobs will be executed.
*/
/*
* Carl van Denzen, June 2009:
* Options for this script:
* -h <hostname> (e.g. arjan.vandenzen.nl)
* -r <regexp> (e.g. .+\.vandenzen\.nl) This will be used in most cases.
* -a (run cron for every site found in the sites directory)
*
* When this script is invoked  with parameter -r, then it will call
* itself for every host in the sites directory that matches the -r regexp.
* These calls will be with the -h <hostname> argument set to the site name.
*
* When invoked with -h option (only ONE is allowed), it will run cron
* for the named hostname site.
*
* When this script is invoked without parameters, it will exit without
* running cron for any site. This is to avoid runaway scripts.
*
* Purpose of this behaviour:
*
* You can add this script to your crontab with parameter -a: it will run
* for every site found in the drupal sites directory. This is the regular
* drupal/cron.php behaviour.
* In a one-site set-up it will only run for the default directory.
* In a multi-site set-up it will run for all sites (beware of the default directory?)
*
* You can add this script to your crontab with the -r parameter and it will only run cron
* for the sites that match the <regexp>. This is primarily meant for a multi-site set-up
* when the sites directory contains sites that you want to exclude from running cron
* (e.g. the "default" site).
*
* You can add this script to your crontab with the -h option to run cron
* for only the specified site (only one allowed).
*
* Without parameters it will do nothing.
* I have seen some problems with argument parsing that made me
* afraid of doing things like calling the same script. I would have preferred
* that this script would cron all sites if it was invoked without parameters.
*
* This script calls itself for every site (in a new command shell).
* It is impossible to do this in a
* php function (i.e. without invoking a new process) because the drupal
* bootstrap coding cannot (easily) be called multiple times.
*
* Disadvantages of this script:
* PEAR is needed for Console/Getopt.php.
* It is not efficient because for every host a new php process is
* created.
*
* Advantage:
* For me it works.
* It is only one script (I saw other solutions that used two scripts
* to accomplish this task).
* It doesn't use wget or similar programs that try to start cron
* by making a connection to the cron.php file. This strategy didn't
* work for me, because my website hoster doesn't allow outgoing
* connections.
*/

/***********
* SETTINGS
**********/

/*
* Location of Drupal's 'sites' directory. The value is handed unchanged to
* opendir(), so a relative path is resolved against the directory the
* script is started from.
*/
$sitesDir = 'sites';

/*
* Debug switch: any value greater than 0 makes the script print diagnostic
* output while it runs.
*/
$debug = 0;

/*
* Example pattern for the directory names in the 'sites' dir that cron
* should be executed for, with a capture group around the site name.
* NOTE(review): nothing visible in this script reads this variable; the
* pattern that is actually applied comes from the -r option (or '.*' for
* -a). Confirm before relying on it.
*/
$siteNameRegExp = '(.*\.example\.com)';

/***********
* END SETTINGS
**********/

/*
* Default action, equivalent to the cron.php script that shipped with
* Drupal 6.x before 2008: bootstrap Drupal to the DRUPAL_BOOTSTRAP_FULL
* phase and then run cron once.
*
* Takes no arguments and returns nothing. drupal_bootstrap() and
* drupal_cron_run() must already be defined when this is called.
*/
function do_old_cron() {
    $phase = DRUPAL_BOOTSTRAP_FULL;
    drupal_bootstrap($phase);

    drupal_cron_run();
}
/*
* Sanity check: without $_SERVER['argv'] there is no command line to parse,
* so fall back to the plain cron.php behaviour (presumably this is the case
* when the script is requested through a web server -- confirm).
*/
if (!isset($_SERVER['argv'])) {
    // do_old_cron() calls drupal_bootstrap(), which is defined in
    // bootstrap.inc. The script's regular include of that file sits further
    // down and has not run yet, so load it here first.
    include_once './includes/bootstrap.inc';

    do_old_cron();

    // Everything below parses command-line options, which do not exist in
    // this situation, so stop once cron has run.
    exit;
}

error_reporting(E_ALL);

// Console_Getopt comes from PEAR. Use require_once so that a missing PEAR
// installation stops the script right here with a clear message instead of
// a later "class not found" error.
require_once "Console/Getopt.php";

// initialize object
$cg = new Console_Getopt();

/* Allowed short options: a = all sites, h: = one site (takes a hostname),
 * r: = sites that match a regular expression (takes the pattern) */
$allowedShortOptions = "ah:r:";

// read the command line; this can fail (the result is then an error object)
$args = $cg->readPHPArgv();
if (PEAR::isError($args)) {
    die ("Error in command line: " . $args->getMessage() . "\n");
}

// get the options
$ret = $cg->getopt($args, $allowedShortOptions);

// check for errors and die with an error message if there was a problem
if (PEAR::isError($ret)) {
    die ("Error in command line: " . $ret->getMessage() . "\n");
}

ini_set('include_path', ini_get('include_path') . PATH_SEPARATOR . './scripts');

// PHP's built-in getopt("h:r:") is deliberately not used: that function is
// not available on every installation.

// Drupal's bootstrap code; cron_one_site() needs it, so fail hard if it
// cannot be loaded.
require_once './includes/bootstrap.inc';

/*
* Run Drupal cron for exactly one site.
*
* Fills in the $_SERVER entries of a GET request for /cron.php on the given
* host, bootstraps Drupal fully and runs cron. The host name and the result
* of conf_path() are printed so the selected site can be checked.
* (Presumably Drupal's bootstrap uses these $_SERVER values to pick the
* site's settings directory -- verify against bootstrap.inc.)
*
* @param $sitename
*   Host name of the site (the value given with -h), e.g. www.example.com.
*/
function cron_one_site($sitename) {
    $_SERVER['SCRIPT_NAME'] = '/cron.php';
    $_SERVER['SCRIPT_FILENAME'] = '/cron.php';
    $_SERVER['HTTP_HOST'] = $sitename;
    $_SERVER['REMOTE_ADDR'] = 'localhost';
    $_SERVER['REQUEST_METHOD'] = 'GET';

    drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

    // Report the site actually being processed. This used to print the
    // undefined variable $hostname, which always came out empty.
    echo "Hostname is: $sitename<br/>\n";
    print 'conf_path is ' . conf_path() . '<br/>';

    drupal_cron_run();
}
/*
* Run cron for every site whose directory name matches a regular expression.
*
* Scans the directory named by the global $sitesDir. Every entry other than
* 'all' that contains a settings.php file and whose name matches the pattern
* is collected; this script is then started once per collected site in a new
* php process, with "-h <site>" as arguments.
*
* Reads the globals $sitesDir and $debug, and $_SERVER['argv'][0] for the
* name of this script. When $debug is greater than 0, progress information
* is printed.
*
* @param $siteNameRegExp
*   PCRE pattern WITHOUT delimiters. It must not contain the character '^',
*   because that character is used as the delimiter. The pattern is not
*   anchored by this function.
*/
function cron_regexp($siteNameRegExp) {
    global $sitesDir;
    global $debug;

    // Get the name of this script, so we can call it recursively.
    // $argv itself is not defined inside a function, hence $_SERVER.
    $argv = $_SERVER["argv"];
    $thisScript = $argv[0];

    $handle = opendir($sitesDir);
    if ($handle === false) {
        // Without a readable sites directory there is nothing to do.
        print("Cannot open sites directory $sitesDir\n");
        return;
    }

    $sites = array();
    // Compare with false explicitly: an entry named "0" is falsy and would
    // otherwise end the scan early.
    while (($file = readdir($handle)) !== false) {
        $hasSettings = file_exists("$sitesDir/$file/settings.php");
        if ($debug > 0) {
            print($hasSettings ? 'Yes: ' : "No: ");
            print("exists $sitesDir/$file/settings.php<br/>");
        }
        if ($file === 'all' || !$hasSettings) {
            continue;
        }
        // preg expects the pattern to be enclosed in a (freely chosen)
        // delimiter. '^' was chosen because it is not expected to occur in
        // a host name.
        if (preg_match('^' . $siteNameRegExp . '^', $file)) {
            if ($debug > 0) {
                print '$file is ' . $file . ', $siteNameRegExp is ' . $siteNameRegExp . "<br/>\n";
            }
            $sites[] = $file;
        }
    }
    closedir($handle);

    foreach ($sites as $site) {
        if ($debug > 0) {
            print 'Doing site ' . $site . "<br/>\n";
        }

        // Quote both arguments so that unusual characters in the script path
        // or in a directory name cannot be interpreted by the shell.
        $commandline = '/usr/bin/php5 ' . escapeshellarg($thisScript) . ' -h ' . escapeshellarg($site);

        // exec() appends to an existing array, so start empty for each site.
        $output = array();
        $exitCode = 0;
        exec($commandline, $output, $exitCode);

        if ($debug > 0) {
            print 'Commandline is ' . $commandline . "<br/>\n";
            print 'out1 is ' . implode("\n", $output) . "<br/>\n";
            // The third exec() argument is the integer exit status.
            print 'out2 is ' . $exitCode . "<br/>\n";
        }
    }
}
// When debugging, dump the server environment the script was started with.
if ($debug > 0) {
    print_r($_SERVER);
}

// $ret[0] holds the parsed options; each entry is read below as
// array(option letter, option value).
$opts = $ret[0];

// With no options at all the loop body never runs and the script ends
// without doing any cron work.
foreach ($opts as $opt) {
    switch ($opt[0]) {
        // -a: all sites, i.e. a pattern that matches every directory name
        case 'a':
            cron_regexp('.*');
            break;

        // -h <hostname>: run cron for this one site
        case 'h':
            cron_one_site($opt[1]);
            break;

        // -r <regexp>: run cron for the sites matching the pattern
        case 'r':
            cron_regexp($opt[1]);
            break;

        default:
            // Double quotes so that \n is a real line break.
            print "Usage: <br/>\n";
            print "-h hostname<br/>\n";
            print "-r regexp<br/>\n";
            print "-a (do all sites)<br/>\n";
            break;
    }
}

/*
* Some experiences I had with this script at your-webhost.nl in June 2009:
*         // $_SERVER["PHP_SELF"] is not set
*/
?>

 
 

Drupal is a registered trademark of Dries Buytaert.