Multisite cron without wget/curl
Last modified: July 7, 2009 - 21:00
#!/usr/bin/php5
<?php
/**
* This script scans the sites directory, and uses a regular expression to extract the sitenames.
* It then uses this sitename to execute the cronjob for these sites.
* You then only have to create one cronjob for this script.
* In this way, you can create and delete sites on the fly, but all their cronjobs will be executed.
*/
/*
* Carl van Denzen, june 2009:
* Options for this script:
* -h <hostname> (e.g. arjan.vandenzen.nl)
* -r <regexp> (e.g. .+\.vandenzen\.nl) This will be used in most cases.
* -a (run cron for all sites found in the sites directory)
*
* When this script is invoked with parameter -r, then it will call
* itself for every host in the sites directory that matches the -r regexp.
* These calls will be with the -h <hostname> argument set to the site name.
*
* When invoked with -h option (only ONE is allowed), it will run cron
* for the named hostname site.
*
* When this script is invoked without parameters, it does nothing and exits.
* This is to avoid runaway scripts.
*
* Purpose of this behaviour:
*
* You can add this script to your crontab with parameter -a: it will run
* for every site found in the drupal sites directory. This is the regular
* drupal/cron.php behaviour.
* In a one-site set-up it will only run for the default directory.
* In a multi-site set-up it will run for all sites (beware of the default directory?)
*
* You can add this script to your crontab with the -r parameter and it will only run cron
* for the sites that match the <regexp>. This is primarily meant for a multi-site set-up
* when the sites directory contains sites that you want to exclude from running cron
* (e.g. the "default" site).
*
* You can add this script to your crontab with the -h option to run cron
* for only the specified site (only one allowed).
*
* Without parameters it will do nothing.
* I have seen some problems with argument parsing that made me
* afraid of doing things like calling the same script. I would have preferred
* that this script would cron all sites if it was invoked without parameters.
*
* This script calls itself for every site (in a new command shell).
* It is impossible to do this in a
* php function (i.e. without invoking a new process) because the drupal
* bootstrap coding cannot (easily) be called multiple times.
*
* Disadvantages of this script:
* PEAR is needed for Console/Getopt.php (the Console_Getopt package).
* It is not efficient because for every host a new php process is
* created.
*
* Advantage:
* For me it works.
* It is only one script (I saw other solutions that used two scripts
* to accomplish this task).
* It doesn't use wget or similar programs that try to start cron
* by making a connection to the cron.php file. This strategy didn't
* work for me, because my website hoster doesn't allow outgoing
* connections.
*/
/***********
* SETTINGS
**********/
// The location of the 'sites' directory. This is a relative path that is
// handed to opendir()/file_exists() as-is, so it is resolved against the
// current working directory of the PHP process.
$sitesDir = 'sites';
/**
* A regular expression that matches the name of the directories in
* the 'sites' dir that you want to execute cronjobs for, with a
* capturing group around the actual site name (so we can exclude the
* domain part).
*
* NOTE(review): this variable is not read anywhere in the visible script;
* cron_regexp() receives its pattern as a parameter (from the -r or -a
* option). Confirm before relying on it.
*/
$siteNameRegExp = '(.*\.example\.com)';
// Set to a value greater than 0 to print diagnostic output.
$debug=0;
/***********
* END SETTINGS
**********/
/**
 * Runs cron the way the old (pre-2008) Drupal 6.x cron.php script did:
 * fully bootstrap Drupal for the default request environment and run cron.
 *
 * The Drupal bootstrap code is loaded here because this function can be
 * called before the script's own top-level include of bootstrap.inc has been
 * reached; include_once turns any later include of the same file into a
 * no-op.
 */
function do_old_cron() {
  include_once './includes/bootstrap.inc';
  drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  drupal_cron_run();
}
/*
 * Sanity check: without $_SERVER['argv'] there is no command line to parse
 * (presumably the script was not started from a shell), so behave like the
 * old cron.php and stop.
 */
if (!isset($_SERVER['argv'])) {
  // do_old_cron() needs the Drupal bootstrap functions, which are otherwise
  // only loaded further down in this script.
  include_once './includes/bootstrap.inc';
  do_old_cron();
  // Stop here: falling through would make Console_Getopt parse a command
  // line that does not exist.
  exit;
}
error_reporting(E_ALL);
include ("Console/Getopt.php");
// initialize object
$cg = new Console_Getopt();
/* allowed options - a: all sites, h: one site (takes a hostname), r: sites that match a regular expression (takes a regexp) */
$allowedShortOptions = "ah:r:";
// read the command line; this can fail with a PEAR error as well
$args = $cg->readPHPArgv();
if (PEAR::isError($args)) {
  die ("Error reading command line: " . $args->getMessage() . "\n");
}
// get the options
$ret = $cg->getopt($args, $allowedShortOptions);
// check for errors and die with an error message if there was a problem
if (PEAR::isError($ret)) {
  die ("Error in command line: " . $ret->getMessage() . "\n");
}
ini_set('include_path',ini_get('include_path'). PATH_SEPARATOR . './scripts');
/* PHP's built-in getopt() is not used because it is not always available:
$options = getopt("h:r:");
var_dump($options);
*/
include_once './includes/bootstrap.inc';
/**
 * Runs Drupal cron for one site.
 *
 * Fills in the $_SERVER entries of a web request for /cron.php on the given
 * host, fully bootstraps Drupal, prints the host name and the configuration
 * path Drupal selected, and then runs cron.
 *
 * @param $sitename
 *   Host name of the site to run cron for; it is stored in
 *   $_SERVER['HTTP_HOST'] (presumably this is what makes Drupal pick the
 *   matching sites/<sitename> directory - see the conf_path() output).
 */
function cron_one_site($sitename) {
  // Fake the request environment before bootstrapping.
  $_SERVER['SCRIPT_NAME'] = '/cron.php';
  $_SERVER['SCRIPT_FILENAME'] = '/cron.php';
  $_SERVER['HTTP_HOST'] = $sitename;
  $_SERVER['REMOTE_ADDR'] = 'localhost';
  $_SERVER['REQUEST_METHOD'] = 'GET';
  drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  // Report the site actually being processed (the parameter; there is no
  // $hostname variable in this function's scope).
  echo "Hostname is: $sitename<br/>\n";
  print 'conf_path is '.conf_path() . '<br/>';
  drupal_cron_run();
}
/**
 * Runs cron for every site whose directory name matches a regular expression.
 *
 * Scans the directory named by the global $sitesDir. Every entry other than
 * 'all' that contains a settings.php and whose name matches $siteNameRegExp
 * is collected; for each of them this script is started again in a new PHP
 * process with "-h <sitename>".
 *
 * @param $siteNameRegExp
 *   PCRE pattern WITHOUT delimiters, e.g. '.+\.example\.com'. Anchors such
 *   as ^ and $ may be used.
 */
function cron_regexp($siteNameRegExp) {
  global $sitesDir;
  global $debug;

  // $argv is not defined inside a function, so take the name of this script
  // (needed to call it again) from $_SERVER.
  $thisScript = $_SERVER['argv'][0];

  $handle = opendir($sitesDir);
  if ($handle === FALSE) {
    // opendir() has already raised a warning; there is nothing to scan.
    return;
  }

  // preg expects the pattern to be enclosed in delimiters. '~' is used and
  // escaped inside the pattern, so the caller's regexp may freely contain
  // the ^ anchor.
  $pattern = '~' . str_replace('~', '\~', $siteNameRegExp) . '~';

  $sites = array();
  // Compare against FALSE explicitly: an entry named "0" must not end the loop.
  while (($file = readdir($handle)) !== FALSE) {
    $hasSettings = file_exists("$sitesDir/$file/settings.php");
    if ($debug > 0) {
      print($hasSettings ? 'Yes: ' : 'No: ');
      print("exists $sitesDir/$file/settings.php<br/>");
    }
    if ($file != 'all' && $hasSettings && preg_match($pattern, $file)) {
      if ($debug > 0) {
        print '$file is ' . $file . ', $siteNameRegExp is ' . $siteNameRegExp . "<br/>\n";
      }
      $sites[] = $file;
    }
  }
  closedir($handle);

  foreach ($sites as $site) {
    if ($debug > 0) {
      print 'Doing site ' . $site . "<br/>\n";
    }
    // Directory names end up on a shell command line, so quote them.
    $commandline = '/usr/bin/php5 ' . escapeshellarg($thisScript) . ' -h ' . escapeshellarg($site);
    // exec() appends to an existing array; start empty for every site.
    $output = array();
    $status = 0;
    exec($commandline, $output, $status);
    if ($debug > 0) {
      print 'Commandline is ' . $commandline . "<br/>\n";
      print 'out1 is ' . implode("\n", $output) . "<br/>\n";
      // The third exec() argument is the integer exit status, not an array.
      print 'out2 is ' . $status . "<br/>\n";
    }
  }
}
// Dump the request environment when debugging.
//print_r($ret);
if ($debug>0) {
  print_r($_SERVER);
}
// Parse the options array: $ret[0] is the list of parsed options, each one
// an array whose element 0 is the option letter and element 1 its argument.
$opts = $ret[0];
// With no options at all the loop body never runs and the script does nothing.
foreach ($opts as $opt) {
  switch ($opt[0]) {
    // handle the all sites option
    case 'a':
      $re = '.*';
      cron_regexp($re);
      break;
    // handle the hostname
    case 'h':
      $hostname = $opt[1];
      cron_one_site($hostname);
      break;
    // handle the regexp option
    case 'r':
      $re = $opt[1]; // regular expression
      cron_regexp($re);
      break;
    // NOTE(review): Console_Getopt presumably rejects options outside
    // "ah:r:" before we get here, so this is only a safety net - confirm.
    default:
      // Double quotes so that \n is a real newline.
      print "Usage: <br/>\n";
      print "- h hostname<br/>\n";
      print "- r regexp<br/>\n";
      print "- a (do all sites)<br/>\n";
      break;
  }
}
/*
* Some experiences I had with this script at your-webhost.nl in June 2009:
* // $_SERVER["PHP_SELF"] is not set
*/
?>