Index: index.php =================================================================== RCS file: /cvs/drupal/drupal/index.php,v retrieving revision 1.96 diff -u -r1.96 index.php --- index.php 20 Sep 2008 20:22:23 -0000 1.96 +++ index.php 12 Dec 2008 02:33:49 -0000 @@ -41,3 +41,10 @@ } drupal_page_footer(); + +require_once DRUPAL_ROOT . '/includes/browser/browser.inc'; +$browser = Browser::getInstance(); +$request = $browser->get('http://google.com'); +if ($request) { + file_put_contents('output.html', $request['content']); +} Index: includes/browser/wrapper.inc =================================================================== RCS file: includes/browser/wrapper.inc diff -N includes/browser/wrapper.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ includes/browser/wrapper.inc 1 Jan 1970 00:00:00 -0000 @@ -0,0 +1,30 @@ + Index: includes/browser/curl.inc =================================================================== RCS file: includes/browser/curl.inc diff -N includes/browser/curl.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ includes/browser/curl.inc 1 Jan 1970 00:00:00 -0000 @@ -0,0 +1,127 @@ +handle)) { + $this->handle = curl_init(); + curl_setopt_array($this->handle, $this->getDefaultOptions()); + } + } + + /** + * Close the cURL handler and unset the handler. 
+ */ + public function close() { + if (isset($this->handle)) { + curl_close($this->handle); + unset($this->handle); + } + } + + protected function getDefaultOptions() { + global $base_url; + return array( + CURLOPT_COOKIEJAR => $this->cookieFile, + CURLOPT_URL => $base_url, + CURLOPT_FOLLOWLOCATION => TRUE, + CURLOPT_RETURNTRANSFER => TRUE, + CURLOPT_SSL_VERIFYPEER => FALSE, // Required to make the tests run on https:// + CURLOPT_SSL_VERIFYHOST => FALSE, // Required to make the tests run on https:// + CURLOPT_HEADERFUNCTION => array(&$this, 'headerCallback'), + ); + } + + public function setUserAgent($agent) { +// var_dump($this->handle); +// curl_setopt($this->handle, CURLOPT_USERAGENT, $agent); + } + + public function get($url, array $headers = array()) { + $this->execute(array( + CURLOPT_HTTPGET => TRUE, + CURLOPT_URL => $url, + CURLOPT_NOBODY => FALSE, + CURLOPT_HTTPHEADER => $headers, + )); + + return $this->buildRequest(); + } + + public function post() { + + } + + protected function buildRequest() { + if ($this->content) { + return array( + 'url' => $this->url, + 'headers' => $this->headers, + 'content' => $this->content, + ); + } + return FALSE; + } + + /** + * Performs a cURL exec with the specified options after calling open(). + * + * @param $options + * Custom cURL options. + * @return + * Nothing; the response body is stored in $this->content. + */ + protected function execute($options) { + $this->open(); + + curl_setopt_array($this->handle, $options); + $this->content = curl_exec($this->handle); + $this->url = curl_getinfo($this->handle, CURLINFO_EFFECTIVE_URL); + // $this->headers should be filled by headerCallback. + } + + /** + * Reads headers and stores them in the $headers array. + * + * @param $handler + * The cURL handle. + * @param $header + * A header. + * @return + * The string length of the header.
(required by cURL) + */ + protected function headerCallback($handler, $header) { + $this->headers[] = $header; + return strlen($header); + } +} Index: includes/browser/browser.inc =================================================================== RCS file: includes/browser/browser.inc diff -N includes/browser/browser.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ includes/browser/browser.inc 1 Jan 1970 00:00:00 -0000 @@ -0,0 +1,121 @@ +wrapper = new HttpWrapper_curl(); + + $this->setUserAgent('Drupal (+http://drupal.org/)'); + } + + final public static function getInstance() { + if (!isset(self::$browser)) { + self::$browser = new Browser(); + } + return self::$browser; + } + + public function setUserAgent($agent) { + $this->additional_headers['User-Agent'] = $agent; +// $this->wrapper->setUserAgent($agent); + } + + public function getUserAgent() { + return $this->additional_headers['User-Agent']; + } + + /** + * Retrieves a Drupal path or an absolute path. + * + * @param $path + * Drupal path or URL to load into browser. + * @param $options + * Options to be forwarded to url(). + * @param $headers + * An array containing additional HTTP request headers, each formatted as + * "name: value". + * @return + * The request array returned by the HTTP wrapper (keys 'url', 'headers' and 'content'); the content is also available as $this->getContent(). + */ + public function get($path, array $options = array(), array $headers = array()) { + $request = $this->wrapper->get($path, $options, $headers); + + // TODO Error check, look for meta refresh, etc. + $this->setState($request['url'], $request['headers'], $request['content']); + return $request; + } + + public function post() { + + } + + /** + * Gets the current raw HTML of the last requested page. + * + * @return + * Raw HTML of last requested page.
+ */ + public function getContent() { + return $this->content; + } + + protected function setState($url, $headers, $content) { + $this->url = $url; + $this->headers = $headers; + $this->content = $content; + + module_invoke_all('browser_request', self::$browser); // TODO decide on hooks + } +}