diff -ruN plugins-old/docapi_docvert/docapi_docvert.info plugins/docapi_docvert/docapi_docvert.info
--- plugins-old/docapi_docvert/docapi_docvert.info 1970-01-01 01:00:00.000000000 +0100
+++ plugins/docapi_docvert/docapi_docvert.info 2009-01-04 13:54:19.000000000 +0100
@@ -0,0 +1,10 @@
+name = DocAPI Docvert Text-Parser
+description = Provides a plugin parser or the Document Import API. Uses docvert to import word-processor documents.
+dependencies[] = docapi
+core = 6.x
+package = Document Import Plugins
+version = "6.x-2.x-dev"
+core = "6.x"
+project = "docapi"
+
+
diff -ruN plugins-old/docapi_docvert/docapi_docvert.module plugins/docapi_docvert/docapi_docvert.module
--- plugins-old/docapi_docvert/docapi_docvert.module 1970-01-01 01:00:00.000000000 +0100
+++ plugins/docapi_docvert/docapi_docvert.module 2009-02-05 18:14:23.000000000 +0100
@@ -0,0 +1,223 @@
+'creator','dc:language'=>'language','dc:title'=>'title');
+
+    return $plugin;
+  }
+
+/**
+ * Returns the directory that holds the converted output of a document.
+ *
+ * The directory is named after the basename of $path and lives below the
+ * 'docapi_docvert_docdir' variable (default: <files>/docapi_docvert). The
+ * document is converted when that directory is missing or $force_new is set.
+ *
+ * @param $path
+ *   Path to the source document.
+ * @param $force_new
+ *   TRUE to convert again even when the directory already exists.
+ * @return
+ *   The directory path, or FALSE when the conversion failed.
+ */
+function _docapi_docvert_get_docdir($path, $force_new=FALSE) {
+  $base_dir = variable_get('docapi_docvert_docdir', file_directory_path() .'/docapi_docvert');
+  $docdir = $base_dir . '/' . basename($path);
+
+  if ((!is_dir($docdir) || $force_new) && !_docapi_docvert_convert($path, $docdir)) {
+    return FALSE;
+  }
+
+  return $docdir;
+}
+
+/**
+ * Converts a document to HTML through the docvert web service.
+ *
+ * Posts the file to the docvert URL, expects a zip archive in response,
+ * extracts it into $docdir and moves every extracted file to the top level
+ * of $docdir.
+ *
+ * @param $path
+ *   Path to the document to convert.
+ * @param $docdir
+ *   Directory receiving the converted files. Files already in it are deleted.
+ * @return
+ *   TRUE when the archive was received and extracted, FALSE otherwise.
+ */
+function _docapi_docvert_convert($path, $docdir) {
+  // cURL needs the full path of the file it uploads.
+  $source = realpath($path);
+  if ($source === FALSE) {
+    drupal_set_message(t('Could not convert this file.'), 'error');
+    return FALSE;
+  }
+
+  $host = '127.0.0.1';
+  if (isset($_SERVER['HTTP_HOST']) && drupal_valid_http_host($_SERVER['HTTP_HOST'])) {
+    $host = $_SERVER['HTTP_HOST'];
+  }
+  $docvert_url = variable_get('docapi_docvert_docvert_url', 'http://'.$host.'/docvert/web-service.php');
+
+  // Populate the form field data.
+  $postData = array(
+    'random file' => '@'.$source,
+    'pipeline' => "autopipeline:simple webpage",
+    'autopipeline' => "Nothing (one long page)",
+    'afterconversion' => "downloadZip",
+    'converter' => "pyodconverter",
+  );
+
+  // Make the actual docvert POST call.
+  $ch = curl_init();
+  curl_setopt($ch, CURLOPT_URL, $docvert_url);
+  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+  curl_setopt($ch, CURLOPT_POST, 1);
+  curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
+  $return_file = curl_exec($ch);
+  $curl_error = curl_error($ch);
+  curl_close($ch);
+
+  // A zip archive starts with the bytes "PK". Anything else, including the
+  // FALSE that curl_exec() returns for a failed request, is an error.
+  if (!is_string($return_file) || strncmp($return_file, 'PK', 2) !== 0) {
+    watchdog('docapi_docvert', 'Docvert did not return a zip archive. cURL error: @error', array('@error' => $curl_error), WATCHDOG_ERROR);
+    drupal_set_message(t('Could not convert this file.'), 'error');
+    return FALSE;
+  }
+
+  $zipPath = file_directory_temp() . '/' . basename($path);
+  if (!file_save_data($return_file, $zipPath, FILE_EXISTS_REPLACE)) {
+    return FALSE;
+  }
+
+  // Create the target directory and empty it. file_check_directory() takes
+  // its argument by reference, hence the variables.
+  $parent = dirname($docdir);
+  if (!file_check_directory($parent, FILE_CREATE_DIRECTORY) || !file_check_directory($docdir, FILE_CREATE_DIRECTORY)) {
+    return FALSE;
+  }
+  file_scan_directory($docdir, '.*', array('.', '..'), 'unlink', TRUE);
+
+  // ZipArchive::open() returns TRUE on success and an integer error code
+  // (which is truthy) on failure, so the result must be compared strictly.
+  $z = new ZipArchive();
+  if ($z->open($zipPath) !== TRUE) {
+    return FALSE;
+  }
+  $extracted = $z->extractTo($docdir .'/');
+  $z->close();
+  if (!$extracted) {
+    return FALSE;
+  }
+
+  // Flatten the result: move every extracted file to the top of $docdir.
+  $files = file_scan_directory($docdir, '.*', array('.', '..'));
+  foreach ($files as $file) {
+    file_move($file->filename, $docdir.'/'.$file->basename, FILE_EXISTS_REPLACE);
+  }
+
+  return TRUE;
+}
+
+/**
+ * Implementation of hook_cron().
+ *
+ * Cleans up the docapi_docvert dir by deleting subdirs without a
+ * corresponding entry in the docapi library.
+ */
+function docapi_docvert_cron() {
+  $docsdir = variable_get('docapi_docvert_docdir', file_directory_path() .'/docapi_docvert');
+
+  // Nothing to clean up until a conversion has created the directory.
+  if (!is_dir($docsdir)) {
+    return;
+  }
+  $docsdir_entries = scandir($docsdir);
+  if (!$docsdir_entries) {
+    return;
+  }
+
+  // A failed query must not be mistaken for an empty library, otherwise
+  // every converted document would be deleted.
+  $results = db_query("SELECT f.filepath FROM {docapi_library} d INNER JOIN {files} f ON d.fid = f.fid");
+  if (!$results) {
+    return;
+  }
+
+  // Conversion directories are named after the basename of the source file.
+  $toKeep = array();
+  while ($row = db_fetch_object($results)) {
+    $toKeep[] = basename($row->filepath);
+  }
+
+  foreach (array_diff($docsdir_entries, $toKeep) as $entry) {
+    $entryPath = $docsdir.'/'.$entry;
+    if ($entry == '.' || $entry == '..' || !is_dir($entryPath)) {
+      continue;
+    }
+
+    // Delete the files first, then the emptied first-level subdirectories,
+    // then the directory itself. rmdir() is best effort.
+    file_scan_directory($entryPath, '.*', array('.', '..'), 'unlink', TRUE);
+    $entries = scandir($entryPath);
+    if ($entries) {
+      foreach ($entries as $dirEntry) {
+        if ($dirEntry != '.' && $dirEntry != '..') {
+          @rmdir($entryPath.'/'.$dirEntry);
+        }
+      }
+    }
+    @rmdir($entryPath);
+  }
+}
+
+/**
+ * Implementation of docapi_hook_validate_file
+ *
+ * Validation forces a fresh conversion, so a file counts as parsable exactly
+ * when docvert manages to convert it.
+ *
+ * @param $path
+ *   The full path to the file, including the filename
+ * @param $mimetype
+ *   The mimetype of the file we are validating
+ * @return Whether the file supplied is parsable by this module.
+ */
+function docapi_docvert_docapi_validate_file($path, $mimetype) {
+  return _docapi_docvert_get_docdir($path, TRUE) ? TRUE : FALSE;
+}
+
+/**
+ * Implementation of docapi_hook_parse
+ *
+ * First uses docvert to
+ */
+function docapi_docvert_docapi_parse($path, $mimetype) {
+  // Create the container object
+  $imported_data = array('metadata'=>'','content'=>'');
+
+  if(!($docdir = _docapi_docvert_get_docdir($path))) return $imported_data;
+  $contentPath = $docdir . '/index.html';
+
+  if(is_file($contentPath)) {
+    $content = file_get_contents($contentPath);
+    if(!empty($content)) {
+      $content = str_replace('open($metaPath);
+    $odt_meta = array();
+    while ($reader->read()) {
+      if ($reader->nodeType == XMLREADER::ELEMENT) {
+        $elm = $reader->name;
+      } else {
+        if ($reader->nodeType == XMLREADER::END_ELEMENT && $reader->name == 'office:meta') {
+          break;
+        }
+        if (!trim($reader->value)) {
+          continue;
+        }
+        $odt_meta[$elm] = $reader->value;
+      }
+    }
+    $reader->close();
+
+    $meta = $odt_meta;
+  }
+  return $meta;
+}