diff -rupN ../docapi-old/plugins/docapi_docvert/docapi_docvert.info ./plugins/docapi_docvert/docapi_docvert.info
--- ../docapi-old/plugins/docapi_docvert/docapi_docvert.info 1970-01-01 01:00:00.000000000 +0100
+++ ./plugins/docapi_docvert/docapi_docvert.info 2009-01-04 13:54:19.000000000 +0100
@@ -0,0 +1,10 @@
+name = DocAPI Docvert Text-Parser
+description = Provides a plugin parser for the Document Import API. Uses docvert to import word-processor documents.
+dependencies[] = docapi
+core = 6.x
+package = Document Import Plugins
+version = "6.x-2.x-dev"
+core = "6.x"
+project = "docapi"
+
+
diff -rupN ../docapi-old/plugins/docapi_docvert/docapi_docvert.module ./plugins/docapi_docvert/docapi_docvert.module
--- ../docapi-old/plugins/docapi_docvert/docapi_docvert.module 1970-01-01 01:00:00.000000000 +0100
+++ ./plugins/docapi_docvert/docapi_docvert.module 2009-01-04 13:51:42.000000000 +0100
@@ -0,0 +1,201 @@
+'creator','dc:language'=>'language','dc:title'=>'title');
+
+  return $plugin;
+ }
+
+/**
+ * Returns the path of the docvert zip for a document, converting on demand.
+ *
+ * The zip lives in the directory named by the 'docapi_docvert_zip_path'
+ * variable (default: "<files directory>/docapi_docvert") and is named after
+ * the basename of the source document with ".zip" appended.
+ *
+ * @param $path
+ *   Path of the source document.
+ * @param $force_new
+ *   When TRUE the conversion is re-run even if a zip already exists.
+ * @return
+ *   The path of the zip, or FALSE when the conversion failed.
+ */
+function _docapi_docvert_get_zip($path, $force_new=FALSE) {
+  $zipDir = variable_get('docapi_docvert_zip_path', file_directory_path() .'/docapi_docvert');
+  $zipPath = $zipDir . '/' . basename($path) . '.zip';
+
+  if ($force_new || !file_exists($zipPath)) {
+    if (!_docapi_docvert_convert($path, $zipPath)) {
+      return FALSE;
+    }
+  }
+
+  return $zipPath;
+}
+
+/**
+ * Converts a document to HTML by POSTing it to the docvert web service.
+ *
+ * Docvert answers with a zip containing the generated html file(s); that
+ * archive is saved to $zipPath so it can be scanned later. An error message
+ * is shown to the user when the response is not a zip.
+ *
+ * @param $path
+ *   Path of the document to upload.
+ * @param $zipPath
+ *   Destination path for the zip returned by docvert.
+ * @return
+ *   TRUE when a zip was received and saved, FALSE otherwise.
+ */
+function _docapi_docvert_convert($path, $zipPath) {
+  // The '@' upload prefix is ignored by default since PHP 5.6 and removed in
+  // PHP 7, so prefer CURLFile wherever it exists (PHP >= 5.5).
+  $upload = class_exists('CURLFile') ? new CURLFile($path) : '@'.$path;
+
+  // populate the form field data
+  $postData = array(
+    'random file' => $upload,
+    'pipeline' => "autopipeline:simple webpage",
+    'autopipeline' => "Nothing (one long page)",
+    'afterconversion' => "downloadZip",
+    'converter' => "pyodconverter",
+  );
+
+  // initialize the cURL object
+  $ch = curl_init();
+  curl_setopt($ch, CURLOPT_URL, variable_get('docapi_docvert_docvert_path', 'http://127.0.0.1/docvert/web-service.php'));
+  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+  curl_setopt($ch, CURLOPT_POST, 1);
+  curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
+
+  // make the actual docvert POST call
+  $return_file = curl_exec($ch);
+  curl_close($ch);
+
+  // curl_exec() yields FALSE on a transport failure; anything else must start
+  // with the zip magic bytes "PK" (80, 75) to be accepted.
+  if ($return_file === FALSE || strncmp($return_file, 'PK', 2) !== 0) {
+    drupal_set_message(t('Could not convert this file.'), 'error');
+    return FALSE;
+  }
+
+  return file_save_data($return_file, $zipPath, FILE_EXISTS_REPLACE) ? TRUE : FALSE;
+}
+
+/**
+ * Implementation of docapi_hook_validate_file
+ *
+ * @param $path
+ *   The full path to the file, including the filename
+ * @param $mimetype
+ *   The mimetype of the file we are validating
+ * @return Whether the file supplied is parsable by this module.
+ */
+function docapi_docvert_docapi_validate_file($path, $mimetype) {
+  // A file counts as parsable when a fresh docvert conversion succeeds, so
+  // any previously stored zip is deliberately ignored here.
+  return _docapi_docvert_get_zip($path, TRUE) ? TRUE : FALSE;
+}
+
+/**
+ * Finds the archive member holding the converted page.
+ *
+ * @param $z
+ *   An opened ZipArchive.
+ * @return
+ *   The full member name of the first 'index.html' found in any directory
+ *   of the archive, or FALSE when there is none.
+ */
+function _docapi_docvert_index_name($z) {
+  $index = $z->locateName('index.html', ZipArchive::FL_NODIR);
+  if ($index === FALSE) {
+    return FALSE;
+  }
+  return $z->getNameIndex($index);
+}
+
+/**
+ * Implementation of docapi_hook_parse
+ *
+ * Obtains the docvert zip for the file (converting it if no zip exists yet)
+ * and extracts the contents of its index.html.
+ *
+ * @param $path
+ *   The full path to the file, including the filename
+ * @param $mimetype
+ *   The mimetype of the file we are parsing (not used here)
+ * @return
+ *   An array with an empty 'metadata' entry and the html in 'content', or
+ *   nothing when the zip could not be produced or read.
+ */
+function docapi_docvert_docapi_parse($path, $mimetype) {
+  if (!($zipPath = _docapi_docvert_get_zip($path))) {
+    return;
+  }
+
+  $z = new ZipArchive();
+  // open() returns TRUE on success but a non-zero error code on failure, so
+  // a plain truthiness test would never notice a failure.
+  if ($z->open($zipPath) !== TRUE) {
+    return;
+  }
+
+  $contentName = _docapi_docvert_index_name($z);
+  $content = ($contentName === FALSE) ? FALSE : $z->getFromName($contentName);
+  $z->close();
+
+  if ($content === FALSE) {
+    return;
+  }
+
+  return array(
+    'metadata' => '',
+    'content' => $content,
+  );
+}
+
+/**
+ * Implementation of docapi_hook_metadata
+ *
+ * Reads docvert-meta.xml, which is looked up in the same archive directory
+ * as index.html, and collects the text of each element up to the closing
+ * 'office:meta' tag.
+ *
+ * @param $path
+ *   The full path to the file, including the filename
+ * @param $mimetype
+ *   The mimetype of the file we are validating
+ * @return
+ *   The metadata for a given file as an array keyed by element name (empty
+ *   when the archive or its metadata cannot be read), or nothing when the
+ *   zip could not be produced.
+ */
+function docapi_docvert_docapi_metadata($path, $mimetype) {
+  $meta = array();
+
+  if (!($zipPath = _docapi_docvert_get_zip($path))) {
+    return;
+  }
+
+  $z = new ZipArchive();
+  // open() returns TRUE on success but a non-zero error code on failure.
+  if ($z->open($zipPath) !== TRUE) {
+    return $meta;
+  }
+  $contentName = _docapi_docvert_index_name($z);
+  $z->close();
+
+  if ($contentName === FALSE) {
+    return $meta;
+  }
+
+  // dirname() gives '.' for a member at the archive root; no prefix then.
+  $contentDir = dirname($contentName);
+  $metaName = ($contentDir === '.' ? '' : $contentDir . '/') . 'docvert-meta.xml';
+
+  $reader = new XMLReader();
+  if (!$reader->open('zip://' . $zipPath . '#' . $metaName)) {
+    return $meta;
+  }
+
+  // Remember the most recent element name and file the next non-blank node
+  // value under it.
+  $elm = NULL;
+  while ($reader->read()) {
+    if ($reader->nodeType == XMLReader::ELEMENT) {
+      $elm = $reader->name;
+      continue;
+    }
+    if ($reader->nodeType == XMLReader::END_ELEMENT && $reader->name == 'office:meta') {
+      break;
+    }
+    // Compare against '' rather than testing truthiness so a value of "0"
+    // is kept.
+    if ($elm === NULL || trim($reader->value) === '') {
+      continue;
+    }
+    $meta[$elm] = $reader->value;
+  }
+  $reader->close();
+
+  return $meta;
+}