Index: file_import.batch.inc
===================================================================
RCS file: file_import.batch.inc
diff -N file_import.batch.inc
0a1,554
> <?php
>
> /**
>  * @file
>  * Batch import of files as nodes for the File Import module.
>  *
>  * Contains the batch import form, the Batch API worker that creates one node
>  * per file, the settings form that maps filename patterns to body-extraction
>  * commands, and the helper that attaches an imported file to a node.
>  */
>
> // TODO - permissions on admin screen
>
> // TODO - final screen shows results! and errors!
>
> // TODO - make it clear that the administrator of this module is
> // capable of running any external command via the webserver user -
> // THIS PERMISSION IS NOT TO BE GRANTED LOOSELY
>
> // TODO - provide option for files to be moved or copied to the
> // Drupal folders tree?
>
> // TODO - provide ability to not move files around if they are
> // already in Drupal files dir? default should be just to pull copy
> // from files dir
>
> // TODO - provide regex method for selecting which files are
> // handled, eg only import PDF on this run
>
> // TODO - trigger actions so that post-import things can be done, eg
> // imagecache or op_video parse on imported file (would require
> // being able to define what form the attachment took; media modules
> // don't necessarily make file an attachment as per upload.module)
>
> // no wait!! implement upload.module attachment as an action, then
> // allow selection of this or other attachment forms
>
> // TODO - provide taxonomy selector to tag all imported files with
> // (should validate that node type is able to be tagged with
> // specified vocab, or maybe make multistep?)
>
> // TODO - validation - node type must support file attachment (or
> // whatever other method is used)
>
> // TODO - settings for cmds to match import files (eg ps2ascii *pdf)
>
> // TODO - attachment is a hookable operation, and we select the
> // hookable operation when in first form
>
> // TODO - customise node name (currently extracted from filename
> // plain)
>
> define('FILE_IMPORT_CANNOT_WRITE_FILE', -1);
> define('FILE_IMPORT_CANNOT_WRITE_DIR', -2);
> define('FILE_IMPORT_FILE_OUTSIDE_SOURCE_DIR', -4);
>
> /**
>  * Form builder: pick the node type, directories and file mask for an import.
>  *
>  * @return
>  *   A Forms API array.
>  */
> function file_import_batch_import_form() {
>   $form = array();
>
>   // Both directories default to the stored setting, falling back to the
>   // Drupal files directory.
>   $outpath = variable_get('file_import_output_path', file_directory_path());
>   $inpath = variable_get('file_import_path', file_directory_path());
>
>   // Select which node type to import as. The blank placeholder entry is added
>   // once, ahead of the real types, so it exists even when no types do.
>   $node_types = array('DOES_NOT_EXIST' => '');
>   foreach (node_get_types() as $name => $type) {
>     $node_types[$name] = $type->name;
>   }
>   $form['type'] = array(
>     '#type' => 'select',
>     '#title' => t('Node Type'),
>     '#options' => $node_types,
>     '#description' => t('Select the type of node you would like each file to generate'),
>   );
>
>   // if this file is *outside* file_directory_path() then we should
>   // see whether people want to cp or mv to that location. some use
>   // cases (eg ubercart feature download) suggest that the video is
>   // stored outside that path anyway
>   $form['inpath'] = array(
>     '#type' => 'textfield',
>     '#title' => t('Import files from'),
>     '#default_value' => $inpath,
>     '#description' => t('The directory to import files from. This folder can be outside your Drupal tree.'),
>   );
>
>   // optional, can be useful to clean up along the way tho, and avoid
>   // dupe imports
>   $form['outpath'] = array(
>     '#type' => 'textfield',
>     '#title' => t('Move files to'),
>     '#default_value' => $outpath,
>     '#description' => t('Files will be moved (or copied) to this folder once imported. This folder should be within the Drupal files path so the files are available for download'),
>   );
>
>   // The mask is a regular expression, not a shell glob: it is passed
>   // unchanged to file_scan_directory() by the validate and submit handlers.
>   $args = array(
>     '!func_url' => l('file_scan_directory()', 'http://api.drupal.org/api/function/file_scan_directory/6'),
>     '!ereg_url' => l('ereg()', 'http://www.php.net/ereg'),
>   );
>   $form['mask'] = array(
>     '#type' => 'textfield',
>     '#size' => 20,
>     '#title' => t('File mask'),
>     '#default_value' => '.*\..*',
>     '#description' => t('Regular expression selecting the files to import. It is passed to !func_url and interpreted by !ereg_url.', $args),
>   );
>
>   // ignored currently: the directory scan does not read this setting yet
>   $form['recurse'] = array(
>     '#type' => 'checkboxes',
>     '#options' => array('recurse' => t('Scan child directories also')),
>   );
>
>   $form['submit'] = array(
>     '#type' => 'submit',
>     '#value' => t('Confirm Import'),
>   );
>
>   return $form;
> }
>
> /**
>  * Submit handler: queue one batch operation per file matching the mask.
>  *
>  * @param $form_id
>  *   First argument supplied by the Forms API; not used here.
>  * @param $form_state
>  *   Form state; reads the 'type', 'inpath', 'outpath', 'recurse' and 'mask'
>  *   values.
>  */
> function file_import_batch_import_form_submit( $form_id, &$form_state ) {
>   $values = $form_state['values'];
>   $source_dir = $values['inpath'];
>   $mask = $values['mask'];
>
>   // Settings handed to every batch operation. The file list itself is left
>   // out: each operation already receives its own file, and copying the whole
>   // list into every operation makes the stored batch grow quadratically.
>   $options = array(
>     'node_type' => $values['type'],
>     'source_dir' => $source_dir,
>     'dest_dir' => $values['outpath'],
>     // The checkboxes element submits an array keyed by option name.
>     'recurse' => !empty($values['recurse']['recurse']),
>     'mask' => $mask,
>   );
>
>   // if we are unable to handle a file, we need to move on to the next
>   // one. therefore we build a list first, then iterate thru the list
>   $file_list = file_scan_directory($source_dir, $mask);
>   if (empty($file_list)) {
>     // will have been caught by validate function anyway
>     drupal_set_message(t('No files found in %dir to import', array('%dir' => $source_dir)));
>     return;
>   }
>
>   $operations = array();
>   foreach ($file_list as $file) {
>     $operations[] = array('file_import_batch_import_batch_process', array($file, $options));
>   }
>
>   batch_set(array(
>     'operations' => $operations,
>     'finished' => 'file_import_batch_import_finished',
>     'title' => t('Importing Files'),
>     'progress_message' => t('Imported @current of @total.'),
>     'error_message' => t('The import process encountered an error.'),
>     'init_message' => t('Preparing'),
>     'file' => drupal_get_path('module', 'file_import') . '/file_import.batch.inc',
>   ));
> }
>
> /**
>  * Validation handler: a node type is chosen and the source has matching files.
>  *
>  * @param $form_id
>  *   First argument supplied by the Forms API; not used here.
>  * @param $form_state
>  *   Form state; reads the 'type', 'inpath' and 'mask' values.
>  */
> function file_import_batch_import_form_validate($form_id, &$form_state) {
>   $source_dir = $form_state['values']['inpath'];
>   $mask = $form_state['values']['mask'];
>   $args = array(
>     '%dir' => $source_dir,
>     '%mask' => $mask,
>   );
>
>   // The blank placeholder option is not a real node type.
>   if ($form_state['values']['type'] == 'DOES_NOT_EXIST') {
>     form_set_error('type', t('Please select a node type.'));
>   }
>
>   // could also check that the mask is valid?
>   if (!is_dir($source_dir)) {
>     // Flag the element the value came from; the form has no 'source_dir'.
>     form_set_error('inpath', t('The directory specified, %dir, does not exist.', $args));
>     return;
>   }
>
>   $file_list = file_scan_directory($source_dir, $mask);
>   if (empty($file_list)) {
>     form_set_error('', t('No files were found in directory %dir matching the mask %mask', $args));
>   }
> }
>
> /**
>  * Batch operation: create a node for one file and attach the file to it.
>  *
>  * The node body is taken from the output of the commands whose filename
>  * pattern matches the file; the last matching command that produces output
>  * wins.
>  *
>  * @param $file
>  *   File object from file_scan_directory(); its filename, basename and name
>  *   properties are read.
>  * @param $options
>  *   Import settings built by the submit handler; 'node_type' and 'dest_dir'
>  *   are read.
>  * @param $context
>  *   Batch API context; 'message', 'finished' and 'results' are written.
>  */
> function file_import_batch_import_batch_process( $file, $options, &$context ) {
>   global $user;
>
>   $basename = $file->basename;
>   $source_dir = dirname($file->filename);
>
>   $node = new stdClass;
>   $node->title = $file->name;
>   $node->type = $options['node_type'];
>   $node->uid = $user->uid;
>
>   // Assume no pattern matches until one does, so that a later non-matching
>   // pattern cannot overwrite the details of an earlier match.
>   $details = t('No command match for this filename, so unable to extract body.');
>   $patterns = variable_get('file_import_batch_patterns', file_import_batch_default_exts());
>   foreach ($patterns as $mask => $cmd) {
>     if (!ereg($mask, $basename)) {
>       continue;
>     }
>     $details = t('Using cmd !cmd to extract content', array('!cmd' => check_plain($cmd)));
>
>     // The filename is quoted as a single shell argument so that spaces,
>     // quotes and shell metacharacters in it cannot alter the command.
>     $extract_cmd = escapeshellcmd($cmd) . ' ' . escapeshellarg($file->filename);
>
>     // exec() appends to an existing array, so start empty for every command.
>     $output = array();
>     $retvar = 0;
>     exec($extract_cmd, $output, $retvar);
>     if ($retvar != 0) {
>       watchdog('file_import', 'Command %cmd exited with status @status.', array('%cmd' => $extract_cmd, '@status' => $retvar), WATCHDOG_WARNING);
>     }
>
>     $body = implode("\n", $output);
>     if ($body != '') {
>       $node->body = $body;
>     }
>   }
>   node_save($node);
>
>   // attach file
>   file_import_attach_file_to_node(array(
>     'source_file' => $basename,
>     // from source file, not $options, b/c we may have recursed into
>     // child dir
>     'source_dir' => $source_dir,
>     'dest_dir' => $options['dest_dir'],
>     // from source file
>     'dest_file' => $basename,
>   ), $node);
>
>   // "!" placeholders are inserted as-is, so escape the filesystem names.
>   $context['message'] = t('Now importing from !dir, file !file. !details', array(
>     '!dir' => check_plain($source_dir),
>     '!file' => check_plain($basename),
>     '!details' => $details,
>   ));
>   $context['finished'] = 1;
>   $context['results'][] = $file->filename;
> }
>
> // Sketch of a hook runner for pluggable import/attach operations; kept
> // commented out until the hookable attachment TODO above is implemented.
> //
> // /**
> //  * run the file_import hook, returns specified things
> //  *
> //  * @param $hook_name is the name of the hook to call, eg "file_import_import_file"
> //  * @param $op is the action that a module should run
> //  * @param $data is signit data
> //  * @param $config is configuration data
> //  * @return an array of data
> //  */
> // function file_import_extend($hook_name, $op = null, $data = null, $config = null) {
> //   $items = array();
> //   foreach (module_implements($hook_name) as $module) {
> //     if ($new = module_invoke($module, $hook_name, $op, $data, $config)) {
> //       $items = array_merge($items, $new);
> //     }
> //   }
> //   // make sure we return an array
> //   if (! is_array($items)) { return array(); }
> //   return $items;
> // }
>
> // function upload_file_import($op, $data, $config = NULL) {
> //   // attach a file to a nodex
> // }
>
> /**
>  * Form builder: settings form mapping filename patterns to commands.
>  *
>  * Shows one mask/command row per stored pattern plus a blank row for adding a
>  * new one. The rows are collapsed into the 'file_import_batch_patterns'
>  * value by file_import_batch_cmd_settings_validate().
>  *
>  * @return
>  *   The form array as returned by system_settings_form().
>  */
> function file_import_batch_admin_settings() {
>   drupal_add_css(drupal_get_path('module', 'file_import') . '/file_import_batch.css');
>   $patterns = variable_get('file_import_batch_patterns', file_import_batch_default_exts());
>   $args = array(
>     '!ereg_url' => l('ereg()', 'http://www.php.net/ereg'),
>   );
>
>   $form = array();
>   $form['patterns'] = array(
>     '#type' => 'fieldset',
>     '#title' => t('File import commands'),
>     '#description' => t('Enter an !ereg_url pattern to match filenames, and a suitable system command which can be used to extract the node body from imported files.', $args),
>   );
>
>   $examples = "
> .*\.pdf /usr/bin/ps2ascii
> .*\.doc /usr/bin/docvert (or /usr/bin/antiword)
> .*\.txt /usr/bin/cat
> ";
>   $form['patterns']['help1'] = array(
>     '#value' => t('Examples:<pre>!examples</pre>', array('!examples' => $examples)),
>   );
>   $form['patterns']['help2'] = array(
>     '#value' => t('A new blank field will be available to add more patterns on submission.'),
>   );
>
>   // seems we don't need to provide a 'file_import_batch_patterns' form field;
>   // system_settings_form() will store anything in $form_state['values'] later.
>
>   // One mask/command row per stored pattern, then a trailing blank row so a
>   // new pattern can be added.
>   $rows = array();
>   foreach ($patterns as $mask => $cmd) {
>     $rows[] = array($mask, $cmd);
>   }
>   $rows[] = array('', '');
>
>   foreach ($rows as $i => $row) {
>     list($mask, $cmd) = $row;
>     // The wrapper classes are the ones styled in file_import_batch.css.
>     $form['patterns']['mask_' . $i] = array(
>       '#type' => 'textfield',
>       '#size' => '12',
>       '#title' => t('Mask'),
>       '#default_value' => $mask,
>       '#prefix' => '<div class="file-import-settings-mask">',
>       '#suffix' => '</div>',
>       '#attributes' => array(
>         'class' => 'clear-both',
>       ),
>     );
>     $form['patterns']['cmd_' . $i] = array(
>       '#type' => 'textfield',
>       '#size' => '24',
>       '#title' => t('Command'),
>       '#default_value' => $cmd,
>       '#prefix' => '<div class="file-import-settings-cmd">',
>       '#suffix' => '</div>',
>       '#attributes' => array(
>         'class' => 'float-left',
>       ),
>     );
>   }
>
>   $form['#validate'][] = 'file_import_batch_cmd_settings_validate';
>   $form['#submit'][] = 'file_import_batch_cmd_settings_submit';
>
>   return system_settings_form($form);
> }
>
> /**
>  * this validator function moves values from mask_XX and cmd_XX to a
>  * single form field from where they can be stored as a nice array
>  *
>  * Every mask_XX/cmd_XX value is removed from $form_state['values'], including
>  * blank or half-filled rows, so that only the combined array is left there
>  * for system_settings_form() to store.
>  *
>  * @param $form
>  *   The form array; not used here.
>  * @param $form_state
>  *   Form state; 'file_import_batch_patterns' is set to a mask => command
>  *   array.
>  */
> function file_import_batch_cmd_settings_validate( $form, &$form_state ) {
>   $values = &$form_state['values'];
>   $patterns = array();
>
>   // Walk the rows actually submitted instead of recounting the stored
>   // variable, which may have changed since the form was built.
>   for ($i = 0; isset($values['mask_' . $i]); $i++) {
>     $mask = trim($values['mask_' . $i]);
>     $cmd = isset($values['cmd_' . $i]) ? trim($values['cmd_' . $i]) : '';
>     if ($mask != '' && $cmd != '') {
>       $patterns[$mask] = $cmd;
>     }
>     unset($values['mask_' . $i], $values['cmd_' . $i]);
>   }
>
>   $values['file_import_batch_patterns'] = $patterns;
> }
>
> /**
>  * Submit handler for the command settings form.
>  */
> function file_import_batch_cmd_settings_submit( ) {
>   // nothing needed here - system_settings_form() does the storage for
>   // us!
> }
>
> /**
>  * Default filename patterns and the commands used to extract a node body.
>  *
>  * @return
>  *   Array of commands keyed by filename pattern, eg '.*\.pdf'.
>  */
> function file_import_batch_default_exts() {
>   $default_exts = array(
>     'txt' => '/usr/bin/cat',
>     'html' => '/usr/bin/cat',
>     'doc' => '/usr/bin/docvert',
>     'pdf' => '/usr/bin/ps2ascii',
>   );
>   // TODO - call a hook which allows other modules to tell us that they
>   // provide support for a given extension
>   $cmds = array();
>   foreach ($default_exts as $ext => $cmd) {
>     $cmds['.*\.' . $ext] = $cmd;
>   }
>   return $cmds;
> }
>
> /**
>  * Move a file into place and attach it to a node as an upload.
>  *
>  * this code is copied from file_import.module in
>  * file_import_form_submit()
>  *
>  * suggest it be separated into a single function so we can re-use it
>  * for any attach-to-node action
>  *
>  * @param array args = array(
>  *   'source_dir'  => 'directory files are importing from',
>  *   'source_file' => 'file we are attaching to the node',
>  *   'dest_dir'    => 'directory we are storing files in',
>  *   'dest_file'   => 'file we want to save to', // not used for the move yet
>  *   'overwrite'   => FALSE, // not read; files are always moved with
>  *                           // FILE_EXISTS_RENAME
>  * ) ;
>  *
>  * @param object node (passed by ref); its nid and vid are read
>  *
>  * @return
>  *   A negative FILE_IMPORT_* code when the file could not be imported,
>  *   nothing otherwise.
>  */
> function file_import_attach_file_to_node($args, &$node) {
>   global $user;
>
>   $source_path = $args['source_dir'] . '/' . $args['source_file'];
>   if (!file_check_location($source_path, $args['source_dir'])) {
>     error_log("Can't import files from outside source dir");
>     return FILE_IMPORT_FILE_OUTSIDE_SOURCE_DIR;
>   }
>   if (!file_check_directory($args['dest_dir'])) {
>     error_log("Can't import file because not permitted to write to dest dir");
>     // FILE_IMPORT_CANNOT_WRITE_DIR is the constant this file defines.
>     return FILE_IMPORT_CANNOT_WRITE_DIR;
>   }
>
>   // use file_copy or file_move depending on setting - currently always
>   // file_move(). It takes the source by reference and leaves the final path
>   // there, so the recorded path is the one the file really ended up at.
>   $dest_path = $source_path;
>   if (!file_move($dest_path, $args['dest_dir'], FILE_EXISTS_RENAME)) {
>     drupal_set_message(t('Move failed (%file).', array('%file' => $source_path)), 'error');
>     return FILE_IMPORT_CANNOT_WRITE_FILE;
>   }
>   $dest_file = basename($dest_path);
>
>   // should be moved to a module hook so that we can support more
>   // file-specific actions without cluttering this function
>   // Images are scaled after the move, on the file in its final location, to
>   // the size held in the 'upload_max_resolution' variable.
>   if (image_get_info($dest_path)) {
>     $max_resolution = variable_get('upload_max_resolution', 0);
>     // Pad so that a setting without an "x" (the default 0) still unpacks.
>     list($width, $height) = array_pad(explode('x', $max_resolution), 2, 0);
>     if ($width && $height && image_scale($dest_path, $dest_path, $width, $height)) {
>       drupal_set_message(t('The image was resized to fit within the maximum allowed resolution of %resolution pixels.', array('%resolution' => $max_resolution)));
>     }
>   }
>
>   // The size may have changed if the image was scaled.
>   clearstatcache();
>   $file = array(
>     'filename' => $dest_file,
>     'filepath' => $dest_path,
>     // NOTE(review): content_type() is not defined in this file; presumably
>     // file_import.module provides it - confirm.
>     'filemime' => content_type($dest_path),
>     'filesize' => filesize($dest_path),
>     // status 1 = permanent, 0 = temporary
>     'status' => 1,
>     'timestamp' => time(),
>     'uid' => $user->uid,
>   );
>   if (!drupal_write_record('files', $file)) {
>     drupal_set_message(t('Unable to save a file record for %file.', array('%file' => $dest_path)), 'error');
>     // There is no dedicated code for a database failure.
>     return FILE_IMPORT_CANNOT_WRITE_FILE;
>   }
>
>   // drupal_write_record() has filled in the new fid.
>   $upload = array(
>     'fid' => $file['fid'],
>     'nid' => $node->nid,
>     'vid' => $node->vid,
>     'description' => $dest_file,
>     'list' => 1,
>   );
>   drupal_write_record('upload', $upload);
> }
\ No newline at end of file
Index: file_import.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/file_import/file_import.module,v
retrieving revision 1.2.2.2
diff -r1.2.2.2 file_import.module
89a90,114
>   /**
>    * menu item for batch handler
>    */
>   $items['admin/content/file_import/batch'] = array(
>     'access arguments' => array( 'import files' ),
>     'description' => 'Import files and create nodes from their contents',
>     'file' => 'file_import.batch.inc',
>     'file path' => drupal_get_path( 'module','file_import' ),
>     'page callback' => 'drupal_get_form',
>     'page arguments' => array( 'file_import_batch_import_form' ),
>     'title' => 'Import Files as Nodes',
>     'type' => MENU_LOCAL_TASK,
>   ) ;
>
>   $items['admin/settings/file_import/batch'] = array(
>     'access arguments' => array('administer site configuration'),
>     'description' => "Change settings of the File Import module's batch settings.",
>     'file' => 'file_import.batch.inc',
>     'file path' => drupal_get_path( 'module','file_import' ),
>     'page callback' => 'drupal_get_form',
>     'page arguments' => array('file_import_batch_admin_settings'),
>     'title' => 'File import batch',
>     'type' => MENU_LOCAL_TASK,
>   );
>
Index: file_import_batch.css
===================================================================
RCS file: file_import_batch.css
diff -N file_import_batch.css
0a1,9
> .file-import-settings-mask {
>   clear: both ;
>   float: left ;
> }
>
> .file-import-settings-cmd {
>   float: left ;
>   margin-left: 2em ;
> }
\ No newline at end of file