diff -Nuarp import_html_47x/import_html.info import_html_47xto5x/import_html.info --- import_html_47x/import_html.info 1969-12-31 16:00:00.000000000 -0800 +++ import_html_47xto5x/import_html.info 2007-02-11 17:51:26.000000000 -0800 @@ -0,0 +1,5 @@ +; $Id$ +name = Import HTML +description = Import/Export an entire site to/from static HTML +package = "Import/export" +version = "5.x-0.x-dev" diff -Nuarp import_html_47x/import_html.module import_html_47xto5x/import_html.module --- import_html_47x/import_html.module 2006-10-08 08:44:20.000000000 -0700 +++ import_html_47xto5x/import_html.module 2007-02-12 16:22:32.000000000 -0800 @@ -93,31 +93,55 @@ function import_html_menu($may_cache) { $items[] = array ( 'path' => 'admin/import_html', + 'title' => t('Import HTML'), + 'description' => t("Import/Export an entire site to/from static HTML"), + 'callback' => 'system_admin_menu_block_page', + 'access' => user_access('access admin'), + 'type' => MENU_NORMAL_ITEM, + ); + $items[] = array( + 'path' => 'admin/import_html/settings', + 'title' => t('Import HTML Settings'), + 'description' => t('Adjust the import_html options and settings.'), + 'callback' => 'drupal_get_form', + 'weight' => 1, + 'callback arguments' => array('import_html_admin_settings'), + 'access' => user_access('administer site configuration'), + 'type' => MENU_NORMAL_ITEM, + ); + $items[] = array ( + 'path' => 'admin/import_html/import_site', 'title' => t('Import HTML Site'), - 'callback' => 'import_html_page', - 'access' => user_access('access admin'), - 'type' => MENU_NORMAL_ITEM, - ); + 'description' => t("Import/Export an entire site to/from static HTML"), + 'weight' => -1, + 'callback' => 'import_html_page', + 'access' => user_access('access admin'), + 'type' => MENU_NORMAL_ITEM, + ); $items[] = array ( 'path' => 'admin/import_html/list_filesystem', 'title' => t('List files to import'), - 'callback' => 'import_html_list_filesystem_callback', - 'access' => user_access('access admin'), - 'type' => MENU_CALLBACK, - ); + 'description' => t('List files to import.'), + 'callback' => 'import_html_list_filesystem_callback', + 'access' => user_access('access admin'), + 'type' => MENU_CALLBACK, + ); $items[] = array ( 'path' => 'admin/import_html/import_files', 'title' => t('Import the selected files'), - 'callback' => 'import_html_import_files', + 'description' => t('Import the selected files.'), + 'callback' => 'import_html_import_files_page', 'access' => user_access('access admin'), 'type' => MENU_CALLBACK, ); $items[] = array ( 'path' => 'admin/import_html/demo', - 'title' => t('Demonstrate or test HTML Import on one file'), - 'callback' => 'import_html_demo_page', + 'title' => t('Demo'), + 'description' => t('Demonstrate or test HTML Import on one file.'), + 'callback' => 'drupal_get_form', + 'callback arguments' => array('import_html_demo_form'), 'access' => user_access('access admin'), - 'type' => MENU_CALLBACK, + 'type' => MENU_NORMAL_ITEM, ); } return $items ? $items : array (); @@ -143,9 +167,9 @@ function import_html_help($section) { switch ($section) { - case 'admin/modules#description' : + case 'admin/build/modules#description' : return t("Import/Export an entire site to/from static HTML"); - case 'admin/modules/import_html' : + case 'admin/build/modules/import_html' : return t("Import/Export an entire site to/from static HTML"); case 'admin/help#import_html' : return file_get_contents(drupal_get_path("module", "import_html") . "/import_html_help.htm"); @@ -208,22 +232,22 @@ function import_html_nodeapi(& $node, $o function import_html_page() { - if (!module_exist('path')){ + if (!module_exists('path')){ drupal_set_message(t("The path module is not enabled! If you try to import pages without paths, I can do it, but links won't work and you'll find it hard to find the pages again.'"),'error'); } $output = t("

- For background, remember to read the Import Html help page. - The settings page contains the config options. + For background, remember to read the Import Html help page. + The settings page contains the config options.

", - array ( '%help_link' => url('admin/help/import_html'), '%settings_link' => url('admin/settings/import_html') ) + array ( '!help_link' => url('admin/help/import_html'), '!settings_link' => url('admin/import_html/settings') ) ); $output .= t("

- For a quick intro, try a quick demo. + For a quick intro, try a quick demo.

", - array ('%link' => url('admin/import_html/demo')) + array ('!link' => url('admin/import_html/demo')) ); $intro = t(" @@ -235,6 +259,20 @@ function import_html_page()

"); $output .= theme_box(t("About Importing"), $intro); + $output .= drupal_get_form('import_html_page_form'); + + $output .= t(" +

+ Note Big sites will take a long time to process, + and processing may timeout. + You can either increase the php timeout value and wait around, + or just do sections at a time by using the subsection parameter above. +

"); + + print theme('page', $output); +} + +function import_html_page_form() { $form = array ( '#method' => 'post', '#action' => url('admin/import_html/list_filesystem'), @@ -271,28 +309,17 @@ function import_html_page() $form["htmlsource"][] = array ( '#type' => 'submit', - '#value' => t('Next' - ),); - - $output .= drupal_get_form('your_form_id', $form); - - $output .= t(" -

- Note Big sites will take a long time to process, - and processing may timeout. - You can either increase the php timeout value and wait around, - or just do sections at a time by using the subsection parameter above. -

"); - - print theme('page', $output); + '#value' => t('Next'), + ); + return $form; } /** * Display the import_html options and settings. * - * Hook Implimentation + * Settings menu callback */ -function import_html_settings() +function import_html_admin_settings() { $form = array(); @@ -447,7 +474,7 @@ function import_html_settings() '#value' => t("How and where the imported content is reproduced on this site. (requires path.module).") ); - if (!module_exist('path')){ + if (!module_exists('path')){ drupal_set_message(t("The path module is not enabled! If you try to import pages without paths, I can do it, but links won't work and you'll find it hard to find the pages again.'"),'error'); } @@ -551,7 +578,7 @@ function import_html_settings() ); */ - if (module_exist('menu')) + if (module_exists('menu')) { $options = menu_parent_options(0); $form['replication']['import_html_menu_parent_id'] = array( @@ -620,28 +647,29 @@ function import_html_settings() ); $form['import_html_proceed'] = array( - '#value' => '

'.l(t("Proceed to import"),"admin/import_html").'

', + '#value' => '

'.l(t("Proceed to import"),"admin/import_html/import_site").'

', ); - return $form; + return system_settings_form($form); } /** * Show an interface to import just one file */ -function import_html_demo_page() -{ - $output .= t("

+function import_html_demo_form() { + $form = array(); + + $form['description'] = array( + '#value' => t("

Enter one HTML URL to process as an import. The retrieved data will not automatically become part of the site unless you confirm it, it's just a demo of what data would be extracted on a simple import. -

"); - +

"), + ); $form = array( '#method' => 'post', - '#action' => url('admin/import_html/import_files'), - ); + ); $form['Select'] = array( '#type' => 'fieldset', @@ -662,10 +690,51 @@ function import_html_demo_page() '#type' => 'submit', '#value' => t("Next"), ); + return $form; +} - $output = drupal_get_form('import_html_demo_page', $form); - print theme('page', $output); +//TODO: fix handler errors +/** + * Submit handler for import_html_demo_form + * @param $form_id + * a form_id + * @param $form_values + * posted form values typically passed in by submit handler, but also can be from $_POST. + * should contain + * ('file_rel_path' array) + * relative to the submitted 'source_siteroot' (no trailing slash). + * Rel paths here begin with slashes. + */ +function import_html_demo_form_submit($form_id, $form_values) { + switch ($form_id) { + case 'import_html_demo_form': + // Bulk imports come with context paths which are used for structure + // Over-ride (fudge) those inputs for the demo + $source_path = foreslash( $form_values['source_url'] ); + + $url_parts = parse_url($source_path); + + // divide the path into two halves + $split_at = strpos($source_path, $url_parts['path']); + + $source_siteroot = substr($source_path, 0, $split_at); + $rel_path = substr($source_path, $split_at); + + $import_files = array($rel_path); + + $_POST['op'] = t('Preview'); // Fool the edit page into skipping to step 2 + $files = _import_html_import_files($import_files, $source_siteroot, $form_id, $form_values); + $file = array_shift($files); + $node = (object) $file['node']; + $output = drupal_get_form('page_node_form', $node); + print theme('page', $output); + break; + + case 'page_node_form': + drupal_execute('page_node_form', $form_values); + break; + } } @@ -699,12 +768,12 @@ function import_html_list_filesystem_cal } else { - $base_path = $_POST['edit']['source_siteroot']; + $base_path = $_POST['source_siteroot']; } $base_path = foreslash($base_path); variable_set('import_html_siteroot', $base_path); - $current_subsection = ensure_trailing_slash($_POST['edit']['import_html_current_subsection']); + $current_subsection = ensure_trailing_slash($_POST['import_html_current_subsection']); $current_subsection = preg_replace('|^/|', '', $current_subsection); variable_set('import_html_current_subsection', $current_subsection); @@ -797,7 +866,7 @@ function import_html_list_filesystem($ba '%base_path' => $base_path, '%dest_virtual_path' => l($dest_virtual_path, $dest_virtual_path ), '%dest_file_root' => l($dest_file_root, $dest_file_root), - '%settings' => url('admin/settings/import_html'), + '%settings' => url('admin/import_html/settings'), '%default_document' => variable_get("import_html_default_document", "index.htm"), '%current_subsection' => $current_subsection ? "($current_subsection)" : '' ) @@ -819,19 +888,24 @@ function import_html_list_filesystem($ba $tree_content .= ""; + $output .= drupal_get_form('import_html_list_filesystem_form', $tree_content); + return $output; +} + +function import_html_list_filesystem_form($tree_content) { $form['tree'] = array( '#value' => $tree_content ); - if (module_exist('menu')) + if (module_exists('menu')) { - if(module_exist('menu')){ + if(module_exists('menu')){ $form["files"]["import_html_create_menus"] = array( '#type' => 'checkbox', '#title' => t("Add each page to menu"), '#return_value' => TRUE, '#default_value' => variable_get("import_html_create_menus", TRUE), - '#description' => t( 'requires menu.module'.(module_exist('menu')?'(installed)':' (Which is NOT available)')), + '#description' => t( 'requires menu.module'.(module_exists('menu')?'(installed)':' (Which is NOT available)')), ); $options = menu_parent_options(0); $form["files"]["import_html_menu_parent_id"] = array( @@ -857,9 +931,97 @@ function import_html_list_filesystem($ba '#value' => t('Import'), ); $form['#method'] = 'post'; - $form['#action'] = '?q=admin/import_html/import_files'; - $output .= drupal_get_form('import_html_list_filesystem', $form); - return $output; + $form['#action'] = url('admin/import_html/import_files'); + + return $form; +} + +// The form import_html_list_filesystem_form does not support FAPI completely so the +// standard drupal submit handler cannot be used here. instead we use +// a menu item and callback +/** + * A submit handler for import_html_list_filesystem_form. + * + * @param $form_id + * a form_id + * @param $form_values + * posted form values typically passed in by submit handler, but also can be from $_POST. + * should contain + * ('file_rel_path' array wrapped in 'edit') + * relative to the submitted 'source_siteroot' (no trailing slash). + * Rel paths here begin with slashes. + */ +function import_html_import_files_page($form_id = NULL, $form_values = NULL) { + // When a real submit handler can be implemented, we should remove the default values + if ($form_id == NULL) { + $form_id = 'import_html_list_filesystem_form'; + } + if ($form_values == NULL) { + $form_values = $_POST; + } + + // We are still on this page when previewing of fixing problems ... + // so we may be called upon to handle op 'preview' and 'Submit'. + // ... pass them to the normal node edit versions + if (($form_values['op'] == t('Preview')) || ($form_values['op'] == t('Submit'))) + { + // TODO: checkme: does this work? + return node_page(); + } + + // Otherwise, do what we are really here for... + + // My fields are handed back wrapped in an 'edit' array. + $import_files = $form_values['edit']['file_rel_path']; + + variable_set("import_html_create_menus", $form_values['import_html_create_menus'] != NULL); + variable_set("import_html_menu_parent_id", $form_values['import_html_menu_parent_id']); + + $source_siteroot = trim_trailing_slash(foreslash($form_values["source_siteroot"])); + + $files = _import_html_import_files($import_files, $source_siteroot, $form_id, $form_values); + + foreach ($files as $file) { + if ($file['type'] == 'node') { + // + // Go ahead and create it now + // + if ($errors = $file['errors']) + { + debug('Import did not quite validate', 1); + debug_pre($errors, 1); + + drupal_set_message(t('Import of %rel_path did not quite validate', array ( + '%rel_path' => $rel_path + ))); + //$output .= node_preview($node); + + // TODO This is not very helpful in bulk mode. + $output .= drupal_get_form('page_node_form', $node); + } + else + { + $node = $file['node']; + if ($node->nid) + { + drupal_set_message(t("Node {$node->nid} Exists, updating it.")); + node_save($node); + } + else + { + drupal_set_message(t("Inserting New Node." . l($node->path, $node->path))); + node_submit($node); // Submit doesn't actually save, it just fills in extra fields + node_save($node); + + // Had to wait until I had an ID to do this + import_html_add_node_navigation($node); + } + + $output .= node_view($node, true); + } + } + } + print theme('page', $output); } /** @@ -938,69 +1100,41 @@ function _import_html_format_tree_group( } /** - * Actually go and get the selected files. + * Actually go and get the selected files. Converts the files + * into nodes and resources and returns an array containing the + * converted data. Currently resources are automatically + * saved and not returned with the data. Nodes are not saved + * and it is the responsibility of the caller to preview or save. * - * A page callback, all inputs are read from the POST. - * - * Previous page should be submitting a collection of filepaths - * ('file_rel_path' array) - * relative to the submitted 'source_siteroot' (no trailing slash). - * Rel paths here begin with slashes. - * - * Alternatively, this page does double service for the - * single page demo, (which may be an URL) which is submitted - * as the parameter 'source_url' + * Alternatively, this function does double service for the + * single page demo. * * The relative paths are used to calculate a traditional url alias * for each imported page, so the distinction between base and rel_path is * significant. - * + * @return + * an associative array of files like the following + * array ( // of files + * array( // each file + * 'source_path' => $source_path, + * 'dest_path' => $dest_path, + * 'type' => 'node' | 'resource', + * 'node' => $node, + * 'errors' => form_get_errors(), + * ), + * ... + * ) */ -function import_html_import_files() -{ - // We are still on this page when previewing of fixing problems ... - // so we may be called upon to handle op 'preview' and 'Submit'. - // ... pass them to the normal node edit versions - if (($_POST['op'] == t('Preview')) || ($_POST['op'] == t('Submit'))) - { - return node_page(); - } - - // Otherwise, do what we are really here for... - - // My fields are handed back wrapped in an 'edit' array. - $output = ""; - - $import_files = $_POST['edit']['file_rel_path']; - - variable_set("import_html_create_menus", $_POST['edit']['import_html_create_menus']); - variable_set("import_html_menu_parent_id", $_POST['edit']['import_html_menu_parent_id']); - - $source_siteroot = trim_trailing_slash(foreslash($_POST['edit']["source_siteroot"])); +function _import_html_import_files($import_files, $source_siteroot) { $dest_root = variable_get('import_html_file_storage_path', 'files/imported/'); - - // Bulk imports come with context paths which are used for structure - // Over-ride (fudge) those inputs for the demo - if (isset ($_POST['edit']['source_url'])) - { - $source_path = foreslash( $_POST['edit']['source_url'] ); - - $url_parts = parse_url($source_path); - // divide the path into two halves - $split_at = strpos($source_path, $url_parts['path']); - $source_siteroot = substr($source_path, 0, $split_at); - $rel_path = substr($source_path, $split_at); - - $import_files = array ( - 0 => $rel_path - ); - } + $output = ""; + $files = array(); if (!$import_files) { drupal_set_message(t("No Files Selected. Nothing to import"), 'error'); import_html_list_filesystem_callback(); - return; + return $files; } $url_parts = parse_url($source_siteroot); @@ -1008,7 +1142,7 @@ function import_html_import_files() drupal_set_message(t("Importing " . count($import_files) . " files now")); debug('Importing ' . count($import_files) . " files now", 1); - + // // Loop starts here // @@ -1038,7 +1172,13 @@ function import_html_import_files() continue; //skip local directories altogether (their contents is selected individually) } } + + $file = array( + 'source' => $source_path, + 'dest' => $dest_path, + ); + // handle files that are resources if (import_html_guess_file_class($dest_path) != 'page') { // non-page resource - what sort of processing can I do here? @@ -1051,6 +1191,8 @@ function import_html_import_files() ) )); import_html_get_raw_file_local($source_path, $dest_path, $url_parts['host']); + $file['type'] = 'resource'; + $files[] = $file; continue; } @@ -1125,60 +1267,20 @@ function import_html_import_files() form_set_error('body', t("No body content found in this node")); } - if (isset ($_POST['edit']['source_url'])) - { - // Demo mode, immediately show edit page. - // $output .= node_preview($node); - $_POST['op'] = t('Preview'); // Fool the edit page into skipping to step 2 - $output .= node_form($node); - } - else - { - // - // Go ahead and create it now - // - if ($errors = form_get_errors()) - { - debug('Import did not quite validate', 1); - debug_pre($errors, 1); - - drupal_set_message(t('Import of %rel_path did not quite validate', array ( - '%rel_path' => $rel_path - ))); - //$output .= node_preview($node); - - // TODO This is not very helpful in bulk mode. - $output .= node_form($node); - } - else - { - if ($node->nid) - { - drupal_set_message(t("Node {$node->nid} Exists, updating it.")); - node_save($node); - } - else - { - drupal_set_message(t("Inserting New Node." . l($node->path, $node->path))); - node_submit($node); // Submit doesn't actually save, it just fills in extra fields - node_save($node); - // Had to wait until I had an ID to do this - import_html_add_node_navigation($node); - } - - $output .= node_view($node, true); - } - } } else { drupal_set_message(t("Failed to process page '$rel_path'"), "error"); } + $file['node'] = $node; + $file['errors'] = form_get_errors(); + $file['type'] = 'node'; + $files[] = $file; // end all pages loop. } - - print theme('page', $output); + + return $files; } /** @@ -1218,12 +1320,12 @@ function import_html_get_raw_file_local( /** * Set alias and navigation for the given node - * Helper for import_html_import_files() + * Helper for _import_html_import_files() */ function import_html_add_node_navigation(& $node) { -dsm($node); - if (module_exist('path') && variable_get("import_html_legacy_aliases", TRUE) && ($node->old_path != $node->path)) +//dsm($node); + if (module_exists('path') && variable_get("import_html_legacy_aliases", TRUE) && ($node->old_path != $node->path)) { debug("Setting up navigation links for this item now. Directing ".$node->old_path." to go to the system path ".drupal_get_normal_path($node->path), 2); @@ -1262,7 +1364,7 @@ dsm($node); } } -/** +/**1 * Analyse a source page and create a node definition from it. * * Most of the processing magic is in here. @@ -1591,7 +1693,7 @@ function import_html_guess_file_class($f function import_html_create_menu_path($path, $title = '') { debug("Looking for a menu item matching path '$path' ", 2); - if ((!module_exist('menu')) || (!module_exist('path'))) + if ((!module_exists('menu')) || (!module_exists('path'))) { return; }