'subform',
      'title' => t('weblinker'),
      'callback' => 'weblinker_form_page',
      'type' => MENU_CALLBACK,
      'callback arguments' => array(
        'edit'=>$_REQUEST,
      ),
      'access' => $access,
    );
#  }
  if (arg(0) == 'node' && is_numeric(arg(1))) {
    $node = node_load(arg(1));
    if ($node->nid) {
      $items[] = array(
        'path' => 'node/'. arg(1) . '/weblinker',
        'title' => t('weblinker'),
        'callback' => 'weblinker_page',
        'access' => node_access('update', $node),
        'type' => MENU_LOCAL_TASK
      );
    }
  }
  return $items;
}

/**
 * Implementation of hook_form_alter().
 *
 * Appends a themed table of the links embedded in the node body to the
 * 'page' node edit form. Every other form is left untouched.
 *
 * @param $form_id
 *   Identifier of the form being built.
 * @param $form
 *   The form array, altered in place.
 */
function weblinker_form_alter($form_id, &$form){
  // Guard: only the page node edit form is decorated.
  if ($form_id != 'page_node_form') {
    return;
  }

  $module_path = drupal_get_path('module', 'weblinker');
  drupal_add_js('misc/progress.js');
  drupal_add_js($module_path . '/weblinker.js');
  // this will cause problems between 4.7 and 5.x
  /*
  if(file_exists('misc/jquery.js')){
    drupal_add_js('misc/jquery.js');
  }
  else {
    drupal_add_js(drupal_get_path('module','weblinker').'/jquery.js');
  }
  */

  $found_links = weblinker_scan_links($form['#node']);
  $form['weblinker'] = array(
    '#value' => theme('weblinker_found_links', $found_links),
    '#weight' => 5,
  );
}

/**
 * Menu callback: scan a node for embedded links and show the result.
 *
 * By default the node is re-displayed with a summary of the links found
 * placed above its body, to assist creation of corresponding weblink nodes.
 * If $op is 'auto', weblinks are created for the links without any further
 * user input before the page is shown.
 *
 * The node is loaded from arg(1) of the current path.
 *
 * @param $op
 *   'auto' to create weblinks immediately; anything else only displays.
 * @return
 *   Whatever node_show() returns for the annotated node.
 */
function weblinker_page($op='show_link'){
  $page_node = node_load(arg(1));

  drupal_add_js('misc/progress.js');
  drupal_add_js(drupal_get_path('module', 'weblinker') . '/weblinker.js');
  // NOTE(review): an empty string is added to the HTML head here; presumably
  // this once carried markup - confirm against the original module.
  drupal_set_html_head('');

  $found_links = weblinker_scan_links($page_node);

  if ('auto' == $op) {
    // Turn the scan results into new weblink nodes. The list is passed by
    // reference, so the results are annotated for the summary below.
    weblinker_create_weblinks($found_links, $page_node);
  }

  // Display the scan results above the page
  $page_node->body = theme('weblinker_found_links', $found_links) . $page_node->body;
  return node_show($page_node, arg(2));
}

/**
 * Find all links in the body of the node.
*
 * Return a prepared array of those links, with weblinks attached if they exist
 *
 * As a side effect, a status message summarising the counts is set when at
 * least one link was found.
 *
 * @param $node
 *   Node object whose body is scanned. A missing body is treated as empty.
 * @return
 *   Array keyed by the scrubbed href. Each entry is an array with the keys
 *   'href', 'title' (the anchor's inner text), 'weblink' (a loaded weblink
 *   node, or NULL when none is registered) and 'local' (TRUE when the URL has
 *   no host part).
 */
function weblinker_scan_links($node){
  $text = isset($node->body) ? $node->body : '';
  $weblinker_results = array();
  $found_weblinks = array();
  $local_weblinks = array();

  // Thanks to Chirp Internet: www.chirp.com.au http://www.the-art-of-web.com/php/parse-links/
  // The (?<!/) lookbehind skips self-closed anchors (seen in some truly bad
  // input pages).
  // Groups: 1 = tag up to href=, 2 = optional quote, 3 = the link,
  // 4 = rest of the opening tag, 5 = the anchor text.
  // The 'U' modifier inverts greediness, so '??' and '+?' are greedy here.
  $pattern = "|(<a[^>]*href=)(['\"]??)([^'\"]+?)\\2([^>]*(?<!/)>)(.*)</a>|";
  if (!preg_match_all($pattern . "siU", $text, $matches)) {
    // No links (or the scan failed): nothing to annotate or report.
    return $weblinker_results;
  }

  // $matches[3] is a list of links,
  // $matches[5] is a list of titles;
  // massage the results into annotatable form
  foreach ($matches[3] as $ix => $raw_link) {
    $link = weblinker_scrub_url($raw_link);
    $url_parts = parse_url($link);
    $local = (! isset($url_parts['host']));

    // Several weblinks may share a URL; only one is used (array_pop takes
    // the last one returned, or NULL when there are none).
    $weblinks = weblinks_get($link, TRUE);
    $weblink = array_pop($weblinks);

    $details = array(
      'href' => $link,
      'title' => $matches[5][$ix],
      'weblink' => $weblink,
      'local' => $local,
    );
    // Keyed by href, so a link repeated in the body is listed once.
    $weblinker_results[$link] = $details;

    if ($details['weblink']) {
      // used for counting
      $found_weblinks[$link] = $details['weblink'];
    }
    if ($local) {
      $local_weblinks[$link] = $details['weblink'];
    }
  }

  if ($weblinker_results) {
    $auto_link = url('node/'. arg(1) . '/weblinker/auto');
    drupal_set_message(t('Found %weblinker_count links in the page. %local_count are internal, local references. %found_count are already registered as weblinks. <a href="!auto_link">Import all links now</a> - using the current pages properties.', array(
      '%weblinker_count' => count($weblinker_results),
      '%found_count' => count($found_weblinks),
      '%local_count' => count($local_weblinks),
      '!auto_link' => $auto_link,
    )));
  }

  return $weblinker_results;
}

/**
 * Given an array of links, render them, with some supplementary info, in a
 * table.
 *
 * The array may or may not have been checked at this stage. If checked, they
 * will have a 'weblink' property attached to them.
*
 * @param $weblinker_results
 *   Array of link detail arrays as built by weblinker_scan_links(). Entries
 *   may additionally carry 'remote_info' (with 'http_code' and
 *   'redirect_count') once a remote lookup has been done. 'weblink' may be a
 *   node object, or a plain array of values for a weblink that was not saved.
 * @return
 *   The themed table. An empty list yields a table with a header and no rows.
 */
function theme_weblinker_found_links($weblinker_results){
  // A fixed column set keeps the header and every row in agreement, and
  // works when there are no links at all.
  $header = array('title', 'status', 'action');
  $rows = array();

  foreach ($weblinker_results as $details) {
    $remote_info = empty($details['remote_info']) ? array() : $details['remote_info'];

    $row = array(
      'title' => l($details['title'], $details['href']),
      'status' => isset($remote_info['http_code']) ? $remote_info['http_code'] : 'unchecked',
      'action' => '',
    );
    if (!empty($details['local'])) {
      $row['status'] = 'local';
    }
    $class = 'status-' . $row['status'];

    if (!empty($details['weblink'])) {
      // It seems valid
      $weblink = $details['weblink'];
      $nid = (is_object($weblink) && !empty($weblink->nid)) ? $weblink->nid : NULL;

      if ($nid) {
        // It exists already.
        $row['status'] = l('linked', 'node/' . $nid);
        if (!empty($remote_info['redirect_count'])) {
          $row['status'] .= '('. $remote_info['redirect_count'] .' redirects)';
        }
        $class = 'weblinker-status-linked';

        // Inline edit action: for an existing weblink only the nid, and
        // maybe the parent, need to be sent.
        $parameter_array = array(
          'nid' => $nid,
          'parent_nid' => isset($weblink->parent_nid) ? $weblink->parent_nid : NULL,
        );
      }
      else {
        // No weblink exists for this yet
        // Inline all the data we can
        $row['status'] = l('create?', 'node/add/weblinks');
        // send everything we know to the subform
        $parameter_array = (array)$weblink;
      }

      // drupal l() escapes the ampersand for me, don't do it twice
      $parameters = http_build_query($parameter_array, '', '&');
      $row['action'] = l('edit', 'subform/weblinks', array('class'=>'sub-form-link'), $parameters);
    }
    // else: invalid/unchecked link, no action offered

    $rows[] = array('data' => $row, 'class' => $class);
  }

  $attributes = array(
    'class' => 'weblinker-summary',
  );
  return theme('table', $header, $rows, $attributes, 'Links found in this page');
}

/**
 * Given a list of links, instantiate a weblink object for each one.
 *
 * Does not double-up if it's already registered
 *
 * Pass by reference enables the verification (remote lookup) to set a flag in
 * the original array.
*
 * Local links and links that already carry a 'weblink' are skipped. For every
 * other link the remote page is fetched; unless the request returns an HTTP
 * code of 400 or above, a 'weblinks' node is saved. Either way the entry gets
 * 'remote_info' and 'weblink' set so the renderer can use them.
 *
 * @param $link_list
 *   Array of link detail arrays (see weblinker_scan_links()); annotated in
 *   place.
 * @param $context_node
 *   The node the links were found in. New weblinks inherit its owner (name,
 *   uid) and taxonomy.
 */
function weblinker_create_weblinks(&$link_list, $context_node){
  global $weblinker_count;
  $weblinks_vid = _weblinks_get_vid();
  $weblinks_default_tid = weblinks_default_tid();

  foreach ($link_list as $ix => &$details) {
    // Nothing to do for local references or already-known links.
    if (!empty($details['local']) || !empty($details['weblink'])) {
      continue;
    }

    // Make it on-the-fly now
    $weblink_def = array(
      'type' => 'weblinks',
      'title' => strip_tags($details['title']),
      'body' => $details['title'],
      'status' => 1,
      'url' => $details['href'],
      // inherit the parent node's owner
      'name' => $context_node->name,
      'uid' => $context_node->uid,
      // inherit any classifications the parent node may have had
      'taxonomy' => !empty($context_node->taxonomy) ? $context_node->taxonomy : array(),
    );

    if (empty($weblink_def['taxonomy'][$weblinks_vid])) {
      // stop weblinks complaining about unclassified links.
      if ($weblinks_default_tid) {
        $weblink_def['taxonomy'][$weblinks_vid] = array($weblinks_default_tid => $weblinks_default_tid);
      }
      else {
        drupal_set_message(t('Problem initializing new weblink - there is no default term in the weblinks vocab. This can probably be fixed in the weblinks settings'),'error');
      }
    }

    // Do remote lookups to try and retrieve the links status from the live location.
    $remote_info = weblinker_fetch_remote_info($details['href']);
    if ($remote_info['http_code'] >= 400) {
      drupal_set_message(t("Remote lookup for !link failed. Request returned %http_code. This weblink is not going to be created.",array('!link' => l($details['href'],$details['href']), '%http_code' => $remote_info['http_code']) ));
    }
    else {
      // so far so good. Fill in the retrieved details.
      // NOTE(review): the "\n\n" wrappers below are what the source holds;
      // they presumably once were HTML markup - confirm against the original.

      // Use the remote pages own description as the local body
      if (!empty($remote_info['description'])) {
        $weblink_def['body'] = "\n\n" . trim(filter_xss($remote_info['description'])) . "\n\n" . $weblink_def['body'];
      }
      if (!empty($remote_info['title'])) {
        // Prepend rather than assign, so the description added above is kept.
        $weblink_def['body'] = "\n\n" . trim(filter_xss($remote_info['title'])) . "\n\n" . $weblink_def['body'];
        drupal_set_message(t("Actual remote title for %local_title was !remote_title.",array('%local_title' => $weblink_def['title'], '!remote_title' => l($remote_info['title'],$details['href'])) ));
        if (! trim($weblink_def['title'])) {
          $weblink_def['title'] = trim(strip_tags($remote_info['title']));
        }
      }

      // Drupal 5 would submit through the form API:
      //   drupal_execute('weblinks', $weblink_def);
      // Drupal 4.7 saves the node directly.
      $weblink_def = (object)$weblink_def;
      node_save($weblink_def);
      drupal_set_message(t('Created weblink !weblink',array('!weblink' => l($weblink_def->title, 'node/'.$weblink_def->nid))));
      $weblinker_count++;
    }

    // pass these back if the renderer wants them
    $details['remote_info'] = $remote_info;
    $details['weblink'] = $weblink_def;
  }
  // Break the reference so later writes to $details cannot touch the list.
  unset($details);
}

/**
 * Retrieve the header from the named page from the remote site
 *
 * Fetches the page with cURL (following redirects) and scans the response for
 * a title (falling back to the first h1) and a meta description.
 *
 * @param $link
 *   URL to request.
 * @return
 *   The curl_getinfo() array for the request. When the request succeeded and
 *   the markup was found, 'title' and 'description' are added, each passed
 *   through check_plain().
 */
function weblinker_fetch_remote_info($link){
  drupal_set_message(t("Remotely requesting the page at !link",array('!link' => l($link,$link)) ));

  // One cURL handle is created on first use and re-used for later calls.
  static $ch;
  if (!$ch) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); // follow remote redirects
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
  }
  curl_setopt($ch, CURLOPT_URL, $link);
  $result = curl_exec($ch);
  $info = curl_getinfo($ch);

  if (curl_errno($ch)) {
    drupal_set_message(curl_error($ch),'error');
  }
  else {
    // Scan result for title and description
    // XML would be easier, but I don't want to require it
    $title_pattern = "|<title[^>]*>([^<]+)|i";
    $h1_pattern = "|<h1[^>]*>([^<]+)|i";
    if (preg_match($title_pattern, $result, $matches)) {
      $info['title'] = check_plain($matches[1]);
    }
    else if (preg_match($h1_pattern, $result, $matches)) {
      $info['title'] = check_plain($matches[1]);
    }

    // The name and content attributes may come in either order.
    $description_pattern = '|<meta[^>]+name=[\'"]description[\'"][^>]+content=[\'"](.*)[\'"][^>]*>|i';
    $description_pattern2 = '|<meta[^>]+content=[\'"](.*)[\'"][^>]+name=[\'"]description[\'"][^>]*>|i';
    if (preg_match($description_pattern, $result, $matches)) {
      $info['description'] = check_plain($matches[1]);
    }
    else if (preg_match($description_pattern2, $result, $matches)) {
      $info['description'] = check_plain($matches[1]);
    }
  }
  // do not close curl, re-use it
  return($info);
}

/**
 * Find a default term id for new weblinks.
 *
 * weblinks requires a taxonomy entry for each item, thus we need a default.
 * Uses the 'weblinks_default_tid' variable when set; otherwise takes the last
 * term returned for the top level of the weblinks vocabulary.
 *
 * @return
 *   A term id, or 0 when the variable is unset and no term was found.
 */
function weblinks_default_tid(){
  $weblinks_default_tid = variable_get('weblinks_default_tid', 0);
  if ($weblinks_default_tid) {
    return $weblinks_default_tid;
  }

  $weblinks_vocab = taxonomy_get_tree(_weblinks_get_vid(), 0, -1, 1);
  $weblinks_default_term = array_pop($weblinks_vocab);
  if ($weblinks_default_term) {
    $weblinks_default_tid = $weblinks_default_term->tid;
  }
  return $weblinks_default_tid;
}

/**
 * A cut-down 'create weblink' edit page, suitable to embed or pop-up from other
 * contexts, such as bulk action screens.
 *
 * Renders a form much like the node/add/weblinks one, but without the extras.
 * The form is printed directly, with no page chrome, and nothing is returned.
 *
 * Initializes from the nid given, or one found in the edit parameters array
 *
 * @param $edit
 *   Array of preset form values; may contain 'nid'.
 * @param $request_nid
 *   Node id to edit. Any non-numeric value (the default 'add') falls back to
 *   $edit['nid'].
 */
function weblinker_form_page($edit = array(), $request_nid='add'){
  if (is_numeric($request_nid)) {
    $nid = $request_nid;
  }
  else {
    $nid = isset($edit['nid']) ? $edit['nid'] : NULL;
  }

  // Stays NULL when there is nothing to load; weblinker_form() then starts
  // from a blank weblinks node.
  $node = NULL;
  if ($nid) {
    $node = node_load($nid);
  }

  $form = weblinker_form($node, $edit);
  $output = drupal_get_form('weblinks_node_form', $form, 'node_form');
  // No chrome.
  print $output;
  return;
}

/**
 * Returns a form much like the node/add/weblinks one, but without the extras.
 *
 * sorta Implementation of hook_form
 *
 * @param $node
 *   Node being edited. When empty it is replaced by a blank 'weblinks' node.
 * @param $edit
 *   Preset values; merged over the node's own values in place.
 * @return
 *   The simplified form array.
 */
function weblinker_form(&$node, &$edit = array()){
  if (! $node) {
    $node = (object)array('type'=>'weblinks');
  }
  $edit = array_merge((array)$node, $edit);

  // Start with a normal weblink node edit form
  // then simplify it a bit.
  // (Passed without a call-time '&', which PHP 5.4+ rejects.)
  $form = weblinks_form($edit);
  unset($form['body_filter']);

  $form['title']['#default_value'] = isset($edit['title']) ? $edit['title'] : NULL;
  $form['url']['#default_value'] = isset($edit['url']) ? $edit['url'] : NULL;
  $form['weight']['#type'] = 'hidden';
  $form['weight']['#default_value'] = 0;

  // Get taxonomy to garnish this mini-form, but not all the others
  // Need a fake node so taxonomy knows what to do
  $form['#node'] = $node;
  $form['type']['#value'] = 'weblinks';
  if (!empty($edit['parent_nid'])) {
    // preset/merge with its parents tags
    $own_terms = isset($form['#node']->taxonomy) ? (array)$form['#node']->taxonomy : array();
    $form['#node']->taxonomy = array_merge($own_terms, taxonomy_node_get_terms($edit['parent_nid']));
  }
  // just invoke the taxonomy additions
  taxonomy_form_alter('weblinks_node_form', $form);
  // don't let the other ones see it or they'll want to join in when
  // drupal_get_form does its form_alter.
  unset($form['type']);

  $form['submit'] = array('#type' => 'submit', '#value' => t('Submit'));
  return $form;
}

/**
 * repair any badness we find in the URLs - specifically any old redirect bounce
 * pages or redundant referrers.
 *
 * @param $link
 *   URL as found in the page.
 * @return
 *   The URL with each known prefix pattern removed.
 */
function weblinker_scrub_url($link){
  // @TODO make this configurable
  $patterns = array(
    '|^/click2\.php\?|i' => '',
    '|^http\://service\.bfast\.com/.*\?key=U|' => '',
  );
  return preg_replace(array_keys($patterns), array_values($patterns), $link);
}

/**
 * Find and load the nid(s) that matches the given URL
 *
 * @param string $url
 * @param boolean $load whether to node_load the entire weblink object while we
 * are there
 * @return array of {weblinks} rows (or loaded nodes when $load is TRUE),
 * indexed by nid. Empty when no weblink has this URL.
 */
function weblinks_get($url, $load = FALSE){
  $query = "SELECT * FROM {weblinks} where url = '%s'";
  $result = db_query(db_rewrite_sql($query), $url);

  $nodes = array();
  while ($data = db_fetch_object($result)) {
    $nodes[$data->nid] = $load ? node_load($data->nid) : $data;
  }
  return $nodes;
}