I noticed a bug with the module node_reference 7.x-2.0-beta3.

case:

- Node ID10 refers to node ID20
- Node ID20 is deleted
- When node ID10 is then viewed, the result is:

Notice : Undefined property: stdClass::$type dans _node_extract_type() (ligne 370 dans /.../drupal-7/modules/node/node.module).
Notice : Undefined property: stdClass::$nid dans node_build_content() (ligne 1359 dans /.../drupal-7/modules/node/node.module).
EntityMalformedException : Propriété de bundle manquante sur l'entité de type node. dans entity_extract_ids() (ligne 7389 dans/.../drupal-7/includes/common.inc).

I used hook_node_delete() to solve this problem:

/**
 * Implements hook_node_delete().
 *
 * Workaround for References 7.x-2.0-beta3: when a node is deleted, removes
 * every node_reference field value (current and revision storage) that still
 * points at it, so referencing entities no longer try to render a missing node.
 *
 * @param object $node
 *   The node being deleted; only $node->nid is read.
 */
function fonctions_globales_node_delete($node) {
  // Restrict to live fields kept in SQL storage. The table names below are
  // derived from the field name, which is only expected to be valid for
  // non-deleted field_sql_storage fields.
  // NOTE(review): relies on the field_config columns "storage_type" and
  // "deleted" and on deleted fields having their tables renamed - confirm
  // against field_sql_storage.
  $result = db_select('field_config', 'fc')
    ->fields('fc', array('field_name'))
    ->condition('fc.type', 'node_reference')
    ->condition('fc.storage_type', 'field_sql_storage')
    ->condition('fc.deleted', 0)
    ->execute();

  $num_deleted = 0;
  foreach ($result as $v) {
    $column = $v->field_name . '_nid';
    $num_deleted += db_delete('field_data_' . $v->field_name)
      ->condition($column, $node->nid)
      ->execute();
    $num_deleted += db_delete('field_revision_' . $v->field_name)
      ->condition($column, $node->nid)
      ->execute();
  }

  // Important: the field cache must be cleared, otherwise cached entities
  // keep the removed references. Only do it when rows were actually removed,
  // so deleting many unreferenced nodes does not flush the cache each time.
  if ($num_deleted) {
    field_cache_clear();
  }
}

Greetings.

Files: 
Comment | File | Size | Author
#10 | references_clean.zip | 1.54 KB | modstore

Comments

If I'm not mistaken, with this code, field_cache_clear() will be called N times when multiple nodes are deleted.

Trying to clarify title

d7 core issue with taxonomy #687180: Deleting a taxonomy vocabulary leaves term reference fields still pointing to it, and a PDO Exception when creating content

D6 cck issue #83929: referential integrity

Issue is with node reference and user reference.

Title:Fix bug when a node no longer exists» No deletion of node/user, referenes to it are not removed

An update function is needed to delete existing incorrect references.

Likely need to do something on cron due to the potential large amount of references being deleted.

You're right xjm, I corrected this way:

<?php
/**
 * Implements hook_node_delete().
 *
 * Workaround for References 7.x-2.0-beta3: removes node_reference values
 * pointing at the node being deleted, and clears the field cache only when
 * at least one current-revision value was removed.
 *
 * @param object $node
 *   The node being deleted; only $node->nid is read.
 */
function fonctions_globales_node_delete($node) {
  // Only live, SQL-stored fields are expected to own the field_data_* and
  // field_revision_* tables addressed below.
  // NOTE(review): relies on the field_config columns "storage_type" and
  // "deleted" - confirm against the Field module schema.
  $query = db_select('field_config', 'fc')
    ->fields('fc', array('field_name'))
    ->condition('fc.type', 'node_reference')
    ->condition('fc.storage_type', 'field_sql_storage')
    ->condition('fc.deleted', 0);
  $result = $query->execute();

  $delete = FALSE;
  foreach ($result as $v) {
    $num_deleted = db_delete('field_data_' . $v->field_name)
      ->condition($v->field_name . '_nid', $node->nid)
      ->execute();

    db_delete('field_revision_' . $v->field_name)
      ->condition($v->field_name . '_nid', $node->nid)
      ->execute();

    if ($num_deleted) {
      $delete = TRUE;
    }
  }

  // Important: without this, cached entities keep the removed references.
  // Skipped when nothing was deleted so bulk node deletion stays cheap.
  if ($delete) {
    field_cache_clear();
  }
}
?>

I'm not really in favour of doing this task on cron, because for up to an hour (cron runs once an hour) the nodes in question are no longer accessible. I find that problematic.

Title:No deletion of node/user, referenes to it are not removed» On deletion of node/user, referenes to it are not removed

Regarding deleting on cron, it should at least be a queue in some form, because otherwise this code is guaranteed to break large sites. See: #89181: Use queue API for node and comment, user, node multiple deletes.

Thank you for your link. I'll watch.
Greetings.

Title:On deletion of node/user, referenes to it are not removed» On deletion of node/user, references to it are not removed

Just a quick note that the code here should probably use hook_entity_delete() rather than hook_node_delete(), so that nodes and users can be dealt with similarly?

Probably not since the sub-modules for node and user are separate and hard-coded to either node or user.

StatusFileSize
new1.54 KB

I created a module that runs in the queue. Full module attached.

If you want it to run more often than every hour, I suggest using Elysia cron, where you can specify how often each of your cron modules should run.

<?php
/**
* @file
* References Clean
*/
/**
 * Implements hook_cron().
 *
 * Finds node_reference values whose target node no longer exists and queues
 * one item per broken value in the 'references_clean_remove' queue.
 *
 * @return bool|null
 *   NULL when the queue still holds items (nothing is added), TRUE otherwise.
 */
function references_clean_cron() {
  $queue = DrupalQueue::get('references_clean_remove');
  // Don't add anything while items are still pending, otherwise frequent
  // cron runs could queue duplicates.
  if ($queue->numberOfItems()) {
    return;
  }

  // Field names of live, SQL-stored node_reference fields.
  // NOTE(review): relies on the field_config columns "storage_type" and
  // "deleted"; other fields are not expected to own a field_data_* table.
  $result = db_select('field_config', 'fc')
    ->fields('fc', array('field_name'))
    ->condition('fc.type', 'node_reference')
    ->condition('fc.storage_type', 'field_sql_storage')
    ->condition('fc.deleted', 0)
    ->execute();

  $count = 0;
  foreach ($result as $record) {
    // Values of this field whose referenced nid has no matching node row.
    $query = db_select('field_data_' . $record->field_name, 'r');
    $query->leftJoin('node', 'n', 'r.' . $record->field_name . '_nid = n.nid');
    $query->isNull('n.nid');
    // Skip rows flagged as deleted: presumably they are never loaded onto an
    // entity, so the worker could not clean them and they would be re-queued
    // on every run - TODO confirm against field_sql_storage.
    $query->condition('r.deleted', 0);
    $query->fields('r', array('entity_id', 'entity_type', 'delta', 'language'));

    foreach ($query->execute() as $this_record) {
      // The worker needs to know which field the broken value lives in.
      $this_record->field_name = $record->field_name;
      $queue->createItem($this_record);
      $count++;
    }
  }

  if ($count) {
    // Placeholder form keeps the logged message string constant; the rendered
    // text is unchanged.
    watchdog('references_clean', '@count item(s) queued to be cleaned.', array('@count' => $count));
  }
  return TRUE;
}
/**
 * Implements hook_cron_queue_info().
 *
 * Declares the 'references_clean_remove' queue.
 *
 * @return array
 *   Queue definitions keyed by queue name.
 */
function references_clean_cron_queue_info() {
  return array(
    'references_clean_remove' => array(
      // Callback invoked for each queue item.
      'worker callback' => 'references_clean_remove',
      // Maximum run time per cron run, in seconds.
      'time' => 60,
    ),
  );
}
/**
 * Queue worker callback: removes broken node references from one entity.
 *
 * The whole field value for the queued language is re-checked against the
 * node table instead of trusting the queued delta. Several items can be
 * queued for the same entity and field; once the first one has been processed
 * and the values re-indexed, the remaining deltas no longer point at the same
 * values, so removing "by delta" could drop a valid reference or leave a
 * broken one behind.
 *
 * @param object $item
 *   Queue item; the properties entity_type, entity_id, field_name and
 *   language are read (delta is no longer needed).
 *
 * @return bool
 *   FALSE when the entity cannot be loaded, TRUE otherwise.
 */
function references_clean_remove($item) {
  $entities = entity_load($item->entity_type, array($item->entity_id));
  if (!$entities) {
    return FALSE;
  }

  foreach ($entities as $entity) {
    if (empty($entity->{$item->field_name}[$item->language])) {
      // Nothing stored for this field/language (already cleaned).
      continue;
    }
    $elements = $entity->{$item->field_name}[$item->language];

    // Collect the referenced nids and look up which of them still exist.
    $nids = array();
    foreach ($elements as $element) {
      if (!empty($element['nid'])) {
        $nids[] = $element['nid'];
      }
    }
    $existing = array();
    if ($nids) {
      $found = db_select('node', 'n')
        ->fields('n', array('nid'))
        ->condition('n.nid', $nids, 'IN')
        ->execute()
        ->fetchCol();
      // Flip for key lookups; array keys normalise numeric strings and ints.
      $existing = array_flip($found);
    }

    // Keep only values whose node still exists; $kept is re-indexed from 0.
    $kept = array();
    foreach ($elements as $element) {
      if (!empty($element['nid']) && isset($existing[$element['nid']])) {
        $kept[] = $element;
      }
    }

    if (count($kept) == count($elements)) {
      // No broken reference left on this entity; avoid a needless save.
      continue;
    }

    // Assign the (possibly empty) list rather than unsetting the language
    // key. NOTE(review): to my understanding SQL field storage only rewrites
    // languages present on the entity, so an unset key could leave the stored
    // rows untouched when other languages have values - confirm.
    $entity->{$item->field_name}[$item->language] = $kept;

    entity_save($item->entity_type, $entity);

    watchdog('references_clean', "Entity: @id of type @type has had it's references cleaned.", array(
      '@id' => $item->entity_id,
      '@type' => $item->entity_type,
    ));
  }
  return TRUE;
}
?>

Elysia cron will help run *less_often*: if hosting is the reason why your cron is triggered only once an hour, Elysia won't help.

To be pragmatic, I believe there should be a setting that decides how the cleanup is triggered: on cron vs. "on delete".

Thanks all for this work!

Alexandre

I created a module to address this issue for a client, and was thinking about submitting it to References for inclusion, but since I'm not very clear on References' current state of development, I decided to publish Reference Delete as a new module. The first dev release will be available later, but the module is already on git.

It works on node delete and user delete (not cron). It might be clumsy and/or rough around the edges, so I'm open to suggestions.

Priority:Normal» Critical

Raising to critical.

This is a serious shortcoming in the References module. Not deleting the occurrences of nodes in other reference fields creates serious inconsistencies.

For example, the Search API Apache Solr module indexes items which the Entity API then tries to load; this causes errors such as: EntityMetadataWrapperException: Unable to get the data property location as the parent data structure is not set. in EntityStructureWrapper->getPropertyValue()

The functionality discussed should be committed in the main references module.

Another module that might help here (currently just a sandbox module) is Field reference delete.

The delete could register a shutdown function to run after the page processing is finished. You then get everything processed almost immediately without holding up the current page. You could copy the process from Poormanscron.

There is also ultimate_cron to run a cron process much more often ...

Not much of a PHP guy, but I am having a similar problem. If I used the code in #10, what would be a good way to modify that to account for nodes that had since been unpublished (but not deleted)?

Issue summary:View changes

Removing the full path of the error

Issue summary:View changes

<?php
/**
 * Deletes node_reference field values that point to nodes which no longer exist.
 *
 * Walks the current and revision SQL tables of every node_reference field and
 * removes rows whose nid column has no match in the node table, then clears
 * the field cache.
 */
function module_update_7101() {
  foreach (field_info_fields() as $field_info) {
    if ($field_info['type'] != 'node_reference') {
      continue;
    }
    foreach (array('FIELD_LOAD_CURRENT', 'FIELD_LOAD_REVISION') as $table_type) {
      if (empty($field_info['storage']['details']['sql'][$table_type])) {
        // Field is not kept in SQL storage, or has no table of this kind.
        continue;
      }
      $table = key($field_info['storage']['details']['sql'][$table_type]);
      $column = $field_info['storage']['details']['sql'][$table_type][$table]['nid'];

      // The braces must be literal characters in the query so the database
      // layer applies the table prefix. Writing "{$table}" inside a
      // double-quoted string is PHP variable interpolation and silently drops
      // the braces, which breaks on prefixed installations.
      db_query('DELETE FROM {' . $table . '} WHERE ' . $column . ' NOT IN (SELECT nid FROM {node})');
    }
  }

  field_cache_clear();
}
?>

<?php
/**
 * Implements hook_node_delete().
 *
 * Deletes any leftover node_reference values pointing at the node being
 * deleted (References does not keep itself tidy) and resets the caches of
 * the entities that held those references.
 *
 * @param object $node
 *   The node being deleted; only $node->nid is read.
 */
function module_node_delete($node) {
  $deleted = 0;
  // IDs of referencing entities, grouped by entity type.
  $ids = array();

  foreach (field_info_fields() as $field_info) {
    // Every node_reference field is checked. The nid condition below already
    // limits the work to real references, whereas filtering on the
    // 'referenceable_types' setting would skip fields whose settings changed
    // after the reference was stored. NOTE(review): it presumably also skips
    // fields that pick referenceable nodes through a view - confirm.
    if ($field_info['type'] != 'node_reference') {
      continue;
    }
    foreach (array('FIELD_LOAD_CURRENT', 'FIELD_LOAD_REVISION') as $table_type) {
      if (empty($field_info['storage']['details']['sql'][$table_type])) {
        continue;
      }
      $table = key($field_info['storage']['details']['sql'][$table_type]);
      $column = $field_info['storage']['details']['sql'][$table_type][$table]['nid'];

      // Remember which entities referenced the node before the rows are gone.
      $rows = db_select($table, 'f')
        ->fields('f', array('entity_type', 'entity_id'))
        ->condition($column, $node->nid)
        ->execute();
      foreach ($rows as $row) {
        $ids[$row->entity_type][$row->entity_id] = $row->entity_id;
      }

      $deleted += db_delete($table)
        ->condition($column, $node->nid)
        ->execute();
    }
  }

  if ($deleted) {
    // Reset entity caches through each type's controller instead of clearing
    // the 'cache_entity_node' bin directly: that bin is only meaningful when
    // an entity cache module provides it, and the referencing entities are
    // not necessarily nodes. NOTE(review): assumes the entity cache
    // controller's resetCache() also clears its persistent bin - confirm.
    foreach ($ids as $entity_type => $entity_ids) {
      if (entity_get_info($entity_type)) {
        entity_get_controller($entity_type)->resetCache(array_values($entity_ids));
      }
    }
    field_cache_clear();
  }
}
?>

Just dropping this here, non-cron way that works with entity cache