Parse xml into sql insert statements

Last modified: February 19, 2008 - 03:56

This code was originally published by Senor Dude. But because he didn't publish it in the Drupal Handbook, and because I added code to improve the generation of the teaser (the original code just put the whole body into the teaser), I'm posting it here.

You'll need to have perl of a high enough version to handle the iso-8859-1 encoding (I used 5.8; 5.6 is too old). You'll need to install XML::SAX (easy enough with cpan). This may takes quite a while to run, depending upon what parser it locates.

use XML::SAX::Base;
use XML::SAX::ParserFactory;

package Node;

my $teaser_length = 600;

package ConversionFilter;

@ISA = qw(XML::SAX::Base);

my $type = 'blog';

sub characters {
        my ($self,$data) = @_;
        $self->{_characters} .= $data->{Data};
}

sub start_element {
        my ($self, $element) = @_;
        my $tagname = $element->{LocalName};
        my $handle = "start_$tagname";
        if ($self->can($handle)) {
                $self->$handle($element);
        }
        $self->SUPER::start_element($element);
}

sub end_element {
        my ($self, $element) = @_;
        my $tagname = $element->{LocalName};
        my $handle = "end_$tagname";
        if ($self->can($handle)) {
                $self->$handle($element);
        }
        $self->SUPER::end_element();
}

sub start_item {
        my $self = shift;
        $self->{_current_item} = new Node();
   
}

sub end_item {
        my $self = shift;
        print $self->{_current_item}->insert_statement();
}

sub start_description {
        my $self = shift;
        $self->clear_characters();
}

sub end_description {
        my $self = shift;
        $self->{_current_item}->{description} = $self->get_characters();
}

sub start_title {
        my $self = shift;
        $self->clear_characters();
}

sub end_title {
        my $self = shift;
        $self->{_current_item}->{title} = $self->get_characters();
}

sub start_date {
        my $self = shift;
        $self->clear_characters();
}

sub end_date {
        my $self = shift;
        $self->{_current_item}->{created} = $self->get_characters();
}

sub clear_characters {
        my $self = shift;
        $self->{_characters} = "";
}

sub get_characters {
        my $self = shift;
        return $self->{_characters};
}

package Node;

sub new {
        my $class = shift;
        return bless {}, $class;
}


# Borrowed from node.module

sub node_teaser {
    my $body = shift;
    my $size = $teaser_length;


  if ($size == 0) {
    return $body;
  }

  if (length($body) < $size) {
    return $body;
  }

  if (my $length = rindex($body, "<br/>", $size)) {
    return substr($body, 0, $length);
  }

  if (my $length = rindex($body, "<br>", $size)) {
    return substr($body, 0, $length);
  }

  if (my $length = rindex($body, "</p>", $size)) {
    return substr($body, 0, $length);
  }

  if (my $length = rindex($body, "\n", $size)) {
    return substr($body, 0, $length);
  }

  if (my $length = rindex($body, ". ", $size)) {
    return substr($body, 0, $length + 1);
  }

  if (my $length = rindex($body, "! ", $size)) {
    return substr($body, 0, $length + 1);
  }

  if (my $length = rindex($body, "? ", $size)) {
    return substr($body, 0, $length + 1);
  }

  return substr($body, 0, $size);
}

sub insert_statement {
        my $self = shift;
        my $body = mysql_escape($self->{description});
        my $teaser = mysql_escape( node_teaser( $self->{description} ) );
        return "INSERT INTO node ".
                "(type,title,uid,status,comment, promote, users, attributes, revisions, created,teaser,body)".
                " VALUES ('$type','".
                mysql_escape($self->{title})."',1,1,2,1,'','','',".
                "UNIX_TIMESTAMP('".to_mysql_date($self->{created})."'),'$teaser','$body');\n";
}

sub mysql_escape {
        my $string = shift;
        $string =~ s/\n/\\n/mg;
        $string =~ s/(\'|\")/\\$1/g; #" quote to help syntax coloring
        return $string;
}

sub to_mysql_date {
        my $string = shift;
        $string =~ s/T/ /;
        $string =~ s/\+00:00$//;
        return $string;
}

package main;


my $filename = $ARGV[0];

my $handler = new ConversionFilter();

my $parser = new XML::SAX::ParserFactory->parser(Handler => $handler);

$parser->parse_uri($filename);

Not compatible with Drupal 5?

dan_aka_jack - March 21, 2007 - 19:42

Hi there,

Am I right in thinking that this script produces MySQL statements which are incompatible with Drupal 5? I have tried to import my Movable Type blog into Drupal 5.1 and MySQL gave this error: Unknown column 'users' in 'field list'

Many thanks,
Jack Kelly
Jack-Kelly.com

 
 

Drupal is a registered trademark of Dries Buytaert.