Parse xml into sql insert statements
Last modified: February 19, 2008 - 03:56
This code was originally published by Senor Dude. But because he didn't publish it in the Drupal Handbook, and because I added code to improve the generation of the teaser (the original code just put the whole body into the teaser), I'm posting it here.
You'll need to have perl of a high enough version to handle the iso-8859-1 encoding (I used 5.8; 5.6 is too old). You'll need to install XML::SAX (easy enough with cpan). This may takes quite a while to run, depending upon what parser it locates.
use XML::SAX::Base;
use XML::SAX::ParserFactory;
package Node;
my $teaser_length = 600;
package ConversionFilter;
@ISA = qw(XML::SAX::Base);
my $type = 'blog';
sub characters {
my ($self,$data) = @_;
$self->{_characters} .= $data->{Data};
}
sub start_element {
my ($self, $element) = @_;
my $tagname = $element->{LocalName};
my $handle = "start_$tagname";
if ($self->can($handle)) {
$self->$handle($element);
}
$self->SUPER::start_element($element);
}
sub end_element {
my ($self, $element) = @_;
my $tagname = $element->{LocalName};
my $handle = "end_$tagname";
if ($self->can($handle)) {
$self->$handle($element);
}
$self->SUPER::end_element();
}
sub start_item {
my $self = shift;
$self->{_current_item} = new Node();
}
sub end_item {
my $self = shift;
print $self->{_current_item}->insert_statement();
}
sub start_description {
my $self = shift;
$self->clear_characters();
}
sub end_description {
my $self = shift;
$self->{_current_item}->{description} = $self->get_characters();
}
sub start_title {
my $self = shift;
$self->clear_characters();
}
sub end_title {
my $self = shift;
$self->{_current_item}->{title} = $self->get_characters();
}
sub start_date {
my $self = shift;
$self->clear_characters();
}
sub end_date {
my $self = shift;
$self->{_current_item}->{created} = $self->get_characters();
}
sub clear_characters {
my $self = shift;
$self->{_characters} = "";
}
sub get_characters {
my $self = shift;
return $self->{_characters};
}
package Node;
sub new {
my $class = shift;
return bless {}, $class;
}
# Borrowed from node.module
sub node_teaser {
my $body = shift;
my $size = $teaser_length;
if ($size == 0) {
return $body;
}
if (length($body) < $size) {
return $body;
}
if (my $length = rindex($body, "<br/>", $size)) {
return substr($body, 0, $length);
}
if (my $length = rindex($body, "<br>", $size)) {
return substr($body, 0, $length);
}
if (my $length = rindex($body, "</p>", $size)) {
return substr($body, 0, $length);
}
if (my $length = rindex($body, "\n", $size)) {
return substr($body, 0, $length);
}
if (my $length = rindex($body, ". ", $size)) {
return substr($body, 0, $length + 1);
}
if (my $length = rindex($body, "! ", $size)) {
return substr($body, 0, $length + 1);
}
if (my $length = rindex($body, "? ", $size)) {
return substr($body, 0, $length + 1);
}
return substr($body, 0, $size);
}
sub insert_statement {
my $self = shift;
my $body = mysql_escape($self->{description});
my $teaser = mysql_escape( node_teaser( $self->{description} ) );
return "INSERT INTO node ".
"(type,title,uid,status,comment, promote, users, attributes, revisions, created,teaser,body)".
" VALUES ('$type','".
mysql_escape($self->{title})."',1,1,2,1,'','','',".
"UNIX_TIMESTAMP('".to_mysql_date($self->{created})."'),'$teaser','$body');\n";
}
sub mysql_escape {
my $string = shift;
$string =~ s/\n/\\n/mg;
$string =~ s/(\'|\")/\\$1/g; #" quote to help syntax coloring
return $string;
}
sub to_mysql_date {
my $string = shift;
$string =~ s/T/ /;
$string =~ s/\+00:00$//;
return $string;
}
package main;
my $filename = $ARGV[0];
my $handler = new ConversionFilter();
my $parser = new XML::SAX::ParserFactory->parser(Handler => $handler);
$parser->parse_uri($filename);
Not compatible with Drupal 5?
Hi there,
Am I right in thinking that this script produces MySQL statements which are incompatible with Drupal 5? I have tried to import my Movable Type blog into Drupal 5.1 and MySQL gave this error: Unknown column 'users' in 'field list'
Many thanks,
Jack Kelly
Jack-Kelly.com