XML, the Perl Way

Processing XML with Perl - Michel Rodriguez

Introduction to XML::DOM

XML::Twig

Example: a (quasi!) HTML converter

#!/bin/perl -w
use strict;
use XML::DOM;

# Quasi-HTML conversion of the wine catalog: parse wine.xml, rename its
# elements in place to HTML tag names, then print the serialized document.
#   root                              -> html
#   class                             -> div, preceded by an h2 holding its name
#   category                          -> table, preceded by a p holding its name
#   item                              -> tr (its stock elements are dropped)
#   winery, type, year, rating, price -> td
my $dom = XML::DOM::Parser->new;                        # create the parser (direct call, not indirect-object "new Class")
my $doc= $dom->parsefile( "wine.xml");                  # parse the document
my $catalog= $doc->getDocumentElement;                  # get the root
$catalog->setTagName( 'html');                          # NOT part of the DOM spec!
# process class (becomes a div)
my @classes= $doc->getElementsByTagName( 'class');      # VERY useful method
foreach my $class (@classes)
  { # move the name attribute into a heading placed just before the element
    my $name= $class->getAttribute( 'name');            # attribute processing
    $class->removeAttribute ('name');
    insert_before( $doc, $class, 'h2', $name);          # you WILL write that kind of subroutine
    $class->setTagName( 'div');
  }
# process category (becomes a table)
my @categories= $doc->getElementsByTagName( 'category');
foreach my $category (@categories)
  { # move the name attribute into a paragraph placed just before the element
    my $name= $category->getAttribute( 'name');
    $category->removeAttribute ('name');
    insert_before( $doc, $category, 'p', $name);
    $category->setTagName( 'table');
  }
# process item (becomes a row) and remove stock
my @items= $doc->getElementsByTagName( 'item');
foreach my $item (@items)
  { $item->setTagName( 'tr');
    my @stocks= $item->getElementsByTagName( 'stock');  # can be called on an element too
    foreach my $stock (@stocks)
      { # the search above also returns nested descendants, and removeChild
        # throws when handed a node that is not a direct child, so each
        # stock is detached through its own parent rather than through $item
        $stock->getParentNode->removeChild( $stock);
      }
  }
# process fields (become cells)
foreach my $field ( qw(winery type year rating price))
  { my @cells= $doc->getElementsByTagName( $field);
    foreach my $cell (@cells)
      { $cell->setTagName( 'td'); }
  }

print $doc->toString;
exit;

# insert_before( $doc, $elt, $tag, $text)
# Creates a new $tag element containing $text as its only (text) child and
# places it immediately before $elt, as a sibling under $elt's parent.
#   $doc  - document used to create the new nodes
#   $elt  - existing node the new element is inserted in front of
#   $tag  - tag name of the new element
#   $text - text content of the new element
# Returns whatever the parent's insertBefore call returns.
sub insert_before
  { my( $doc, $elt, $tag, $text)= @_;
    # build the new element and its text; nodes are created by the document
    my $inserted= $doc->createElement( $tag);
    $inserted->appendChild( $doc->createTextNode( $text));
    # insertion is an operation on the parent, with $elt as the reference child
    return $elt->getParentNode->insertBefore( $inserted, $elt);
  }


Introduction to XML::DOM

XML::Twig