#!/usr/bin/perl -w
# Using XML::TokeParser
# Author: D.H. http://search.cpan.org/author/PODMASTER
# additional comments by Michel Rodriguez

use strict;

use XML::TokeParser;

# Input document: the first command-line argument when one is given,
# otherwise the default file name this example has always used.
my $file = @ARGV ? $ARGV[0] : 'REC-xml-19980210.xml';
my $i = 0;    # running number of the 'lhs' elements seen so far

# new() may hand back undef instead of a parser (e.g. when the file cannot
# be opened); stop here with a clear message rather than failing later on
# the first method call.
my $p = XML::TokeParser->new($file)
    or die "Cannot create XML::TokeParser for '$file': $!\n";


my $Ret = "";    # text of the rule currently being assembled

# go through the document, reading tokens
# (each token is an array ref: [0] is the token type, [1] the element name)
while(defined(my $t = $p->get_token() )){
    if($t->[0] eq 'S' and $t->[1] eq 'lhs') {
        # found the start tag for an 'lhs' element: get its text
        # and start a fresh rule, discarding anything accumulated before
        $i++;
        $Ret = join '', "[$i] ", $p->get_text('/lhs'), " ::= ";
    }elsif( $t->[0] eq 'S' and $t->[1] eq 'rhs'){
        # start tag for an 'rhs' element: append its text to the rule
        $Ret .= $p->get_text('/rhs');
    }elsif($t->[0] eq 'E' and $t->[1] eq 'prod'){
        # end tag for a 'prod' element: output the rule
        print clean($Ret),"\n";
        $Ret = "";
    }
}

# explicitly release the accumulated text and the parser object
undef $Ret;
undef $p;

## mirod already did this, so I'm borrowing

# clean($string)
#
# Returns a tidied copy of $string for single-line output:
#   - each \xc2\xa0 pair (a UTF-8 encoded non-breaking space) becomes a
#     plain space,
#   - every run of whitespace collapses to one space,
#   - a trailing whitespace character is removed.
# Leading whitespace is collapsed like any other run but not removed.
# The caller's value is left untouched; only the copy is edited.
sub clean {
    my ($text) = @_;

    # Topicalize the copy so each substitution works on it in turn.
    for ($text) {
        s/\xc2\xa0/ /sg;
        s/\s+/ /g;
        s{\s$}{}g;
    }

    return $text;
}
