#!/usr/bin/perl -w
# Using XML::Stream::Parser
# author: Ryan Eatmon <reatmon@mail.com>
# modified by mirod <mirod@cpan.org>

use strict;

use XML::Stream::Parser;

# State shared by the handler subs defined later in this file:
#   $in_lhs / $in_rhs - true while the parser is inside an <lhs> / <rhs> element
#   $lhs / $rhs       - character data collected for the current <prod>
#   $counter          - number of <prod> elements printed so far
my( $in_lhs, $lhs, $in_rhs, $rhs, $counter);

# Call the constructor as a class method. The indirect-object form
# ("new XML::Stream::Parser(...)") is discouraged by perlobj because perl
# has to guess whether "new" is a method or a function at compile time.
my $parser =
    XML::Stream::Parser->new(handlers =>
                                {   startElement => \&start_element,
                                    characters   => \&characters,
                                    endElement   => \&end_element,
                                }
                            );

# a kludge to filter out UTF-8 related warnings:
# every other warning is still passed through to STDERR unchanged
$SIG{__WARN__}= sub { print STDERR @_ unless( $_[0]=~/^(Malformed UTF-8 character|utf8)/); };

$parser->parsefile( 'REC-xml-19980210.xml');


exit;
# startElement handler.
#
# Arguments: the parser object, the element name, then the element's
# attributes.  Only the element name is used here; the attributes are
# deliberately left in @_ instead of being copied into an unused hash.
#
#   <rhs>  - start collecting character data into $rhs
#   <lhs>  - start collecting character data into $lhs
#   <prod> - a new production begins: reset both accumulators
#
# Returns nothing.
sub start_element
{
    my ($parser, $tag) = @_;

    if    ($tag eq 'rhs')  { $in_rhs = 1; }
    elsif ($tag eq 'lhs')  { $in_lhs = 1; }
    elsif ($tag eq 'prod') { $rhs = ''; $lhs = ''; }

    return;
}

# endElement handler.
#
# Arguments: the parser object and the name of the element being closed.
#
#   </rhs>, </lhs> - clear the matching "inside" flag
#   </prod>        - bump the production counter and print the collected
#                    production as "[n] lhs ::= rhs" after passing it
#                    through clean()
#
# Any other element is ignored.
sub end_element
{
    my ($parser, $tag) = @_;

    if ($tag eq 'rhs') { undef $in_rhs; return; }
    if ($tag eq 'lhs') { undef $in_lhs; return; }
    return unless $tag eq 'prod';

    ++$counter;
    print clean( "[${counter}] ${lhs} ::= ${rhs}" ), "\n";
}

# characters handler.
#
# Arguments: the parser object and a chunk of character data.
#
# The chunk is appended to $lhs while $in_lhs is set, otherwise to $rhs
# while $in_rhs is set; character data seen anywhere else is discarded.
sub characters
{
    my ($parser, $cdata) = @_;

    # Choose the accumulator to extend; $in_lhs takes precedence.
    my $target = $in_lhs ? \$lhs
               : $in_rhs ? \$rhs
               :           undef;

    $$target .= $cdata if $target;
}

# Normalise one production string for printing.
#
# Takes the string as its only argument and returns the cleaned copy:
#   - the byte pair \xc2\xa0 becomes a plain space
#   - the entity references &nbsp;, &pic;, &xmlpio; and &hcro; are
#     replaced by their textual stand-ins
#   - every run of whitespace collapses to a single space
#   - one trailing whitespace character, if present, is removed
# Leading whitespace is collapsed but not removed.
sub clean
{
    my ($text) = @_;

    $text =~ s/\xc2\xa0/ /sg;

    # Applied in this order, one substitution pass per entry.
    my @entity_map = (
        [ qr{\&nbsp;},  q{ }       ],
        [ qr{&pic;},    q{'?>'}    ],
        [ qr{&xmlpio;}, q{'<?xml'} ],
        [ qr{&hcro;},   q{&#x}     ],
    );
    for my $entry (@entity_map) {
        my ($pattern, $replacement) = @{$entry};
        $text =~ s{$pattern}{$replacement}g;
    }

    $text =~ s/\s+/ /g;
    $text =~ s{\s$}{};

    return $text;
}


