#!/usr/bin/perl -w

# Using XML::Parser::Lite
# Author: Michel Rodriguez <mirod@xmltwig.com>
#         based on a stub by Josh Narins <josh@large.com>
#         and help from Jeff Gleixner 




use strict;
use XML::Parser::Lite;

# XML::Parser::Lite passes entity references through unexpanded, so they are
# substituted afterwards from this table (name => replacement text).
my %ent = (
    # basic markup entities
    amp  => '&',
    lt   => '<',
    gt   => '>',
    quot => '"',
    apos => "'",

    # given here already expanded: their definitions use &lt; / &amp;
    xmlpio => "'<?xml'",
    hcro   => "&#x",

    # no-break space, by name and by number (def is commented out in the REC)
    nbsp   => ' ',
    '#160' => ' ',
);

# Slurp the whole document into $doc.
# The file to read may be given as the first command-line argument; without
# one, the copy of the REC in the current directory is used.
# No I/O layer is applied, so $doc holds the file's bytes exactly as stored.
my $rec_file = @ARGV ? $ARGV[0] : 'REC-xml-19980210.xml';
open( my $rec_fh, '<', $rec_file )
    or die "cannot open $rec_file: $!\n";
my $doc = do { local $/; <$rec_fh> };    # undef $/ => read everything at once
close $rec_fh;

# Load the entities declared in the document itself into %ent.
#  - Entity names may contain '.', '-' and ':' as well as word characters.
#  - The value is stored verbatim: references to other entities inside a
#    value are NOT expanded, so such entities still need a hand-written
#    entry in %ent.
#  - An entity that is already known is never redefined; testing with
#    exists (rather than for truth) keeps values such as "" or "0" too.
while ( $doc =~ m{<!ENTITY\s+([\w.:-]+)\s+(["'])(.*?)\2\s*>}sg ) {
    $ent{$1} = $3 unless exists $ent{$1};
}


# State shared by the parser callbacks:
#   in_lhs, in_rhs : true while the parser is inside an <lhs> / <rhs> element
#   lhs, rhs       : text collected so far for the production being read
#   i              : number of productions completed so far
#   production     : the formatted productions, in document order
my $flags = {
    in_lhs     => 0,
    in_rhs     => 0,
    lhs        => '',
    rhs        => '',
    i          => 0,
    production => [],
};

my $parser = XML::Parser::Lite->new(
    Handlers => {
        # entering <lhs> or <rhs>: start collecting text for that side
        Start => sub {
            my ( $p, $el ) = @_;
            if    ( $el eq 'rhs' ) { $flags->{in_rhs} = 1 }
            elsif ( $el eq 'lhs' ) { $flags->{in_lhs} = 1 }
        },

        # text is appended to whichever side is currently open (lhs first);
        # text outside both is dropped
        Char => sub {
            my ( $p, $txt ) = @_;
            if    ( $flags->{in_lhs} ) { $flags->{lhs} .= $txt }
            elsif ( $flags->{in_rhs} ) { $flags->{rhs} .= $txt }
        },

        # leaving <lhs>/<rhs> stops collecting; leaving <prod> stores the
        # numbered production and resets both sides for the next one
        End => sub {
            my ( $p, $el ) = @_;
            if    ( $el eq 'rhs' ) { $flags->{in_rhs} = 0 }
            elsif ( $el eq 'lhs' ) { $flags->{in_lhs} = 0 }
            elsif ( $el eq 'prod' ) {
                push @{ $flags->{production} },
                    production( ++$flags->{i}, $flags->{lhs}, $flags->{rhs} );
                $flags->{lhs} = '';
                $flags->{rhs} = '';
            }
        },
    },
);
$parser->parse($doc);

# Output: one cleaned-up production per line, in the order they were collected.
print clean($_), "\n" for @{ $flags->{production} };
    
# Format a single grammar production.
#   arguments: the production number, the left-hand side text and the
#              right-hand side text, in that order
#   returns:   the string "[number] lhs ::= rhs"
sub production {
    my $number = shift;
    my $left   = shift;
    my $right  = shift;

    return sprintf '[%s] %s ::= %s', $number, $left, $right;
}


# Turn the raw text of a production into a printable one-line string.
#   argument: the string to clean (the caller's copy is not modified)
#   returns:  the string with entity references replaced from %ent,
#             all runs of whitespace collapsed to a single space and
#             trailing whitespace removed
#   dies:     with "unknown entity NAME" when a reference is not in %ent
sub clean {
    my ($string) = @_;

    # Replace entity references. Only something shaped like a reference
    # (name characters, or '#' for the numeric form) is looked up, so a
    # stray '&' followed later by a ';' is left alone. The test uses exists
    # so that an entity whose value is "" or "0" is not reported as unknown.
    $string =~ s{&([#\w.:-]+);}{
        exists $ent{$1} ? $ent{$1} : die "unknown entity $1\n"
    }eg;

    # weird characters in the original document: the byte pair c2 a0,
    # turned into a plain space before whitespace is collapsed
    $string =~ s{\xc2\xa0}{ }g;

    $string =~ s{\s+}{ }g;    # collapse every run of whitespace
    $string =~ s{\s$}{};      # drop what is left at the end
    return $string;
}

