=head1 NAME
EPrints::XML::SAX::Writer
=cut
package EPrints::XML::SAX::Writer;
=pod
Based on XML::SAX::Writer by:
Robin Berjon, robin@knowscape.com
=cut
use vars qw( %DEFAULT_ESCAPE %COMMENT_ESCAPE $ESCAPE_REGEX $COMMENT_ESCAPE_REGEX );
%DEFAULT_ESCAPE = (
'&' => '&',
'<' => '<',
'>' => '>',
'"' => '"',
"'" => ''',
);
%COMMENT_ESCAPE = (
'--' => '--',
);
$ESCAPE_REGEX = join( '|', map { $_ = "\Q$_\E" } keys %DEFAULT_ESCAPE );
$ESCAPE_REGEX = qr/$ESCAPE_REGEX/;
$COMMENT_ESCAPE_REGEX = join( '|', map { $_ = "\Q$_\E" } keys %COMMENT_ESCAPE );
$COMMENT_ESCAPE_REGEX = qr/$COMMENT_ESCAPE_REGEX/;
use strict;
use XML::NamespaceSupport qw();
sub new
{
my( $class, %self ) = @_;
$self{Output} ||= *{STDOUT}{IO};
$self{QuoteCharacter} ||= "'";
if( ref($self{Output}) eq "SCALAR" )
{
$self{_output} = sub { push @{$self{_buffer}}, $_[1] };
}
else
{
$self{_output} = sub { print {$self{Output}} $_[1] };
}
return bless \%self, $class;
}
sub start_document
{
my( $self, $data ) = @_;
$self->{_o} = [];
$self->{NSDecl} = [];
$self->{NSHelper} = XML::NamespaceSupport->new({ xmlns => 1, fatal_errors => 0 });
$self->{NSHelper}->push_context;
}
sub end_document
{
my( $self, $data ) = @_;
if( ref($self->{Output}) eq "SCALAR" )
{
${$self->{Output}} = join '', @{$self->{_buffer}};
}
# we may need to do a little more here
$self->{NSHelper}->pop_context;
}
sub start_element {
my( $self, $data ) = @_;
$self->_output_element;
my $attr = $data->{Attributes};
# fix the namespaces and prefixes of what we're receiving, in case
# something is wrong
if( $data->{NamespaceURI} )
{
my $uri = $self->{NSHelper}->getURI($data->{Prefix}) || '';
# ns has precedence
if ($uri ne $data->{NamespaceURI})
{
$data->{Prefix} = $self->{NSHelper}->getPrefix($data->{NamespaceURI}); # random, but correct
$data->{Name} = $data->{Prefix} ? "$data->{Prefix}:$data->{LocalName}" : "$data->{LocalName}";
}
}
elsif ($data->{Prefix}) { # we can't have a prefix and no NS
$data->{Name} = $data->{LocalName};
$data->{Prefix} = '';
}
# create a hash containing the attributes so that we can ensure there is
# no duplication. Also, we check that ns are properly declared, that the
# Name is good, etc...
my %attr_hash;
foreach my $at (values %$attr)
{
next unless length $at->{Name}; # people have trouble with autovivification
if( $at->{NamespaceURI} )
{
my $uri = $self->{NSHelper}->getURI( $at->{Prefix} );
warn "Well formed error: prefix '$at->{Prefix}' is not bound to any URI" unless defined $uri;
# ns has precedence
if( defined $uri and $uri ne $at->{NamespaceURI} )
{
$at->{Prefix} = $self->{NSHelper}->getPrefix( $at->{NamespaceURI} ); # random, but correct
$at->{Name} = $at->{Prefix} ? "$at->{Prefix}:$at->{LocalName}" : "$at->{LocalName}";
}
}
elsif ($at->{Prefix}) { # we can't have a prefix and no NS
$at->{Name} = $at->{LocalName};
$at->{Prefix} = '';
}
$attr_hash{$at->{Name}} = $at->{Value};
}
foreach my $nd (@{$self->{NSDecl}})
{
if ($nd->{Prefix})
{
$attr_hash{'xmlns:' . $nd->{Prefix}} = $nd->{NamespaceURI};
}
else
{
$attr_hash{'xmlns'} = $nd->{NamespaceURI};
}
}
$self->{NSDecl} = [];
# buffer the element opening tag
my @output;
push @output, "<", $data->{Name};
while(my( $k, $v ) = each %attr_hash)
{
push @output, " ", $k, "=", $self->{QuoteCharacter}, $self->escape( $v ), $self->{QuoteCharacter};
}
$self->{BufferElement} = join '', @output;
$self->{NSHelper}->push_context;
}
sub end_element
{
my( $self, $data ) = @_;
if( exists $self->{BufferElement} )
{
$self->output( delete($self->{BufferElement}) . ' />' );
}
else
{
$self->output( '' . $data->{Name} . '>' );
}
$self->{NSHelper}->pop_context;
}
sub characters
{
my( $self, $data ) = @_;
$self->_output_element;
my $char = $data->{Data};
if( $self->{InCDATA} )
{
# we must scan for ]]> in the CDATA and escape it if it
# is present by close--opening
# we need to have buffer text in front of this...
$char = join ']]>]]<', $char;
}
else
{
$char = $self->escape( $char );
}
$self->output( $char );
}
sub start_prefix_mapping
{
my( $self, $data ) = @_;
push @{$self->{NSDecl}}, $data;
$self->{NSHelper}->declare_prefix($data->{Prefix}, $data->{NamespaceURI});
}
sub end_prefix_mapping
{
}
sub processing_instruction
{
my( $self, $data ) = @_;
$self->_output_element;
$self->_output_dtd;
$self->output( "$data->{Target} $data->{Data}?>" );
}
sub ignorable_whitespace
{
my( $self, $data ) = @_;
$self->_output_element;
$self->output( $data->{Data} );
}
sub skipped_entity
{
my( $self, $data ) = @_;
$self->_output_element;
$self->_output_dtd;
my $ent;
if ($data->{Name} =~ m/^%/) {
$ent = $data->{Name} . ';';
} elsif ($data->{Name} eq '[dtd]') {
# ignoring
} else {
$ent = '&' . $data->{Name} . ';';
}
$self->output( $ent );
}
sub notation_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# I think that param entities are normalized before this
my $not = " {Name};
if ($data->{PublicId} and $data->{SystemId}) {
$not .= ' PUBLIC \'' . $self->escape($data->{PublicId}) . '\' \'' . $self->escape($data->{SystemId}) . '\'';
}
elsif ($data->{PublicId}) {
$not .= ' PUBLIC \'' . $self->escape($data->{PublicId}) . '\'';
}
else {
$not .= ' SYSTEM \'' . $self->escape($data->{SystemId}) . '\'';
}
$not .= " >\n";
$self->output( $not );
}
sub unparsed_entity_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# I think that param entities are normalized before this
my $ent = " {Name};
if ($data->{PublicId}) {
$ent .= ' PUBLIC \'' . $self->escape($data->{PublicId}) . '\' \'' . $self->escape($data->{SystemId}) . '\'';
}
else {
$ent .= ' SYSTEM \'' . $self->escape($data->{SystemId}) . '\'';
}
$ent .= " NDATA $data->{Notation} >\n";
$self->output( $ent );
}
sub element_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# I think that param entities are normalized before this
my $eld = " {Name} . ' ' . $data->{Model} . " >\n";
$self->output( $eld );
}
sub attribute_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# to be backward compatible with Perl SAX 2.0
$data->{Mode} = $data->{ValueDefault}
if not(exists $data->{Mode}) and exists $data->{ValueDefault};
# I think that param entities are normalized before this
my $atd = " {eName} . ' ' . $data->{aName} . ' ';
$atd .= $data->{Type} . ' ' . $data->{Mode} . ' ';
$atd .= $data->{Value} . ' ' if $data->{Value};
$atd .= " >\n";
$self->output( $atd );
}
sub internal_entity_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# I think that param entities are normalized before this
my $ent = " {Name} . ' \'' . $self->escape($data->{Value}) . "' >\n";
$self->output( $ent );
}
sub external_entity_decl
{
my( $self, $data ) = @_;
$self->_output_dtd;
# I think that param entities are normalized before this
my $ent = " {Name};
if ($data->{PublicId}) {
$ent .= ' PUBLIC \'' . $self->escape($data->{PublicId}) . '\' \'' . $self->escape($data->{SystemId}) . '\'';
}
else {
$ent .= ' SYSTEM \'' . $self->escape($data->{SystemId}) . '\'';
}
$ent .= " >\n";
$self->output( $ent );
}
sub comment
{
my( $self, $data ) = @_;
$self->_output_element;
$self->_output_dtd;
$self->output( '' );
}
sub start_dtd
{
my( $self, $data ) = @_;
my $dtd = '{Name};
if ($data->{PublicId}) {
$dtd .= ' PUBLIC \'' . $self->escape($data->{PublicId}) . '\' \'' . $self->escape($data->{SystemId}) . '\'';
}
elsif ($data->{SystemId}) {
$dtd .= ' SYSTEM \'' . $self->escape($data->{SystemId}) . '\'';
}
$self->{BufferDTD} = $dtd;
}
sub end_dtd
{
my( $self, $data ) = @_;
my $dtd;
if( defined(delete $self->{BufferDTD}) )
{
$dtd = $self->{BufferDTD} . ' >';
}
else
{
$dtd = ' ]>';
}
$self->output( $dtd );
}
sub start_cdata
{
my( $self, $data ) = @_;
$self->_output_element;
$self->{InCDATA} = 1;
$self->output( '{InCDATA} = 0;
$self->output( ']]>' );
}
sub start_entity
{
my( $self, $data ) = @_;
$self->_output_element;
$self->_output_dtd;
my $ent;
if ($data->{Name} eq '[dtd]') {
# we ignore the fact that we're dealing with an external
# DTD entity here, and prolly shouldn't write the DTD
# events unless explicitly told to
# this will prolly change
}
elsif ($data->{Name} =~ m/^%/) {
$ent = $data->{Name} . ';';
}
else {
$ent = '&' . $data->{Name} . ';';
}
$self->output( $ent );
}
sub end_entity
{
my( $self, $data ) = @_;
# depending on what is done above, we might need to do sth here
}
### SAX1 stuff ######################################################
sub xml_decl
{
my( $self, $data ) = @_;
# version info is compulsory, contrary to what some seem to think
# also, there's order in the pseudo-attr
my $xd = '';
if ($data->{Version}) {
$xd .= "{Version}'";
if ($data->{Encoding}) {
$xd .= " encoding='$data->{Encoding}'";
}
if ($data->{Standalone}) {
$xd .= " standalone='$data->{Standalone}'";
}
$xd .= '?>';
}
$self->output( $xd );
}
sub _output_element
{
my( $self ) = @_;
if( exists $self->{BufferElement} )
{
$self->output( delete($self->{BufferElement}) . '>' );
}
}
sub _output_dtd
{
my( $self ) = @_;
if( exists $self->{BufferDTD} )
{
$self->output( delete($self->{BufferDTD}) . " [\n" );
}
}
sub escape
{
my( $self, $str ) = @_;
$str =~ s/($ESCAPE_REGEX)/$DEFAULT_ESCAPE{$1}/oge;
return $str;
}
sub escape_comment
{
my( $self, $str ) = @_;
$str =~ s/($COMMENT_ESCAPE_REGEX)/$COMMENT_ESCAPE{$1}/oge;
return $str;
}
sub output
{
&{$_[0]->{_output}};
}
1;
=head1 COPYRIGHT
=for COPYRIGHT BEGIN
Copyright 2000-2011 University of Southampton.
=for COPYRIGHT END
=for LICENSE BEGIN
This file is part of EPrints L.
EPrints is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EPrints is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with EPrints. If not, see L.
=for LICENSE END