package EPrints::Plugin::Import::OPFXML;

use strict;

# XML namespace URIs, declared as package variables.
# $OPF_NS is the namespace of the OPF package document and is used by
# xml_to_dataobj() to locate the <metadata> element.
# NOTE(review): $DC_NS and $DCTERMS_NS are not referenced anywhere in this
# file - confirm whether other code reads them before removing.
our $DC_NS = "http://purl.org/dc/elements/1.1/";
our $DCTERMS_NS = "http://purl.org/dc/terms/";
our $OPF_NS = "http://www.idpf.org/2007/opf";

# This is just a variant of the DefaultXML plug-in
use EPrints::Plugin::Import::DefaultXML;

our @ISA = qw/ EPrints::Plugin::Import::DefaultXML /;

# Constructor.  Builds the plugin via the parent class and then fills in the
# settings specific to OPF import.
#
# Parameters:
#   %params - passed through unchanged to the parent constructor.
#
# Returns the plugin object.  If MIME::Types cannot be loaded the plugin is
# hidden (visible is set to "") and an error message is stored in {error}.
sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new(%params);

	# Display name, shown wherever the plugin is listed
	$self->{name} = "OPF Resource";

	# What an import run produces
	$self->{produce} = [ 'list/eprint', 'dataobj/eprint' ];

	# Base namespace used to recognise this XML type on import (works with
	# the sword packaging format, dc:conformsTo or similar)
	$self->{xmlns} = "http://www.idpf.org/2007/opf";

	# Offer the plugin on the import menu and elsewhere ...
	$self->{visible} = "all";

	# ... unless the module it depends on is not installed
	if( !EPrints::Utils::require_if_exists("MIME::Types") )
	{
		$self->{visible} = "";
		$self->{error} = "Failed to load required module MIME::Types";
	}

	return $self;
}

# Input File Handle Method, for when files are uploaded.
#
# Parameters:
#   %opts - named options; "fh" is the filehandle to read from and "dataset"
#           is the dataset the returned list belongs to.
#
# The whole input is read into memory.  Input that looks like an XML document
# is handed to input_fh_xml(); anything else is handed to input_fh_list(),
# which treats it as a URL to fetch.
#
# Returns an EPrints::List.  If the chosen handler returns nothing, an empty
# list is returned instead.
sub input_fh
{
	my( $plugin, %opts ) = @_;

	my $fh = $opts{"fh"};

	my $xml = join "", <$fh>;

	# Whitespace in front of an XML declaration is a well-formedness error and
	# carries no meaning in front of a URL, so drop it before sniffing.
	$xml =~ s/\A\s+//;

	my $list;

	# Sniff for markup rather than for a literal "<?xml": the XML declaration
	# is optional and may be preceded by a byte-order mark (either as raw
	# UTF-8 bytes or as a decoded U+FEFF).  A URL never starts with "<".
	if( $xml =~ /\A(?:\xEF\xBB\xBF|\x{FEFF})?</ )
	{
		$list = $plugin->input_fh_xml( $xml, %opts );
	}
	else
	{
		$list = $plugin->input_fh_list( $xml, %opts );
	}

	# The handlers may return nothing on failure; always hand back a list
	$list ||= EPrints::List->new(
			dataset => $opts{dataset},
			session => $plugin->{session},
			ids => [] );

	return $list;
}

# Handle direct XML input.
#
# Parameters:
#   $xml  - the XML document as a string.
#   %opts - named options; "dataset" is passed on to xml_to_dataobj() and to
#           the returned list.
#
# Returns an EPrints::List holding the id of the imported object, or an empty
# list if xml_to_dataobj() produced nothing.  If the XML cannot be parsed the
# problem is reported through $plugin->error and nothing is returned, which
# matches how input_fh_list() reports its failures.
sub input_fh_xml
{
	my( $plugin, $xml, %opts ) = @_;

	# Trap parser exceptions so malformed input is reported, not fatal
	my $doc = eval { EPrints::XML::parse_xml_string( $xml ) };
	if( $@ || !defined $doc )
	{
		$plugin->error( "Error parsing XML: $@" );
		return;
	}

	my $dataobj = $plugin->xml_to_dataobj( $opts{dataset}, $doc->documentElement );

	EPrints::XML::dispose( $doc );

	return EPrints::List->new(
			dataset => $opts{dataset},
			session => $plugin->{session},
			ids => [defined($dataobj) ? $dataobj->get_id : ()] );
}

# Go grab input from a URL.
#
# Parameters:
#   $url  - location of a resource that lists document URLs, one per line;
#           all whitespace is stripped from it before fetching.
#   %opts - named options; "dataset" is passed on to xml_to_dataobj() and to
#           the returned list.
#
# The resource is downloaded to a temporary file.  Every line that starts
# with "http" is parsed as an XML document and converted with
# xml_to_dataobj(); other lines are ignored.  A document that fails to parse
# is reported with $plugin->warning and skipped, so one bad entry does not
# abort the rest of the import.
#
# Returns an EPrints::List of the ids that were created, or nothing (after
# calling $plugin->error) when the listing itself cannot be fetched.
sub input_fh_list
{
	my( $plugin, $url, %opts ) = @_;

	$url =~ s/\s+//g;

	my $tmpfile = File::Temp->new;

	my $r = EPrints::Utils::wget( $plugin->{session}, $url, $tmpfile );
	if( $r->is_error )
	{
		$plugin->error( "Error reading resource from $url: ".$r->code." ".$r->message );
		return;
	}

	# Rewind so the downloaded listing can be read back from the start
	seek($tmpfile,0,0);

	my @ids;

	while( my $doc_url = <$tmpfile> )
	{
		$doc_url =~ s/\s+//g;
		next unless $doc_url =~ /^http/;

		my $doc = eval { EPrints::XML::parse_url( $doc_url ) };
		if( $@ || !defined $doc )
		{
			$plugin->warning( "Error parsing: $doc_url\n" );
			# There is no document to convert; move on to the next entry
			next;
		}

		my $dataobj = $plugin->xml_to_dataobj( $opts{dataset}, $doc->documentElement );

		EPrints::XML::dispose( $doc );

		if( defined $dataobj )
		{
			push @ids, $dataobj->get_id;
		}
	}

	return EPrints::List->new(
			dataset => $opts{dataset},
			session => $plugin->{session},
			ids => \@ids );
}
# Translate this XML into an EPrint.
#
# Parameters:
#   $dataset - dataset handed to the DC import plugin.
#   $xml     - the document element of the parsed OPF document.
#
# The first <metadata> element in the OPF namespace is serialised to a
# temporary file and imported with the Import::XSLT::DC plugin, which shares
# this plugin's Handler and parse_only settings.
#
# Returns the first record of the list produced by the DC plugin.  Returns
# nothing if the document has no <metadata> element, if the DC plugin cannot
# be loaded, or if the DC plugin returns no list.
sub xml_to_dataobj
{
	my( $plugin, $dataset, $xml ) = @_;

	my $session = $plugin->{session};

	# Locate the metadata element
	my $metadata = $xml->getElementsByTagNameNS( $OPF_NS, "metadata" )->[0];
	if( !defined $metadata )
	{
		# Not an OPF package document (or an empty one): nothing to import
		$plugin->warning( "No metadata element found in OPF document\n" );
		return;
	}

	# Load the DC plugin
	my $dc_plugin = $session->plugin( "Import::XSLT::DC",
			processor => $plugin->{processor},
			dataset => $dataset,
			);
	if( !defined $dc_plugin )
	{
		$plugin->error( "Could not load the Import::XSLT::DC plugin" );
		return;
	}

	$dc_plugin->{Handler} = $plugin->{Handler};
	$dc_plugin->{parse_only} = $plugin->{parse_only};

	# Spew the metadata element to a temp file, then rewind it for reading
	my $tmpfile2 = File::Temp->new;
	print $tmpfile2 $metadata->toString();
	seek($tmpfile2,0,0);

	# Parse the file using the plug-in to get back a list of eprints
	my $list = $dc_plugin->input_fh( fh => $tmpfile2, dataset => $dataset );
	return unless defined $list;

	my( $eprint ) = $list->get_records( 0, 1 );

	return $eprint;
}

1;