=head1 NAME
EPrints::Plugin::Import::DOI
=cut
package EPrints::Plugin::Import::CheckDOI;
# 10.1002/asi.20373
use strict;
use EPrints::Plugin::Import::TextFile;
use URI;
our @ISA = qw/ EPrints::Plugin::Import::DOI /;
sub input_text_fh
{
my( $plugin, %opts ) = @_;
my @ids;
my $pid = $plugin->param( "pid" );
#AKS286 20140228 Added $session for DOI validation checks
my $session = $plugin->{repository};
#AKS286 20140228 Added parameters for deciding whether to include the "doi:" prefix on searches and in the database storage,
# and the fieldname for storing the doi in the eprints table (in case this has been customised).
my $use_prefix = $plugin->param( "use_prefix" );
my $doi_field = $plugin->param( "doi_field" );
$use_prefix = 1 unless defined ( $use_prefix );
$doi_field = "id_number" unless defined ( $doi_field );
unless( $pid )
{
$plugin->error( 'You need to configure your pid by setting the `pid\' variable in cfg.d/plugins.pl (see http://www.crossref.org/openurl): $c->{plugins}->{"Import::DOI"}->{params}->{pid} = "ourl_username:password";' );
return undef;
}
my $fh = $opts{fh};
while( my $doi = <$fh> )
{
$doi =~ s/^\s+//;
$doi =~ s/\s+$//;
next unless length($doi);
#AKS286 20140228 Only include prefix if config parameter says so
if ( $use_prefix )
{
$doi =~ s/^(doi:)?/doi:/i;
}
else
{
$doi =~ s/^(doi:)?//i;
}
#AKS286 20140228 START check and exclude DOI from fetch if DOI already exists in the 'archive' dataset
my $sql = "SELECT eprintid, title, ".$doi_field." FROM eprint WHERE eprint_status = 'archive' and ".$doi_field."='".$doi."'";
my $sth = $session->get_database->prepare( $sql );
$session->get_database->execute( $sth, $sql );
if (my ($eprintid, $title, $db_doi) = $sth->fetchrow_array)
{
$plugin->handler->message( "warning", $plugin->html_phrase( "duplicate_doi",
doi => $plugin->{session}->make_text( $db_doi ),
msg => $plugin->{session}->make_text( "DOI already exists in repository as item ".$eprintid )
));
next;
}
#AKS286 20140228 END check and exclude DOI from fetch if DOI already exists in the 'archive' dataset
my %params = (
pid => $pid,
noredirect => "true",
id => $doi,
);
my $url = URI->new( "http://www.crossref.org/openurl" );
$url->query_form( %params );
my $dom_doc;
eval {
$dom_doc = EPrints::XML::parse_url( $url );
};
my $dom_top = $dom_doc->getDocumentElement;
my $dom_query_result = ($dom_top->getElementsByTagName( "query_result" ))[0];
if( $@ || !defined $dom_query_result)
{
$plugin->handler->message( "warning", $plugin->html_phrase( "invalid_doi",
doi => $plugin->{session}->make_text( $doi ),
msg => $plugin->{session}->make_text( "No or unrecognised response" )
));
next;
}
my $dom_body = ($dom_query_result->getElementsByTagName( "body" ))[0];
my $dom_query = ($dom_body->getElementsByTagName( "query" ))[0];
my $status = $dom_query->getAttribute( "status" );
if( defined($status) && ($status eq "unresolved" || $status eq "malformed") )
{
my $msg = ($dom_query->getElementsByTagName( "msg" ))[0];
$msg = EPrints::Utils::tree_to_utf8( $msg );
$plugin->handler->message( "warning", $plugin->html_phrase( "invalid_doi",
doi => $plugin->{session}->make_text( $doi ),
msg => $plugin->{session}->make_text( $msg )
));
next;
}
my $data = { doi => $doi };
foreach my $node ( $dom_query->getChildNodes )
{
next if( !EPrints::XML::is_dom( $node, "Element" ) );
my $name = $node->tagName;
if( $node->hasAttribute( "type" ) )
{
$name .= ".".$node->getAttribute( "type" );
}
if( $name eq "contributors" )
{
$plugin->contributors( $data, $node );
}
else
{
$data->{$name} = EPrints::Utils::tree_to_utf8( $node );
}
}
EPrints::XML::dispose( $dom_doc );
my $epdata = $plugin->convert_input( $data );
next unless( defined $epdata );
my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
if( defined $dataobj )
{
push @ids, $dataobj->get_id;
}
}
return EPrints::List->new(
dataset => $opts{dataset},
session => $plugin->{session},
ids=>\@ids );
}
sub convert_input
{
my( $plugin, $data ) = @_;
my $epdata = {};
my $use_prefix = $plugin->param( "use_prefix" );
my $doi_field = $plugin->param( "doi_field" );
$use_prefix = 1 unless defined ( $use_prefix );
$doi_field = "id_number" unless defined ( $doi_field );
if( defined $data->{creators} )
{
$epdata->{creators} = $data->{creators};
}
elsif( defined $data->{author} )
{
$epdata->{creators} = [
{
name=>{ family=>$data->{author} },
}
];
}
if( defined $data->{year} && $data->{year} =~ /^[0-9]{4}$/ )
{
$epdata->{date} = $data->{year};
}
if( defined $data->{"issn.electronic"} )
{
$epdata->{issn} = $data->{"issn.electronic"};
}
if( defined $data->{"issn.print"} )
{
$epdata->{issn} = $data->{"issn.print"};
}
if( defined $data->{"doi"} )
{
#AKS286 20140228 Amended code to use field name from config parameter, in case it has been customised.
$epdata->{$doi_field} = $data->{"doi"};
my $doi = $data->{"doi"};
$doi =~ s/^\s*doi:\s*//gi;
$epdata->{official_url} = "http://dx.doi.org/$doi";
}
if( defined $data->{"volume_title"} )
{
$epdata->{book_title} = $data->{"volume_title"};
}
if( defined $data->{"journal_title"} )
{
$epdata->{publication} = $data->{"journal_title"};
}
if( defined $data->{"article_title"} )
{
$epdata->{title} = $data->{"article_title"};
}
if( defined $data->{"series_title"} )
{
# not sure how to map this!
# $epdata->{???} = $data->{"series_title"};
}
if( defined $data->{"isbn"} )
{
$epdata->{isbn} = $data->{"isbn"};
}
if( defined $data->{"volume"} )
{
$epdata->{volume} = $data->{"volume"};
}
if( defined $data->{"issue"} )
{
$epdata->{number} = $data->{"issue"};
}
if( defined $data->{"first_page"} )
{
$epdata->{pagerange} = $data->{"first_page"};
}
if( defined $data->{"last_page"} )
{
$epdata->{pagerange} = "" unless defined $epdata->{pagerange};
$epdata->{pagerange} .= "-" . $data->{"last_page"};
}
if( defined $data->{"doi.conference_paper"} )
{
$epdata->{type} = "conference_item";
}
if( defined $data->{"doi.journal_article"} )
{
$epdata->{type} = "article";
}
return $epdata;
}
sub url_encode
{
my ($str) = @_;
$str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
return $str;
}
1;
=head1 COPYRIGHT
=for COPYRIGHT BEGIN
Copyright 2000-2011 University of Southampton.
=for COPYRIGHT END
=for LICENSE BEGIN
This file is part of EPrints L.
EPrints is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EPrints is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with EPrints. If not, see L.
=for LICENSE END