=head1 NAME

EPrints::Plugin::Import::CheckDOI

=head1 DESCRIPTION

Import plugin that reads one DOI per line, skips any DOI that is already
held by an eprint in the 'archive' status, and otherwise looks the DOI up
through the CrossRef OpenURL resolver and turns the response into eprint
data.

Plugin parameters read by this module:

=over 4

=item pid

CrossRef OpenURL account identifier. Required.

=item use_prefix

When true (the default) DOIs are normalised to carry a leading C<doi:>;
when false any leading C<doi:> is stripped.

=item doi_field

Name of the eprint field that stores the DOI. Defaults to C<id_number>.

=back

=cut

package EPrints::Plugin::Import::CheckDOI;

# 10.1002/asi.20373

use strict;

use EPrints::Plugin::Import::TextFile;
# The parent class is named in @ISA below, so load it explicitly rather than
# relying on some other code having loaded it first.
use EPrints::Plugin::Import::DOI;
use URI;

our @ISA = qw/ EPrints::Plugin::Import::DOI /;

# input_text_fh( %opts )
#
# Reads DOIs (one per line) from $opts{fh}, imports each one that is not
# already present in the archive, and returns an EPrints::List of the ids of
# the data objects created in $opts{dataset}.
#
# Returns undef (after reporting an error) if the `pid' parameter is not
# configured. Problems with individual DOIs are reported as warnings through
# the plugin handler and the DOI is skipped.
sub input_text_fh
{
	my( $plugin, %opts ) = @_;

	my @ids;

	my $pid = $plugin->param( "pid" );

	#AKS286 20140228 Parameters for deciding whether to include the "doi:" prefix on searches and in the database storage,
	# and the fieldname for storing the doi in the eprints table (in case this has been customised).
	my $use_prefix = $plugin->param( "use_prefix" );
	my $doi_field = $plugin->param( "doi_field" );
	$use_prefix = 1 unless defined $use_prefix;
	$doi_field = "id_number" unless defined $doi_field;

	unless( $pid )
	{
		# NOTE(review): the message below names "Import::DOI" although this
		# package is Import::CheckDOI - confirm which config key is actually read.
		$plugin->error( 'You need to configure your pid by setting the `pid\' variable in cfg.d/plugins.pl (see http://www.crossref.org/openurl): $c->{plugins}->{"Import::DOI"}->{params}->{pid} = "ourl_username:password";' );
		return undef;
	}

	my $fh = $opts{fh};
	while( my $doi = <$fh> )
	{
		$doi =~ s/^\s+//;
		$doi =~ s/\s+$//;

		next unless length($doi);

		#AKS286 20140228 Only include prefix if config parameter says so
		if( $use_prefix )
		{
			$doi =~ s/^(doi:)?/doi:/i;
		}
		else
		{
			$doi =~ s/^(doi:)?//i;
		}

		#AKS286 20140228 check and exclude DOI from fetch if DOI already exists in the 'archive' dataset
		my( $eprintid, $db_doi ) = $plugin->_find_archived_duplicate( $doi_field, $doi );
		if( defined $eprintid )
		{
			$plugin->_warn_doi( "duplicate_doi", $db_doi, "DOI already exists in repository as item ".$eprintid );
			next;
		}

		my %params = (
			pid => $pid,
			noredirect => "true",
			id => $doi,
		);

		my $url = URI->new( "http://www.crossref.org/openurl" );
		$url->query_form( %params );

		my( $dom_doc, $dom_query ) = $plugin->_fetch_query_node( $url );
		if( !defined $dom_query )
		{
			$plugin->_warn_doi( "invalid_doi", $doi, "No or unrecognised response" );
			EPrints::XML::dispose( $dom_doc ) if defined $dom_doc;
			next;
		}

		my $status = $dom_query->getAttribute( "status" );
		if( defined($status) && ($status eq "unresolved" || $status eq "malformed") )
		{
			my $msg_node = ($dom_query->getElementsByTagName( "msg" ))[0];
			# The response may carry no <msg> element; report an empty message then.
			my $msg = defined $msg_node ? EPrints::Utils::tree_to_utf8( $msg_node ) : "";
			$plugin->_warn_doi( "invalid_doi", $doi, $msg );
			EPrints::XML::dispose( $dom_doc );
			next;
		}

		my $data = { doi => $doi };
		foreach my $node ( $dom_query->getChildNodes )
		{
			next if( !EPrints::XML::is_dom( $node, "Element" ) );

			# Elements carrying a "type" attribute are keyed as "tag.type",
			# e.g. "issn.print".
			my $name = $node->tagName;
			if( $node->hasAttribute( "type" ) )
			{
				$name .= ".".$node->getAttribute( "type" );
			}

			if( $name eq "contributors" )
			{
				$plugin->contributors( $data, $node );
			}
			else
			{
				$data->{$name} = EPrints::Utils::tree_to_utf8( $node );
			}
		}

		EPrints::XML::dispose( $dom_doc );

		my $epdata = $plugin->convert_input( $data );
		next unless( defined $epdata );

		my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
		if( defined $dataobj )
		{
			push @ids, $dataobj->get_id;
		}
	}

	return EPrints::List->new(
		dataset => $opts{dataset},
		session => $plugin->{session},
		ids=>\@ids );
}

# _warn_doi( $phrase_id, $doi, $msg )
#
# Sends a "warning" message to the plugin handler using the plugin phrase
# $phrase_id, with $doi and $msg supplied as the `doi' and `msg' pins.
sub _warn_doi
{
	my( $plugin, $phrase_id, $doi, $msg ) = @_;

	$plugin->handler->message( "warning", $plugin->html_phrase( $phrase_id,
		doi => $plugin->{session}->make_text( $doi ),
		msg => $plugin->{session}->make_text( $msg )
	));

	return;
}

# _find_archived_duplicate( $doi_field, $doi )
#
# Looks in the eprint table for a row in the 'archive' status whose
# $doi_field column equals $doi. Returns ( eprintid, stored_doi ) for the
# first match, or an empty list when there is none.
sub _find_archived_duplicate
{
	my( $plugin, $doi_field, $doi ) = @_;

	my $db = $plugin->{repository}->get_database;

	# $doi comes from the imported text, so it must be quoted by the database
	# layer rather than pasted into the statement.
	my $column = $db->quote_identifier( $doi_field );
	my $sql = "SELECT eprintid, title, ".$column.
		" FROM eprint WHERE eprint_status = 'archive' and ".
		$column."=".$db->quote_value( $doi );

	my $sth = $db->prepare( $sql );
	$db->execute( $sth, $sql );
	my @row = $sth->fetchrow_array;
	# Only the first row is of interest; release the statement handle.
	$sth->finish;

	return unless @row;
	return( $row[0], $row[2] );
}

# _fetch_query_node( $url )
#
# Fetches and parses $url and walks query_result -> body -> query in the
# response. Returns ( $dom_doc, $dom_query ); either may be undef if the
# fetch/parse failed or the expected element is missing. The caller is
# responsible for disposing of $dom_doc.
sub _fetch_query_node
{
	my( $plugin, $url ) = @_;

	my $dom_doc;
	eval {
		$dom_doc = EPrints::XML::parse_url( $url );
	};
	# Capture the error at once; $@ can be overwritten by later evals.
	my $parse_error = $@;
	return( undef, undef ) if $parse_error || !defined $dom_doc;

	my $dom_top = $dom_doc->getDocumentElement;
	return( $dom_doc, undef ) unless defined $dom_top;

	my $dom_query_result = ($dom_top->getElementsByTagName( "query_result" ))[0];
	return( $dom_doc, undef ) unless defined $dom_query_result;

	my $dom_body = ($dom_query_result->getElementsByTagName( "body" ))[0];
	return( $dom_doc, undef ) unless defined $dom_body;

	my $dom_query = ($dom_body->getElementsByTagName( "query" ))[0];

	return( $dom_doc, $dom_query );
}

# convert_input( $data )
#
# Maps the flat hash built from the CrossRef response ($data) onto eprint
# field names and returns the resulting hash reference.
sub convert_input
{
	my( $plugin, $data ) = @_;

	my $epdata = {};

	#AKS286 20140228 Use field name from config parameter, in case it has been customised.
	my $doi_field = $plugin->param( "doi_field" );
	$doi_field = "id_number" unless defined $doi_field;

	if( defined $data->{creators} )
	{
		$epdata->{creators} = $data->{creators};
	}
	elsif( defined $data->{author} )
	{
		$epdata->{creators} = [
			{
				name=>{ family=>$data->{author} },
			}
		];
	}

	if( defined $data->{year} && $data->{year} =~ /^[0-9]{4}$/ )
	{
		$epdata->{date} = $data->{year};
	}

	# The print ISSN is applied second, so it wins when both are present.
	if( defined $data->{"issn.electronic"} )
	{
		$epdata->{issn} = $data->{"issn.electronic"};
	}
	if( defined $data->{"issn.print"} )
	{
		$epdata->{issn} = $data->{"issn.print"};
	}

	if( defined $data->{"doi"} )
	{
		$epdata->{$doi_field} = $data->{"doi"};

		# The resolver URL must not carry the "doi:" prefix.
		my $doi = $data->{"doi"};
		$doi =~ s/^\s*doi:\s*//i;
		$epdata->{official_url} = "http://dx.doi.org/$doi";
	}

	# Straight one-to-one copies: CrossRef key => eprint field.
	# "series_title" is deliberately absent: there is no agreed target field.
	my @direct_copies = (
		[ "volume_title"  => "book_title" ],
		[ "journal_title" => "publication" ],
		[ "article_title" => "title" ],
		[ "isbn"          => "isbn" ],
		[ "volume"        => "volume" ],
		[ "issue"         => "number" ],
	);
	foreach my $pair ( @direct_copies )
	{
		my( $from, $to ) = @{$pair};
		$epdata->{$to} = $data->{$from} if defined $data->{$from};
	}

	if( defined $data->{"first_page"} )
	{
		$epdata->{pagerange} = $data->{"first_page"};
	}
	if( defined $data->{"last_page"} )
	{
		$epdata->{pagerange} = "" unless defined $epdata->{pagerange};
		$epdata->{pagerange} .= "-" . $data->{"last_page"};
	}

	# journal_article is applied second, so it wins when both are present.
	if( defined $data->{"doi.conference_paper"} )
	{
		$epdata->{type} = "conference_item";
	}
	if( defined $data->{"doi.journal_article"} )
	{
		$epdata->{type} = "article";
	}

	return $epdata;
}

# url_encode( $str )
#
# Returns $str with every character outside A-Z, a-z and 0-9 replaced by a
# percent sign followed by its two-digit uppercase hex character code.
sub url_encode
{
	my ($str) = @_;
	$str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
	return $str;
}

1;

=head1 COPYRIGHT

=for COPYRIGHT BEGIN

Copyright 2000-2011 University of Southampton.

=for COPYRIGHT END

=for LICENSE BEGIN

This file is part of EPrints L<http://www.eprints.org/>.

EPrints is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

EPrints is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with EPrints.  If not, see L<http://www.gnu.org/licenses/>.

=for LICENSE END