# EPrints::Plugin::Import::Youtube - import plugin that takes a YouTube URL
# read from the input filehandle and produces a "video" eprint with the
# downloaded file attached as a document.
#
# NOTE(review): this copy of the file appears to have been damaged by a
# whitespace-collapsing / tag-stripping step. Confirm every point below against
# the upstream file before relying on this text:
#   * '=head1' and '=cut' no longer sit at the start of their own lines, so
#     the '=cut' is not recognised and everything below parses as POD.
#   * The '#' that ends the first physical line presumably commented out the
#     getAuthorFirstname() assignment that now begins the next line;
#     getAuthorFirstname and getAuthorSurname are not defined in the visible
#     text.
#   * The search strings given to index() in getAuthorID and getVideoDate are
#     empty, and the one in getDescription contains only line breaks although
#     its offset is +25 - presumably these were HTML markers.
#   * getKeywords breaks off inside a quoted string and runs straight into the
#     body of getYouTubeTitle, whose 'sub' header is missing.
#   * 'my $title = ;' and 'my $filename = ;' have lost their right-hand side,
#     presumably a <FILE> readline.
#   * LWP::UserAgent, HTML::Entities, File::Temp and File::Spec are used but
#     not loaded in the visible text - presumably loaded elsewhere.
#
# Subroutines, as far as the visible code shows:
#   new( %opts )
#       Calls the parent constructor, then sets name "Youtube", the produce
#       list (dataobj/eprint, dataobj/document), an empty accept list and
#       advertise = 1.
#   input_fh( %opts )
#       Joins everything readable from $opts{fh} into one string and uses it
#       as the URL (no chomp/trim is applied). Returns an EPrints::List over
#       $opts{dataset} holding the id of the created object, or no ids when
#       getPubFromYoutube returned a false value.
#   getPubFromYoutube( $url, $dataset )
#       Fetches title and filename, collects metadata with getDataFromWebSite,
#       sets title and official_url, creates the object via epdata_to_dataobj
#       and passes it to addVideoToEPrint. Returns the data object.
#   getDataFromWebSite( $url )
#       Fills type/output_media/publisher/ispublished/date_type with fixed
#       values, GETs $url, and on success adds keywords, abstract, the first
#       creator's id and family name, and date from the page content.
#       '$epdata = ()' leaves the scalar undefined; the hash is autovivified
#       by the first assignment. Returns the hash reference.
#   getAuthorID / getVideoDate / getDescription / getKeywords( $data )
#       Cut values out of the page text with index()/substr(). getVideoDate
#       parses its substring with the format "%d %b %Y" (Time::Piece) and
#       returns it formatted as '%Y-%m-%d'.
#   getYouTubeTitle / getYouTubeFilename( $url )
#       Run the "youtube-title" / "youtube-filename" command through
#       $repo->exec with VIDURL and a File::Temp OUTPUT file (suffix ".txt"),
#       then open that file through the bareword handle FILE; the result of
#       open is not checked.
#   addVideoToEPrint( $dataobj, $url, $filename )
#       Sanitises $filename and replaces '/' with '_'. The suffix is taken
#       from the last '.', and tr///c plus s/ //g drop every character other
#       than letters, digits and square brackets. The stem is taken up to the
#       FIRST '.', so text between the first and last dot is discarded.
#       Returns $dataobj unchanged when it is undefined. Otherwise creates a
#       "documents" sub-object with format "video" and runs "youtube-dl" into
#       a File::Temp::tempnam path. It re-checks the file size up to 10 times,
#       sleeping 2 seconds between checks.
#       If the size is still below 1 it prints a FILESIZE line to STDERR and
#       returns; the document has already been created and the temp file is
#       not unlinked on that path.
#       On success it calls add_file, unlinks the temp file, commits, calls
#       set_main when add_file returned true, and commits again.
#       Returns $dataobj.
=head1 NAME EPrints::Plugin::Import::Youtube =cut package EPrints::Plugin::Import::Youtube; use EPrints::Plugin::Import; @ISA = qw( EPrints::Plugin::Import ); use strict; sub new { my( $class, %opts ) = @_; my $self = $class->SUPER::new( %opts ); $self->{name} = "Youtube"; $self->{produce} = [qw( dataobj/eprint dataobj/document )]; $self->{accept} = [qw( )]; $self->{advertise} = 1; return $self; } sub input_fh { my( $self, %opts ) = @_; my $fh = $opts{"fh"}; my $url = join "", <$fh>; my $dataset = $opts{dataset}; my $dataobj = $self->getPubFromYoutube($url,$dataset); my @ids; push @ids, $dataobj->id if $dataobj; return EPrints::List->new( session => $self->{session}, dataset => $opts{dataset}, ids => \@ids ); } sub getPubFromYoutube { my ( $self, $url, $dataset ) = @_; my $repo = $self->{repository}; my $title = $self->getYouTubeTitle($url); my $filename = $self->getYouTubeFilename($url); my $epdata = $self->getDataFromWebSite($url); $epdata->{title} = $title; $epdata->{official_url} = $url; my $dataobj = $self->epdata_to_dataobj( $dataset, $epdata ); $dataobj = $self->addVideoToEPrint($dataobj,$url,$filename); return $dataobj; } sub getDataFromWebSite { my ( $self, $url ) = @_; my $repo = $self->{repository}; my $epdata = (); $epdata->{type} = "video"; $epdata->{output_media} = "Video"; $epdata->{publisher} = "Youtube"; $epdata->{ispublished} = "pub"; $epdata->{date_type} = "published"; use HTTP::Request::Common qw(GET); my $ua = LWP::UserAgent->new; my $req = GET $url; my $res = $ua->request($req); if ($res->is_success) { my $content = $res->content; $epdata->{keywords} = HTML::Entities::decode_entities($self->getKeywords($content)); $epdata->{abstract} = HTML::Entities::decode_entities($self->getDescription($content)); $epdata->{creators}->[0]->{id} = HTML::Entities::decode_entities($self->getAuthorID($content)); $epdata->{creators}->[0]->{name}->{family} = HTML::Entities::decode_entities($self->getAuthorSurname($content)); # 
$epdata->{creators}->[0]->{name}->{given} = $self->getAuthorFirstname($content); $epdata->{date} = $self->getVideoDate($content); } return $epdata; } sub getAuthorID { my ( $self, $data ) = @_; my $id = substr($data,index($data,'Uploaded by')+11,length($data)); $id = substr($id,0,index($id,'')); $id = substr($id,index($id,'')); $id = substr($id,index($id,'">')+2,length($id)); return $id; } sub getVideoDate { my ( $self, $data ) = @_; my $length = length(''); my $id = substr($data,index($data,'')+$length,length($data)); $id = substr($id,0,index($id,'')); use POSIX qw/strftime/; use Time::Piece; my $time = Time::Piece->strptime($id, "%d %b %Y"); return $time->strftime('%Y-%m-%d'); } sub getDescription { my ( $self, $data ) = @_; my $repo = $self->{repository}; my $description = substr($data,index($data,'

')+25,length($data)); $description = substr($description,0,index($description,'

')); return $description; } sub getKeywords { my ( $self, $data ) = @_; my $repo = $self->{repository}; my $keywords = substr($data,index($data,'{repository}; my $output = File::Temp->new( SUFFIX => ".txt"); $repo->exec( "youtube-title", VIDURL => $url, OUTPUT => $output, ); open FILE, "<", $output; my $title = ; close FILE; return $title; } sub getYouTubeFilename { my ( $self, $url ) = @_; my $repo = $self->{repository}; my $output = File::Temp->new( SUFFIX => ".txt"); $repo->exec( "youtube-filename", VIDURL => $url, OUTPUT => $output, ); open FILE, "<", $output; my $filename = ; close FILE; return $filename; } sub addVideoToEPrint { my ( $self, $dataobj, $url, $filename ) = @_; $filename = EPrints->system->sanitise( $filename ); $filename =~ s!/!_!g; my $repo = $self->{repository}; my $suffix = substr($filename,rindex($filename,"."),length($filename)); $suffix =~ tr/[a-z][A-Z][0-9]/ /c; $suffix =~ s/ //g; $suffix = "." . $suffix; $filename = substr($filename,0,index($filename,".")) . $suffix; my $tmpdir = File::Spec->tmpdir; my $output = File::Temp::tempnam($tmpdir,"youtube"); my $tempfilename = $output; if (!defined $dataobj) { return $dataobj; } my $document = $dataobj->create_subdataobj( "documents", { format => "video", }); $repo->exec( "youtube-dl", VIDURL => $url, OUTPUT => $tempfilename, ); my $count = 0; my $filesize = -s $tempfilename; while (($filesize < 1) and ($count < 10)) { sleep(2); $filesize = -s $tempfilename; $count += 1; } if ($filesize < 1) { print STDERR "FILESIZE $tempfilename $filesize $count\n"; return $dataobj; } my $success = $document->add_file( $tempfilename, $filename ); unlink( $tempfilename ); $document->commit; if ($success) { $document->set_main( $filename ); } $document->commit; return $dataobj; } 1; =head1 COPYRIGHT =for COPYRIGHT BEGIN Copyright 2000-2011 University of Southampton. =for COPYRIGHT END =for LICENSE BEGIN This file is part of EPrints L. 
EPrints is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. EPrints is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with EPrints. If not, see L<http://www.gnu.org/licenses/>. =for LICENSE END