package EPrints::Plugin::Export::TweetStream::GraphML;
@ISA = ( "EPrints::Plugin::Export" );
use strict;
use warnings;
# Constructor. Builds the plugin through the parent class and then records
# the settings that describe this export format: its display name, the
# object type it accepts, its visibility, the file suffix and the MIME type.
#
# Parameters:
#   $class - class name the constructor was invoked on
#   %opts  - options handed unchanged to the parent constructor
#
# Returns the plugin object.
sub new
{
    my( $class, %opts ) = @_;

    my( $self ) = $class->SUPER::new( %opts );

    my %export_settings = (
        name     => "NodeXL (GraphML)",
        accept   => [ 'dataobj/tweetstream' ],
        visible  => "all",
        suffix   => ".graphml",
        mimetype => "application/graphml+xml",
    );

    # keys and values walk the same unmodified hash in the same order, so
    # each setting lands under its own name.
    @{$self}{ keys %export_settings } = values %export_settings;

    return $self;
}
# Export one tweetstream as text built from graphML_head, one fragment per
# tweeter, and graphML_tail.
#
# Parameters:
#   $plugin  - the export plugin object (not used in the body)
#   $dataobj - the tweetstream; this sub calls repository, id,
#              value('tweet_count') and tweets on it
#   %opts    - options; only $opts{fh} is read here
#
# Returns:
#   When $opts{fh} is defined the text is printed to that handle and nothing
#   is returned; otherwise the text is returned. If the stream holds more
#   tweets than the repository's 'tweepository_export_threshold' setting, the
#   export is refused: the refusal message is logged and then delivered the
#   same way the export would have been.
sub output_dataobj
{
    my( $plugin, $dataobj, %opts ) = @_;

    my $repository = $dataobj->repository;

    #this is only for little sets of tweets
    my $tweet_count = $dataobj->value('tweet_count');
    if( $tweet_count > $repository->config('tweepository_export_threshold') )
    {
        my $msg =
            "Disallowing GraphML export of Tweetstream " . $dataobj->id .
            " with " . $tweet_count . " tweets ";
        $repository->log($msg);
        if( defined $opts{fh} )
        {
            print {$opts{fh}} $msg;
            return;
        }
        return $msg;
    }

    # Running totals filled in by aggregate_tweet for every tweet.
    my $data = {
        max_tweet_count => 0,
        max_mention_count => 0,
        tweeters => {},
    };

    # The callback receives the extra argument given to map ($data) as its
    # fourth parameter; the first two parameters are not needed here.
    $dataobj->tweets->map(sub
    {
        my (undef, undef, $tweet, $stats) = @_;
        aggregate_tweet($tweet, $stats);
    }, $data);

    my $r = graphML_head();

    # Hash key order differs from one process to the next, so walk the
    # handles sorted to give the tweeters a stable sequence between runs.
    foreach my $handle (sort keys %{$data->{tweeters}})
    {
        $r .= tweeter_data_to_graphML(
            $data->{tweeters}->{$handle},
            $data->{max_tweet_count},
            $data->{max_mention_count},
        );
    }

    $r .= graphML_tail();

    if( defined $opts{fh} )
    {
        print {$opts{fh}} $r;
        return;
    }
    return $r;
}
# Render one tweeter record as a string fragment for the export.
#
# Parameters:
#   $tweeter           - hashref; the keys read here are tweet_count,
#                        profile_image_url, handle and mentions (a hashref
#                        mapping a mentioned handle to a count), plus whatever
#                        generate_tweeter_tooltip reads
#   $max_tweet_count   - passed to generate_size as the scale for 'V-Size'
#   $max_mention_count - passed to generate_size as the scale for each mention
#
# Returns the accumulated string.
#
# NOTE(review): every markup string literal in this sub is empty in this copy
# of the file, so as written only the values and newlines are emitted.
# Presumably the literals originally held GraphML tags -- confirm against the
# upstream source before relying on or restyling this code.
sub tweeter_data_to_graphML
{
my ($tweeter, $max_tweet_count, $max_mention_count) = @_;
my $r;
$r .= '';
# Values to emit for this tweeter, keyed by names that all start with "V-".
my $data_elements =
{
'V-Size' => generate_size($tweeter->{tweet_count}, $max_tweet_count, 100),
'V-Image File' => ($tweeter->{profile_image_url} ? $tweeter->{profile_image_url} : '') ,
'V-Custom Menu Item Text' => 'Open Twitter Page for This Person',
'V-Custom Menu Item Action' => 'http://twitter.com/'.$tweeter->{handle},
'V-Label' => $tweeter->{handle},
'V-Tooltip' => generate_tweeter_tooltip($tweeter)
};
# keys() gives no particular order, so the values are appended in hash order.
foreach my $k (keys %{$data_elements})
{
# The truth test skips undef and '' but also the string "0".
next unless $data_elements->{$k}; #don't create empty tags
# NOTE(review): $k itself is not written to the output in the visible code.
$r .= "";
$r .= $data_elements->{$k};
$r .= '';
}
$r .= "\n";
# One line per handle this tweeter mentioned; the mention count is scaled
# by generate_size with 5 as its third argument.
# NOTE(review): $handle is only used as a hash key in the visible code.
foreach my $handle (keys %{$tweeter->{mentions}})
{
$r .= '';
$r .= '' . generate_size($tweeter->{mentions}->{$handle}, $max_mention_count, 5) . '';
$r .= "\n";
}
return $r;
}
# Build the three-line tooltip text for a tweeter.
#
# Parameters:
#   $tweeter_data - hashref providing handle, tweet_count and mention_count
#
# Returns a string of three newline-separated lines: the handle prefixed
# with "@", the number of tweets authored, and the number of tweets the
# tweeter was mentioned in.
sub generate_tweeter_tooltip
{
    my ($tweeter_data) = @_;

    return sprintf(
        "\@%s\n - author of %s tweets\n - mentioned in %s tweets",
        $tweeter_data->{handle},
        $tweeter_data->{tweet_count},
        $tweeter_data->{mention_count},
    );
}
# Scale a count into the range 1 .. $return_max.
#
# Parameters:
#   $n          - the count to scale
#   $max_n      - the largest count in the data set (the value that maps to
#                 $return_max)
#   $return_max - the size to return when $n equals $max_n
#
# Returns int($return_max * $n / $max_n), but never 0: a zero (or undefined)
# count, a missing or zero $max_n, and any result that truncates to 0 all
# yield 1.
sub generate_size
{
    my ($n, $max_n, $return_max) = @_;

    return 1 if !defined $n or $n == 0;

    # Without a usable maximum there is nothing to scale against; fall back
    # to the minimum size rather than dying on a division by zero.
    return 1 if !defined $max_n or $max_n == 0;

    # Multiply before dividing. Dividing first produces a rounded fraction
    # (29/100 is stored as 0.28999...), which int() would then truncate to
    # one less than the intended size.
    my $size = int( ($return_max * $n) / $max_n );

    return $size || 1; #we don't want to return 0
}
# Fold one tweet into the running per-tweeter statistics.
#
# Parameters:
#   $tweet - object answering value('from_user') and value('tweetees'), and
#            whatever tweet_to_tweeter_data reads from it
#   $data  - hashref updated in place; keys used are tweeters (handle =>
#            tweeter record), max_tweet_count and max_mention_count
#
# Tweets without a from_user value are ignored. Returns nothing.
sub aggregate_tweet
{
    my ($tweet, $data) = @_;

    my $tweeter_handle = $tweet->value('from_user');
    return unless $tweeter_handle;

    my $tweeter = $data->{tweeters}->{$tweeter_handle};
    if (
        !defined $tweeter or #uninitialised -- we know nothing about this tweeter
        $tweeter->{tweet_count} == 0 #we have very little data - previously mentioned, but no tweets yet
    )
    {
        $tweeter = tweet_to_tweeter_data($tweet, $tweeter);
        $data->{tweeters}->{$tweeter_handle} = $tweeter;
    }

    $tweeter->{tweet_count}++;
    if ($tweeter->{tweet_count} > $data->{max_tweet_count})
    {
        $data->{max_tweet_count} = $tweeter->{tweet_count};
    }

    my $tweetees = $tweet->value('tweetees');
    foreach my $raw_tweetee (@{ $tweetees || [] })
    {
        # foreach aliases its variable to the array element, so stripping
        # the leading "@" in place would rewrite the array handed back by
        # the tweet. Work on a copy instead.
        (my $tweetee = $raw_tweetee) =~ s/^\@//;

        # Per-author tally of how often this author mentioned $tweetee.
        $tweeter->{mentions}->{$tweetee}++;
        if ($tweeter->{mentions}->{$tweetee} > $data->{max_mention_count})
        {
            $data->{max_mention_count} = $tweeter->{mentions}->{$tweetee};
        }

        # A mentioned handle gets a placeholder record until one of its own
        # tweets is seen (tweet_count stays 0 until then).
        if (!defined $data->{tweeters}->{$tweetee})
        {
            $data->{tweeters}->{$tweetee} =
            {
                'handle' => $tweetee,
                'tweet_count' => 0,
                'mention_count' => 0,
            };
        }
        $data->{tweeters}->{$tweetee}->{mention_count}++;
    }

    return;
}
# Fill in a tweeter record from one of that tweeter's tweets.
#
# Parameters:
#   $tweet   - object answering value('from_user') and
#              value('profile_image_url')
#   $tweeter - optional existing record (hashref) to update; a new one is
#              created when this is false
#
# Returns the record. handle and profile_image_url are always overwritten
# from the tweet; mention_count and tweet_count are set to 0 only when they
# do not already hold a true value.
sub tweet_to_tweeter_data
{
    my ($tweet, $tweeter) = @_;

    $tweeter ||= {};

    $tweeter->{handle} = $tweet->value('from_user');
    $tweeter->{profile_image_url} = $tweet->value('profile_image_url');

    foreach my $counter (qw( mention_count tweet_count ))
    {
        $tweeter->{$counter} ||= 0;
    }

    return $tweeter;
}
# Supplies the text that output_dataobj places at the start of the export,
# before any per-tweeter fragment.
#
# NOTE(review): the body below looks like a damaged here-document -- the
# "return <" line is followed directly by a bare END terminator, so the
# heredoc operator and its content appear to be missing from this copy.
# Presumably it held the opening GraphML markup; restore it from the
# upstream source, since the content cannot be recovered from this file.
sub graphML_head
{
return <
END
}
# Supplies the text that output_dataobj appends at the end of the export,
# after all per-tweeter fragments.
#
# NOTE(review): the body below looks like a damaged here-document -- the
# "return <" line is followed directly by a bare TAIL terminator, so the
# heredoc operator and its content appear to be missing from this copy.
# Presumably it held the closing GraphML markup; restore it from the
# upstream source, since the content cannot be recovered from this file.
sub graphML_tail
{
return <
TAIL
}
1;