package EPrints::Plugin::Export::TweetStream::GraphML;

# Export plugin that renders a tweetstream as a GraphML document intended for
# NodeXL: one node per Twitter handle (tweeters and the people they mention)
# and one edge per (tweeter, mentioned handle) pair.
#
# NOTE(review): the copy of this file that was reviewed had every XML tag
# stripped out of its string literals and heredocs.  The markup below
# (<node>, <edge>, <data>, the <key> declarations and the document envelope)
# is a reconstruction inferred from the 'V-...' data keys used in the code.
# Confirm element and key names against the upstream file / NodeXL before
# relying on them.

use strict;
use warnings;

our @ISA = ( "EPrints::Plugin::Export" );

# Key id used for the per-edge size value.
# NOTE(review): reconstructed -- the original key id was not recoverable.
my $EDGE_SIZE_KEY = 'E-Width';

# Constructor: registers the plugin's display name, accepted object type,
# visibility, file suffix and MIME type on top of the base class defaults.
sub new
{
    my( $class, %opts ) = @_;

    my( $self ) = $class->SUPER::new( %opts );

    $self->{name}     = "NodeXL (GraphML)";
    $self->{accept}   = [ 'dataobj/tweetstream' ];
    $self->{visible}  = "all";
    $self->{suffix}   = ".graphml";
    $self->{mimetype} = "application/graphml+xml";

    return $self;
}

# Serialise one tweetstream.
#
#   $plugin  - this plugin instance (unused)
#   $dataobj - the tweetstream data object
#   %opts    - if $opts{fh} is defined the output is printed to that handle
#              and nothing is returned; otherwise the output is returned.
#
# When the stream's tweet_count exceeds the repository's
# 'tweepository_export_threshold' setting, the refusal message is logged and
# delivered in place of the graph.
sub output_dataobj
{
    my( $plugin, $dataobj, %opts ) = @_;

    my $repository = $dataobj->repository;

    # An undefined count or threshold is compared as 0, exactly as Perl's
    # numeric comparison would treat it, but without the "uninitialized"
    # warning.
    my $tweet_count = $dataobj->value('tweet_count');
    $tweet_count = 0 unless defined $tweet_count;
    my $threshold = $repository->config('tweepository_export_threshold');
    $threshold = 0 unless defined $threshold;

    # this is only for little sets of tweets
    if( $tweet_count > $threshold )
    {
        my $msg = "Disallowing GraphML export of Tweetstream " . $dataobj->id . " with " . $tweet_count . " tweets ";
        $repository->log($msg);
        return _deliver($msg, %opts);
    }

    my $data = {
        max_tweet_count   => 0,
        max_mention_count => 0,
        tweeters          => {},
    };

    # The callback receives (repository, dataset, tweet, info); only the last
    # two are needed here.
    $dataobj->tweets->map(sub {
        my (undef, undef, $tweet, $info) = @_;
        aggregate_tweet($tweet, $info);
    }, $data);

    my $r = graphML_head();
    # Sorted so that repeated exports of the same data are byte-identical.
    foreach my $handle (sort keys %{ $data->{tweeters} })
    {
        $r .= tweeter_data_to_graphML(
            $data->{tweeters}->{$handle},
            $data->{max_tweet_count},
            $data->{max_mention_count},
        );
    }
    $r .= graphML_tail();

    return _deliver($r, %opts);
}

# Private: print $text to $opts{fh} when one was supplied (returning nothing),
# otherwise hand the text back to the caller.
sub _deliver
{
    my( $text, %opts ) = @_;

    if( defined $opts{fh} )
    {
        print {$opts{fh}} $text;
        return;
    }

    return $text;
}

# Private: escape the five characters that are special in XML so that $text
# is safe in both element content and double- or single-quoted attribute
# values.  Undefined input yields the empty string.
sub _xml_escape
{
    my( $text ) = @_;

    return '' unless defined $text;

    $text =~ s/&/&amp;/g;    # must run first so later entities survive
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;

    return $text;
}

# Render one tweeter record as a node element followed by one edge element
# per handle the tweeter mentioned.
#
#   $tweeter           - hashref with handle, tweet_count, mention_count and
#                        optionally profile_image_url and mentions
#   $max_tweet_count   - largest tweet_count in the data set (scales node size)
#   $max_mention_count - largest per-pair mention count (scales edge size)
#
# Returns the markup as a string.
sub tweeter_data_to_graphML
{
    my( $tweeter, $max_tweet_count, $max_mention_count ) = @_;

    my $handle  = defined $tweeter->{handle} ? $tweeter->{handle} : '';
    my $node_id = _xml_escape($handle);

    my %data_elements = (
        'V-Size'                    => generate_size($tweeter->{tweet_count}, $max_tweet_count, 100),
        'V-Image File'              => ($tweeter->{profile_image_url} ? $tweeter->{profile_image_url} : ''),
        'V-Custom Menu Item Text'   => 'Open Twitter Page for This Person',
        'V-Custom Menu Item Action' => 'http://twitter.com/' . $handle,
        'V-Label'                   => $handle,
        'V-Tooltip'                 => generate_tweeter_tooltip($tweeter),
    );

    my $r = '<node id="' . $node_id . '">';
    foreach my $k (sort keys %data_elements)
    {
        my $v = $data_elements{$k};
        next unless defined $v && length $v;    # don't create empty tags

        # Values come from tweet data (handles, image URLs with query
        # strings), so they must be escaped to keep the document well-formed.
        $r .= '<data key="' . _xml_escape($k) . '">';
        $r .= _xml_escape($v);
        $r .= '</data>';
    }
    $r .= "</node>\n";

    my $mentions = $tweeter->{mentions} || {};
    foreach my $mentioned (sort keys %{$mentions})
    {
        $r .= '<edge source="' . $node_id . '" target="' . _xml_escape($mentioned) . '">';
        $r .= '<data key="' . $EDGE_SIZE_KEY . '">' . generate_size($mentions->{$mentioned}, $max_mention_count, 5) . '</data>';
        $r .= "</edge>\n";
    }

    return $r;
}

# Build the multi-line tooltip text for a tweeter: the @handle, the number of
# tweets authored and the number of tweets the handle was mentioned in.
sub generate_tweeter_tooltip
{
    my( $tweeter_data ) = @_;

    my @parts = (
        '@' . $tweeter_data->{handle},
        ' - author of ' . $tweeter_data->{tweet_count} . ' tweets',
        ' - mentioned in ' . $tweeter_data->{mention_count} . ' tweets',
    );

    return join("\n", @parts);
}

# Scale $n (out of $max_n) onto the integer range 1 .. $return_max.
#
# Returns 1 when $n is zero/undefined, and also when $max_n is zero, negative
# or undefined -- the latter previously died with "Illegal division by zero".
sub generate_size
{
    my( $n, $max_n, $return_max ) = @_;

    return 1 unless $n;
    return 1 unless $max_n && $max_n > 0;

    my $percentage = $n / $max_n;
    my $size       = int( $return_max * $percentage );

    $size = 1 unless $size;    # we don't want to return 0

    return $size;
}

# Fold one tweet into the running aggregate.
#
#   $tweet - object answering ->value('from_user'), ->value('tweetees') and
#            ->value('profile_image_url')
#   $data  - hashref with max_tweet_count, max_mention_count and tweeters
#            (handle => tweeter record); updated in place
#
# Tweets without a from_user are ignored.
sub aggregate_tweet
{
    my( $tweet, $data ) = @_;

    my $tweeter_handle = $tweet->value('from_user');
    return unless $tweeter_handle;

    my $tweeter = $data->{tweeters}->{$tweeter_handle};
    if(
        !defined $tweeter                  # uninitialised -- we know nothing about this tweeter
        or $tweeter->{tweet_count} == 0    # previously mentioned, but no tweets seen yet
    )
    {
        $tweeter = tweet_to_tweeter_data($tweet, $tweeter);
        $data->{tweeters}->{$tweeter_handle} = $tweeter;
    }

    $tweeter->{tweet_count}++;
    if( $tweeter->{tweet_count} > $data->{max_tweet_count} )
    {
        $data->{max_tweet_count} = $tweeter->{tweet_count};
    }

    my $tweetees = $tweet->value('tweetees') || [];
    foreach my $raw_tweetee (@{$tweetees})
    {
        next unless defined $raw_tweetee;

        # Work on a copy: foreach aliases the array elements, so editing the
        # loop variable directly would rewrite the tweet's own data.
        ( my $tweetee = $raw_tweetee ) =~ s/^\@//;
        next unless length $tweetee;

        $tweeter->{mentions}->{$tweetee}++;
        if( $tweeter->{mentions}->{$tweetee} > $data->{max_mention_count} )
        {
            $data->{max_mention_count} = $tweeter->{mentions}->{$tweetee};
        }

        # Mentioned handles get a placeholder record so that every edge
        # target also exists as a node.
        if( !defined $data->{tweeters}->{$tweetee} )
        {
            $data->{tweeters}->{$tweetee} = {
                'handle'        => $tweetee,
                'tweet_count'   => 0,
                'mention_count' => 0,
            };
        }
        $data->{tweeters}->{$tweetee}->{mention_count}++;
    }

    return;
}

# Create a tweeter record from a tweet, or complete an existing placeholder
# record (one created for a handle that had only been mentioned so far).
# Existing counts are kept; missing ones start at 0.  Returns the record.
sub tweet_to_tweeter_data
{
    my( $tweet, $tweeter ) = @_;

    $tweeter = {} unless $tweeter;

    $tweeter->{handle}            = $tweet->value('from_user');
    $tweeter->{profile_image_url} = $tweet->value('profile_image_url');
    $tweeter->{mention_count}     = 0 unless $tweeter->{mention_count};
    $tweeter->{tweet_count}       = 0 unless $tweeter->{tweet_count};

    return $tweeter;
}

# Opening part of the document: XML declaration, <graphml> root, the key
# declarations for every data key this plugin emits, and the <graph> opener.
# NOTE(review): reconstructed markup -- confirm against the upstream file.
sub graphML_head
{
    return <<'END';
<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
<key id="V-Size" for="node" attr.name="Size" attr.type="string" />
<key id="V-Image File" for="node" attr.name="Image File" attr.type="string" />
<key id="V-Custom Menu Item Text" for="node" attr.name="Custom Menu Item Text" attr.type="string" />
<key id="V-Custom Menu Item Action" for="node" attr.name="Custom Menu Item Action" attr.type="string" />
<key id="V-Label" for="node" attr.name="Label" attr.type="string" />
<key id="V-Tooltip" for="node" attr.name="Tooltip" attr.type="string" />
<key id="E-Width" for="edge" attr.name="Width" attr.type="string" />
<graph edgedefault="directed">
END
}

# Closing part of the document, matching graphML_head().
# NOTE(review): reconstructed markup -- confirm against the upstream file.
sub graphML_tail
{
    return <<'TAIL';
</graph>
</graphml>
TAIL
}

1;