package EPrints::Plugin::Event::UpdateTweetStreams;

use EPrints::Plugin::Event;
@ISA = qw( EPrints::Plugin::Event );

use strict;

use URI;
use LWP::UserAgent;
use JSON;
use Encode qw(encode);


# Event action: harvest new tweets for every active tweetstream.
#
# Outline:
#   1. Build one queue item per distinct search string (create_queue_item
#      merges tweetstreams that share a search string).
#   2. Repeatedly take an item from the front of the queue, request one page
#      of results from the search API, and create/update a Tweet data object
#      for each result.  The item is requeued for its next page until a page
#      is empty, a previously harvested tweet is reached, or the API refuses
#      the request.
#   3. Commit every active tweetstream, clearing its 'newborn' flag.
#
# Failure handling:
#   * HTTP 403        - the item is dropped, other items carry on.
#   * other HTTP error - assumed to be the API rate limit; all requests stop.
#   * undecodable JSON - the same page is retried up to {retries} times.
#   * decoded body without a 'results' list - the item is dropped.
#
# Parameters: $self - the plugin object (provides repository(),
#             active_tweetstreams() and order_queue()).
sub action_update_tweetstreams
{
	my ($self) = @_;

	my $FEEDS_IN_PARALLEL = 3;
	my $api_url = 'http://search.twitter.com/search.json';

	my $active_tweetstreams = $self->active_tweetstreams;
	my $queue_items = {};
	$active_tweetstreams->map( \&EPrints::Plugin::Event::UpdateTweetStreams::create_queue_item, $queue_items);

	my @queue = values %{$queue_items};

	my $ua = LWP::UserAgent->new;
	#one JSON codec serves every response; no need to rebuild it per request
	my $json = JSON->new->allow_nonref->utf8;
	my $nosort = 0;

	ITEM: while ( scalar @queue ) #future development -- test API limits too
	{
		#prioritise by date, but have some parallelisation
		#nosort flag counts down from FEEDS_IN_PARALLEL
		if (!$nosort)
		{
			@queue = $self->order_queue(@queue);
			$nosort = $FEEDS_IN_PARALLEL + 1;
		}
		$nosort--;

		#remove item from the front of the queue
		my $current_item = shift @queue;

		#query Twitter API
		my $url = URI->new( $api_url );
		$url->query_form( %{$current_item->{search_params}} );
		my $response = $ua->get($url);

		if (!$response->is_success)
		{
			#handle failure
			my $code = $response->code;
			if ($code == 403) #forbidden -- probably because we've gone back too many pages on this item
			{
				#We've got all we can.  Move onto the next and let this one fall off of the queue
				next ITEM;
			}

			#otherwise, assume we've gone over the API limit, and halt *all* requests
			print STDERR 'Got failure status, assuming API limit reached: ',$response->status_line, "\n";
			last ITEM;
		}

		#the JSON decoder is in utf8 mode, so hand it octets rather than characters
		my $json_tweets = encode('utf-8',$response->decoded_content);

		#convert JSON to perl structure
		my $tweets = eval { $json->decode($json_tweets); };
		if ($@)
		{
			print STDERR "Couldn't decode json: $@\n";
			if ($current_item->{retries})
			{
				#requeue X times (where X is the number of retries)
				$current_item->{retries}--;
				push @queue, $current_item;
			}
			#else let this one fall off the end of the queue

			#nothing was decoded, so there is nothing to process for this request.
			#Carrying on would dereference an undefined value (fatal under strict)
			#and would queue a retried item a second time further down.
			next ITEM;
		}

		#allow_nonref means the decoded value need not be a hash, and an error
		#document may not carry a results list at all
		my $results = ( ref($tweets) eq 'HASH' ) ? $tweets->{results} : undef;
		if ( ref($results) ne 'ARRAY' )
		{
			print STDERR "Response did not contain a list of results, dropping queue item\n";
			next ITEM;
		}

		next ITEM unless scalar @{$results}; #if an empty page of results, assume no more tweets

print STDERR ';';
		my $first = 1;
		my $update_finished;
		#create a tweet dataobj for each tweet and store the objid in the queue item
		TWEET_IN_UPDATE: foreach my $tweet (@{$results})
		{
print STDERR '.';
			$update_finished = 0;
			if (!$current_item->{search_params}->{max_id})
			{
				$current_item->{search_params}->{max_id} = $tweet->{id}; #highest ID, for consistent paging
			}
			$current_item->{orderval} = $tweet->{id}; #lowest processed so far, for queue ordering

			#check to see if we already have a tweet with this twitter id in this repository
			my $tweetobj = EPrints::DataObj::Tweet::tweet_with_twitterid($self->repository,$tweet->{id});
			if (!defined $tweetobj)
			{
				$tweetobj = EPrints::DataObj::Tweet->create_from_data(
					$self->repository,
					{
						twitterid => $tweet->{id},
						json_source => $tweet,
						tweetstreams => $current_item->{tweetstreamids},
					}
				);
			}
			else
			{
print STDERR "Tweet exists in multiple streams\n";
print STDERR $tweet->{id}, '-', $tweetobj->id, "\n";
print STDERR "Currently: " . join(',',@{$tweetobj->value('tweetstreams')}), "\n";
print STDERR "Want to add: " . join(',',@{$current_item->{tweetstreamids}}), "\n";

				$tweetobj->add_to_tweetstreamid($current_item->{tweetstreamids});
print STDERR "Now: " . join(',',@{$tweetobj->value('tweetstreams')}), "\n";
			}
			#only the first in the update doesn't have a following tweet
			if (!$first)
			{
				$tweetobj->set_next_in_tweetstream($current_item->{tweetstreamids});
			}
			$tweetobj->commit;

			#since_twitterid is the highest twitter id already held (the lowest such
			#value when several streams share this search).  Reaching an id at or
			#below it means the rest of the results were harvested previously.
			if ($tweet->{id} <= $current_item->{since_twitterid})
			{
				$update_finished = 1;
				last TWEET_IN_UPDATE;
			}
			$first = 0;
		}

		#request the next page of results (unless we've reached a previously seen item)
		#an item that has not been paged yet has just consumed page 1
		$current_item->{search_params}->{page} = ( $current_item->{search_params}->{page} || 1 ) + 1;
		push @queue, $current_item unless $update_finished;

	}

	#the creation of the tweets does not modify the tweetstreams, but they need committing to update the digests.
	$active_tweetstreams->map( sub
	{
		my ($repo, $ds, $tweetstream) = @_;
		$tweetstream->set_value('newborn', 'FALSE'); #tweetstream has had at least one update
		$tweetstream->commit;
	});

}

# Return the queue items in processing order.
#
# Items with no (or a false) orderval come first, so brand new streams are
# prioritised; the remaining items follow in descending orderval order.
#
# Parameters: $self  - the plugin object (unused)
#             @queue - hashrefs, each optionally carrying an 'orderval' key
# Returns:    the same hashrefs as a sorted list
sub order_queue
{
	my ($self, @queue) = @_;

	return sort {
		my $left  = $a->{orderval};
		my $right = $b->{orderval};

		  ( $left && $right ) ? ( $right <=> $left ) #both known: highest first
		: $left               ? 1                    #only $b is new: $b goes first
		: $right              ? -1                   #only $a is new: $a goes first
		:                       0;                   #both new: no preference
	} @queue;
}

# Callback used with a tweetstream list's map(): file one tweetstream into
# the hash of queue items, keyed by search string.
#
# Tweetstreams with an identical search string share a single queue item.
# That item records every contributing tweetstream id, and its
# since_twitterid/orderval hold the lowest 'highest stored twitter id' of
# the merged streams.
#
# Parameters: $repo, $ds    - passed in by map(), unused here
#             $tweetstream  - object answering get_value('search_string'),
#                             highest_twitterid() and id()
#             $queue_items  - hashref of queue items, modified in place
sub create_queue_item
{
	my ($repo, $ds, $tweetstream, $queue_items) = @_;

	my $search_string = $tweetstream->get_value('search_string');
	my $highest_id = $tweetstream->highest_twitterid || 0; #a stream with no tweets yet counts as 0
	my $stream_id = $tweetstream->id;

	my $existing = $queue_items->{$search_string};
	if (!$existing)
	{
		#first stream seen with this search string
		$queue_items->{$search_string} = {
			search_params => {
				q => $search_string,
				rpp => 100,
				#max_id is set to the first ID we get -- used for accurate paging
				#page is set to current page + 1 when this item is requeued
			},
			tweetstreamids => [ $stream_id ], #grows when other streams share the search string
			retries => 5, #how many times a failed request may be tried again
			since_twitterid => $highest_id,
			orderval => $highest_id,
		};
	}
	else
	{
		#another stream with the same search: piggyback on the existing item
		push @{ $existing->{tweetstreamids} }, $stream_id;

		#the shared search must reach back as far as the least up to date stream
		if ($highest_id < $existing->{since_twitterid})
		{
			@{$existing}{qw(since_twitterid orderval)} = ($highest_id, $highest_id);
		}
	}
}


# Search the "tweetstream" dataset for streams whose expiry_date matches the
# open-ended range starting today ("<today>-").
#
# Parameters: $self - the plugin object (provides repository())
# Returns:    whatever EPrints::Search::perform_search returns for that query
sub active_tweetstreams
{
	my ($self) = @_;

	my $repository = $self->repository;
	my $dataset = $repository->get_dataset( "tweetstream" );

	my %search_args = (
		session => $repository,
		dataset => $dataset,
	);
	my $search = EPrints::Search->new( %search_args );

	#a trailing dash turns the date into a range with no upper bound
	my $from_today = EPrints::Time::get_iso_date( time ) . "-";
	$search->add_field( $dataset->get_field( "expiry_date" ), $from_today );

	return $search->perform_search;
}


1;