package EPrints::Plugin::Event::UpdateTweetStreams;

use EPrints::Plugin::Event::LockingEvent;
@ISA = qw( EPrints::Plugin::Event::LockingEvent );

use strict;

use URI;
use LWP::UserAgent;
use JSON;
use Encode qw(encode);
use Net::Twitter::Lite::WithAPIv1_1;

my $HTTP_RETRIES = 5; #for network errors: maximum attempts at a single search query before the queue item is abandoned
my $QUERY_RETRIES = 5; #for API errors: stored on every queue item as 'retries' by create_queue_item
my $QUERIES_BEFORE_RATE_CHECK = 100; #queries issued between rate limit re-checks, because one query may use more than one of the quota if it's complex

#Event entry point: bring every active tweetstream up to date from the Twitter search API.
#
#Parameters:
#	$verbose -- when true, $self->{verbose} is switched on for this run
#
#Flow:
#	1. take the plugin lock (returns immediately if it is already held)
#	2. connect to Twitter and read the remaining search quota
#	3. build one queue item per distinct search and page backwards through the results,
#	   re-checking the quota every $QUERIES_BEFORE_RATE_CHECK queries
#	4. write the run log and release the lock
#
#Returns nothing useful.
sub action_update_tweetstreams
{
	my ($self, $verbose) = @_;

	$self->{verbose} = 1 if $verbose;

	if ($self->is_locked)
	{
		$self->repository->log( (ref $self) . " is locked.  Unable to run.");
		return;
	}
	$self->create_lock;

	$self->{log_data}->{start_time} = scalar localtime time;

	$self->wait;

	my $nt = $self->connect_to_twitter;
	if (!$nt)
	{
		$self->repository->log( (ref $self) . " was unable to connect to twitter.");
		$self->remove_lock; #we took the lock above, so give it back before bailing out
		return;
	}

	$self->output_status('Connected to Twitter');

	my $limit = get_search_rate_limit($nt); 

	$self->output_status("Initial Rate Limit: $limit");

	my $active_tweetstreams = $self->active_tweetstreams;
	my $queue_items = {};
	$active_tweetstreams->map( \&EPrints::Plugin::Event::UpdateTweetStreams::create_queue_item, $queue_items);
	my @queue = values %{$queue_items};

	$self->output_status('Queue has ' . scalar @queue . ' items');

	#per-search log entries, keyed by queue item id (process_results writes to the same hash)
	my $stream_logs = ($self->{log_data}->{tweetstreams} ||= {});

	QUERYSET: while (defined $limit && $limit > 0)
	{
		my $n = $QUERIES_BEFORE_RATE_CHECK;
		$n = $limit if $limit < $n;
		QUERY: for (1..$n)
		{
			if (!@queue)
			{
				$self->output_status('Update queue emptied');
				$self->{log_data}->{end_state} = 'Update queue emptied';
				last QUERYSET;
			}

			my $current_item = shift @queue;
			my $item_id = $current_item->{id};
			my $results = undef;
			my $results_flag = 0;
			my $err = undef;
			my $end_state_recorded = 0; #set when the retry loop has already decided this item's fate

			RETRY: foreach my $retry (1..$HTTP_RETRIES)
			{
				$self->output_status('Attempting to query: ' . $current_item->{search_params}->{q});
				if (!$nt || !$nt->authorized)
				{
					$self->output_status('Oops, not authorise.  Reconnecting....');
					sleep 10;
					$nt = $self->connect_to_twitter; #undef when the reconnect fails; checked again on the next attempt
					next RETRY; #try again
				}

				#the trailing 1 lets us tell success from failure without relying on the truthiness of $@
				my $ok = eval {
					$results = $nt->search($current_item->{search_params});
					1;
				};

				if ($ok)
				{
					$err = undef;
					$self->output_status('Results successfully retrieved');
					$results_flag = 1;
					last RETRY; #we have our results
				}

				#note that this approach only records the final error -- oh well.
				$err = $@;

				#handle response codes -- see https://dev.twitter.com/docs/error-codes-responses
				if (_is_twitter_error($err))
				{
					my $code = $err->code;
					$code = 0 unless defined $code;

					if ($code == 403) #no more data for this stream -- we've gone back as far as we can
					{
						$self->output_status('Err 403: No more results for this search');
						$stream_logs->{$item_id}->{end_state} = 'No More Results (403, went back as far as possible)';
						$end_state_recorded = 1;
						last RETRY;
					}
					elsif ($code == 429) #rate limit reached -- stop all requests
					{
						$self->output_status('ERR 429: API limit exceeded');
						$stream_logs->{$item_id}->{end_state} = 'API rate limit exceeded (429)';
						$self->{log_data}->{end_state} = 'API rate limit exceeded';
						$limit = 0; #nothing left to spend
						last QUERYSET;
					}
				}

				$self->output_status('Uncategorised error, retrying...');
				sleep 10;
			}

			#without a client we cannot query or re-check the quota, so stop here
			if (!$nt)
			{
				$self->output_status('Unable to reconnect to Twitter');
				$stream_logs->{$item_id}->{end_state} = 'Unable to reconnect to Twitter';
				$self->{log_data}->{end_state} = 'Unable to reconnect to Twitter';
				last QUERYSET;
			}

			#the 403 branch has already logged the outcome; the item is finished and not requeued
			next QUERY if $end_state_recorded;

			#process results and put the current item at the end of the queue (if appropriate)
			if ($results_flag)
			{
				#treat a response without a statuses array as an empty page
				my $statuses = [];
				if (ref $results eq 'HASH' && ref $results->{statuses} eq 'ARRAY')
				{
					$statuses = $results->{statuses};
				}
				my $results_count = scalar @{$statuses};

				#no errors, process the results
				if ($results_count < 1)#if an empty page of results, assume no more tweets
				{
					$self->output_status('Empty Results Set');
					$stream_logs->{$item_id}->{end_state} = 'Update Completed \o/';
				}
				else
				{
					#a single string, like every other output_status call in this file
					$self->output_status('Retrieved ' . $results_count . ' statuses');
					$self->process_results($current_item, $results);

					$self->output_status('Tweets created');

					#if less than a page of data, assume we've reached the end of the results
					if ($results_count < $current_item->{search_params}->{count})
					{
						$self->output_status('Less than ' . $current_item->{search_params}->{count} . ' results -- not requeueing');
						$stream_logs->{$item_id}->{end_state} = 'Update Completed (Hooray)';
					}
					else
					{ 
						$self->output_status('Full results set -- requeueing');

						###############
						#REQUEUE ITEM##
						###############
						push @queue, $current_item;
					}
				}
			}
			else
			{
				#we tried N times, and failed -- do not re-queue the current item
				if (_is_twitter_error($err))
				{
					$stream_logs->{$item_id}->{end_state} =
						"BAD RESPONSE FROM TWITTER:\n" .
						"\tHTTP Response Code: " . $err->code . "\n" .
						"\tHTTP Message......: " . $err->message . "\n" .
						"\tTwitter error.....: " . $err->error . "\n";
				}
				else
				{
					#use the saved error: $@ may have been reset by code that ran since the eval
					$stream_logs->{$item_id}->{end_state} = 'Unexpected Error: ' . (defined $err ? $err : '');
				}
				$self->output_status($stream_logs->{$item_id}->{end_state});
			}

		}
		#update the limit, just in case.
		$limit = get_search_rate_limit($nt);
		$self->output_status("Updated limit.  It's now $limit");
	}

	$self->output_status("Updating ended");
	$self->{log_data}->{end_time} = scalar localtime time;
	$self->{log_data}->{api_rate_limit} = $limit;
	$self->write_log;
	$self->remove_lock;
}

#Return 1 if $err is an exception object thrown by the Twitter client, 0 otherwise.
#Net::Twitter::Lite (the client this plugin loads) throws Net::Twitter::Lite::Error;
#Net::Twitter::Error is still accepted in case the full Net::Twitter client is used instead.
sub _is_twitter_error
{
	my ($err) = @_;

	require Scalar::Util;
	#blessed() guards against calling ->isa on a plain string or an unblessed reference
	return 0 unless Scalar::Util::blessed($err);
	return 1 if $err->isa('Net::Twitter::Lite::Error');
	return 1 if $err->isa('Net::Twitter::Error');
	return 0;
}

#Store one page of search results and attach the tweets to every tweetstream sharing the search.
#
#Parameters:
#	$current_item -- queue item built by create_queue_item; its search_params->{max_id} is
#	                 moved below the lowest tweet id seen so the next query fetches the next page
#	$results      -- search response; the tweets are read from $results->{statuses}
#
#Side effects: updates the counters and per-search entries in $self->{log_data}.
#Dies if one of the item's tweetstreams cannot be loaded.
sub process_results
{
	my ($self, $current_item, $results) = @_;

	my $repo = $self->repository;
	my $tweetstream_ds = $repo->dataset('tweetstream');
	my $search_params = $current_item->{search_params};

	my $tweet_dataobjs = [];

	#create a tweet dataobj for each tweet and collect the objects for the tweetstreams
	TWEET_IN_UPDATE: foreach my $tweet (@{$results->{statuses}})
	{
		my $item_log = ($self->{log_data}->{tweetstreams}->{$current_item->{id}} ||= {});

		$self->{log_data}->{tweets_processed}++; #global count
		$item_log->{earliest_seen} = $tweet->{created_at}; #keep updating this as we walk backwards, so it ends up holding the last tweet handled

		#only need to set these once
		if (!$item_log->{latest_seen})
		{
			#search results go backwards, so the first result returned will be the latest one
			$item_log->{latest_seen} = $tweet->{created_at};
			$item_log->{search_string} = $search_params->{q};
		}

		#keep track of the lowest twitterid we've seen for paging
		if (
			!$search_params->{max_id}
			|| $tweet->{id} < $search_params->{max_id}
		)
		{
			$search_params->{max_id} = $tweet->{id};
		}

		#check to see if we already have a tweet with this twitter id in this repository
		my $tweetobj = EPrints::DataObj::Tweet::tweet_with_twitterid($repo, $tweet->{id});
		if (!defined $tweetobj)
		{
			$self->output_status('Creating Tweet Object');
			$tweetobj = EPrints::DataObj::Tweet->create_from_data(
				$repo,
				{
					twitterid => $tweet->{id},
					json_source => $tweet,
#this is now handled by a call to $tweetstream->add_tweets
#					tweetstreams => $current_item->{tweetstreamids},
				} 
			);

			#if nothing came back, skip this tweet rather than calling commit on undef
			if (!defined $tweetobj)
			{
				$repo->log( (ref $self) . " could not create a tweet object for twitter id " . $tweet->{id} );
				next TWEET_IN_UPDATE;
			}

			$tweetobj->commit; #will enrich the tweet

			$self->{log_data}->{tweets_created}++; #global_count
			$item_log->{tweets_created}++;
		}
		else
		{
			$item_log->{tweets_added}++;
			$self->output_status('Tweet Object Exists');
		}

		#safe to do because we're updating in pages of 100 -- we won't run out of memory
		push @{$tweet_dataobjs}, $tweetobj;
	}

	$self->output_status('Created all tweet objects');

	#set max_id for paging: one lower than the lowest id seen so far.
	#left alone when no id has been recorded, otherwise the decrement would produce -1
	$search_params->{max_id}-- if defined $search_params->{max_id};

	foreach my $tweetstreamid (@{$current_item->{tweetstreamids}})
	{
		my $tweetstream = $tweetstream_ds->dataobj($tweetstreamid);
		die ("UNEXPECTED CRITICAL ERROR: couldn't create tweetstream $tweetstreamid") unless $tweetstream;

		$tweetstream->add_tweets($tweet_dataobjs);
	}
}

#Build a Twitter client from the repository's 'twitter_oauth' configuration.
#Returns the client when it reports itself authorized; otherwise logs the problem
#and returns undef.
sub connect_to_twitter
{
	my ($self) = @_;

	my %nt_args = ( traits => [qw/API::RESTv1_1/] );

	#copy the four OAuth credentials out of the repository configuration
	for my $credential (qw( consumer_key consumer_secret access_token access_token_secret ))
	{
		$nt_args{$credential} = $self->repository->config('twitter_oauth', $credential);
	}

	my $client = Net::Twitter::Lite::WithAPIv1_1->new( %nt_args );

	return $client if $client->authorized;

	#TODO: handle this error properly
	$self->repository->log( (ref $self) . " Net::Twitter Oauth issue\n");
	return undef;
}


#Defaulting helper for log output.
#Returns $val when it is defined, otherwise $default when that is defined,
#otherwise an empty string.
sub v
{
	my ($val, $default) = @_;

	return defined $val     ? $val
	     : defined $default ? $default
	     :                    '';
}

#Render $self->{log_data} as a human readable, newline separated report.
#
#The report has a global section (start/end time, remaining API quota, tweet counts,
#number of searches updated) followed by one section per entry in
#$self->{log_data}->{tweetstreams}, sorted by key, and a final overall status line.
#Missing values are replaced with defaults by v().
#
#Returns the report as a single string (no trailing newline).
sub generate_log_string
{
	my ($self) = @_;
	my $log = $self->{log_data};
	my $streams = $log->{tweetstreams} || {}; #no entries when nothing was updated

	my @r;

	push @r, "Update started at: " . v($log->{start_time});
	push @r, "Update finished at: " . v($log->{end_time});
	push @r, "API Queries left: " . v($log->{api_rate_limit});
	push @r, v($log->{tweets_processed}, 0) . " tweets processed";
	push @r, v($log->{tweets_created}, 0) . " tweets created";
	#the count needs its own parentheses: "(scalar keys %h, 0)" is a comma expression
	#that always evaluates to its last element, 0
	push @r, scalar(keys %{$streams}) . " tweetstreams updated:";

	foreach my $ts_id (sort keys %{$streams})
	{
		my $ts = $streams->{$ts_id};

		my $new = v($ts->{tweets_created},0);
		my $added = v($ts->{tweets_added},0);
		my $end = v($ts->{end_state},'Unknown Endstate');
		my $earliest = v($ts->{earliest_seen},'unknown');
		my $latest = v($ts->{latest_seen},'unknown');

		push @r, "\t$ts_id: " . v($ts->{search_string},'undef') ;
		push @r, "\t\t$new created";
		push @r, "\t\t$added existing tweets added (stream overlap or page shifting)";
		push @r, "\t\tFrom: $earliest";
		push @r, "\t\tTo:   $latest";
		push @r, "\t\tCompleted with status: $end";
	}

	my $end = v($log->{end_state},'No Known Errors');
	push @r, "Complete with status: " . $end;

	return join("\n",@r);
}

#Callback for the tweetstream list's map(): add one tweetstream to the update queue.
#
#Parameters (as passed by map): $repo, $ds, $tweetstream, and $queue_items, a hash ref
#keyed by search string + geocode. Streams without a search string, and archived
#streams, are skipped.
#
#Streams with an identical search and geocode share a single queue item so the search
#is only run once: the item's tweetstreamids lists every stream and its id (used for
#logging) is the comma joined, sorted list of those ids.
sub create_queue_item
{
	my ($repo, $ds, $tweetstream, $queue_items) = @_;

	return unless $tweetstream->is_set('search_string');
	return if (
		$tweetstream->is_set('status')
		&& $tweetstream->value('status') eq 'archived'
	);  #should never be true, but let's be explicit.

	my $search_string = $tweetstream->value('search_string');

	my $geocode = '';
	$geocode = $tweetstream->value('geocode') if $tweetstream->is_set('geocode');

	my $key = $search_string . 'XXXXXXX' . $geocode;

	if ($queue_items->{$key})
	{
		my $item = $queue_items->{$key};

		push @{$item->{tweetstreamids}}, $tweetstream->id;
		$item->{id} = join(',',sort(@{$item->{tweetstreamids}}));

		#the shared search must reach back far enough for the stream that is furthest
		#behind, so keep the lowest since_id of all merged streams. A false value means
		#"no lower bound" and wins over any id. Without this the result depended on
		#which stream happened to be seen first.
		my $params = $item->{search_params};
		if ($params->{since_id})
		{
			my $since_id = $tweetstream->highest_twitterid;
			if (!$since_id || $since_id < $params->{since_id})
			{
				$params->{since_id} = $since_id;
			}
		}
	}
	else
	{
		$queue_items->{$key} = {
			id => $tweetstream->id, #id for logging
			search_params => {
				q => $search_string,
				count => 100,
				include_entities => 1,
	#			max_id => Will be set to the lowest id we find for the purposes of paging
				since_id => $tweetstream->highest_twitterid, 
			},
			tweetstreamids => [ $tweetstream->id ], #for when two streams have identical search strings
			retries => $QUERY_RETRIES, #if there's a failure, we'll try again.
		};
		#optional param
		$queue_items->{$key}->{search_params}->{geocode} = $geocode if $geocode;
	}
}


#Search the 'tweetstream' dataset for streams whose expiry_date is today or later
#and return the result of perform_search.
#
#NOTE: a filter on status = "active" used to be applied here as well, but it is
#currently disabled, so only the expiry date restricts the result.
sub active_tweetstreams
{
	my ($self) = @_;

	my $repository = $self->repository;
	my $dataset = $repository->get_dataset( "tweetstream" );

	my $search = EPrints::Search->new(
		session => $repository,
		dataset => $dataset,
	);

	#an open ended date range: from today onwards
	my $from_today = EPrints::Time::get_iso_date( time ) . "-";
	$search->add_field( $dataset->get_field( "expiry_date" ), $from_today );

	return $search->perform_search;
}


#Ask Twitter how many search queries are left in the current rate limit window.
#
#Parameters:
#	$nt -- Twitter client object providing rate_limit_status
#
#Returns the value found at resources -> search -> /search/tweets -> remaining in the
#rate limit response, or undef when the request fails or the response does not have
#that shape. A failed request is reported with warn.
sub get_search_rate_limit
{
	my ($nt) = @_;

	my $rl;
	my $ok = eval {
		$rl = $nt->rate_limit_status('search');
		1;
	};
	if (!$ok)
	{
		#the callers treat undef as "no quota", so report the reason and carry on
		warn "Unable to read the Twitter rate limit status: $@";
		return undef;
	}

	#walk down the nested response one key at a time, giving up as soon as a level is missing
	foreach my $key (qw( resources search /search/tweets remaining ))
	{
		return undef unless ref $rl eq 'HASH' && exists $rl->{$key};
		$rl = $rl->{$key};
	}
	return $rl;
}


1;