#!/usr/bin/perl -w -I/opt/eprints3/perl_lib

use strict;
use EPrints;

# Set STDOUT to auto flush, so output appears without waiting for a newline.
$| = 1;

# The repository id is the first (and only) command-line argument; it is
# mandatory.
my $repoid = shift @ARGV;
unless( defined $repoid )
{
	print STDERR "\nNeed repository_id\n\n";
	exit( 1 );
}

# Direct method call rather than the ambiguous "new Class( ... )" form.
my $session = EPrints::Session->new( 1, $repoid );
exit( 1 ) unless( defined $session );

# Safety check. The records created by this script cannot be removed again,
# so the default answer offered at the prompt is "no" (the third argument is
# taken to be get_input's default - see EPrints::Utils); only an explicitly
# typed "yes" lets the script continue.
my $ok = EPrints::Utils::get_input( '^(yes|no)$', "Never run this script on a live repository as this will create LOADS of fake access data that cannot be removed. Continue?", "no" );

unless( $ok eq "yes" )
{
	# Declined: close the session cleanly and stop without creating anything.
	$session->terminate;
	exit;
}


# Pool of User-Agent strings to pick from: real browsers, real crawlers and
# one made-up value.
my $UA = [
	'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322)',
	'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; InfoPath.1)',
	'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en) AppleWebKit/419.2.1 (KHTML like Gecko) Safari/419.3',
	'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.12) Gecko/20080225 Ubuntu/8.04 (hardy) Firefox/2.0.0.12',
	'msnbot/1.1 (+http://search.msn.com/msnbot.htm)',
	'Opera/9.51 (Windows NT 6.0; U; en)',
	'Googlebot-Image/1.0',
	'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
	'FakeUserAgentString',
];


# Pool of referrer URLs to pick from: search-engine result pages plus one
# made-up host.
my $REF = [
	'http://www.google.com/search?hl=en&q=test+data+stuff+word&start=10&sa=N',
	'http://www.google.fr/search?client=firefox-a&rls=org.mozilla%3Afr-FR%3Aofficial&channel=s&hl=fr&q=sometopic&meta=&btnG=Recherche+Google',
	'http://www.google.pl/search?hl=pl&q=rzecz&btnG=Szukaj+w+Google&lr=&aq=f&oq=',
	'http://hk.search.yahoo.com/search/combo?p=conics+equation&rd=r1&fr=FP-tab-web-t&fr2=sb-top&xargs=0&pstart=1&b=11',
	'http://search.yahoo.co.jp/search?p=Southampton&ei=UTF-8&fr=top_ga1&x=wrt',
	'http://www.bing.com/search?q=university+of+southampton+research+proposal&form=QBRE&filt=all&qs=n&adlt=strict',
	'http://www.bing.com/search?q=vector+equations+pdf&FORM=MSNH90&mkt=en-gb',
	'http://some.host.com/blaf/search?z=something',
];

# Two more referrers that point at this repository's own search pages (one
# simple search, one advanced search), built from the configured base_url.
my $base_url = $session->get_repository->get_conf( "base_url" );
my $int_search_1 = $base_url . "/cgi/search/simple?_action_search=Search&q=test+data+stat&_order=bytitle&basic_srchtype=ALL&_satisfyall=ALL&_default_action=search";
my $int_search_2 = $base_url . "/cgi/search/advanced?_action_search=Search&q=test+stat&_order=bytitle&basic_srchtype=ALL&_satisfyall=ALL&_default_action=search";
push @$REF, $int_search_1, $int_search_2;


# Number of fake access records to create.
my $N = 20000;

# Collect the ids of all eprints in the "archive" dataset so that generated
# records can refer to eprints that exist. Document ids are not looked up.
# allow_blank lets the search run without any search criteria.
# (Direct method call rather than the ambiguous "new Class( ... )" form.)
my $searchexp = EPrints::Search->new(
	session => $session,
	dataset => $session->get_repository->get_dataset( "archive" ),
	allow_blank => 1,
);

my $eplist = $searchexp->perform_search;

# Array reference holding the candidate eprint ids.
my $EPRINT_IDS;

if( $eplist->count )
{
	$EPRINT_IDS = $eplist->get_ids;
}
else
{
	# Nothing in the archive: fall back to a fixed set of ids so the script
	# can still generate records.
	print STDERR "\nDidnt find any eprint ids, will use epid=1,2,3,4,5 for all stats";
	$EPRINT_IDS = [ "1", "2", "3", "4", "5" ];
}

# Cached size of the id list, used as the upper bound for random picks.
my $EP_COUNT = scalar( @$EPRINT_IDS );

# The ids have been copied out above; the result list is no longer needed.
$eplist->dispose;

# Seed the random number generator used by rand() below and in the
# _generate_* helpers.
srand;

my $now = time;

# Spread the records evenly over (about) the last five years: start five
# years before now and advance the clock by a fixed step for every record,
# so the final record is stamped at roughly the current time.
my $start_date_offset = 3600*24*365*5;
my $ctime = $now - $start_date_offset;

my $time_offset = $start_date_offset / $N;

# Look the target dataset up once instead of once per record.
my $access_ds = $session->get_repository->get_dataset( "access" );

# Number of records for which create_object did not return an object.
my $failed = 0;

for( 1 .. $N )
{
	$ctime += $time_offset;

	# Pick a random eprint from the list of known ids.
	my $epid = $EPRINT_IDS->[ int( rand $EP_COUNT ) ];

	# int( rand 10 ) is 0..9: values 6..9 give a record without a document
	# (an abstract view), values 0..5 give a record for document id 0 (a
	# fulltext request).
	my $docid = int( rand 10 ) > 5 ? undef : 0;

	my $access = {
		datestamp => EPrints::Time::get_iso_timestamp( int( $ctime ) ),
		requester_id => _generate_ip(),
		referent_id => $epid,
		referring_entity_id => _generate_ref(),
		service_type_id => ( defined $docid ? "?fulltext=yes" : "?abstract=yes" ),
		requester_user_agent => _generate_user_agent(),
	};
	# The document id is only set on fulltext records.
	$access->{referent_docid} = $docid if( defined $docid );

	# NOTE(review): assumes create_object returns undef when the record
	# could not be created - confirm against EPrints::DataSet.
	my $record = $access_ds->create_object( $session, $access );
	$failed++ unless( defined $record );
}

# Report failures once, rather than ignoring them or printing one line per
# record.
print STDERR "\n$failed of $N access records could not be created\n" if( $failed );

$session->terminate;
exit;

# Return a random dotted-quad address string for a fake requester. Each of
# the four octets is drawn independently from the range 1..250.
sub _generate_ip
{
	my @octets = map { int( rand 250 ) + 1 } ( 1 .. 4 );
	return join( ".", @octets );
}

# Return a referrer for a fake access record: the empty string when the
# random draw (0..9) is above 5, otherwise a random entry of the $REF list.
sub _generate_ref
{
	my $roll = int( rand 10 );
	if( $roll > 5 )
	{
		return "";
	}

	my $index = int( rand scalar( @$REF ) );
	return $REF->[$index];
}

# Return a random entry of the $UA list of user agent strings.
sub _generate_user_agent
{
	my $index = int( rand scalar( @$UA ) );
	return $UA->[$index];
}



