package EPrints::Plugin::Export::WordleLink;
use EPrints::Plugin::Export;
@ISA = ( "EPrints::Plugin::Export" );
use strict;
# Constructor: builds the plugin via the parent class and attaches the
# export metadata plus the table of wordle configurations.
#
# Each wordle configuration is a hash with these keys:
#   id          - key under which the aggregated data is stored in
#                 $self->{wordle_data}
#   aggregate   - 'words', 'concatenate' or 'values'; selects how
#                 generate_field_data accumulates the field content
#   fields      - eprint field ids that feed this wordle
#   force_lower - true to lowercase the text before aggregating
#   title       - heading used by generate_wordle_input
#   limit       - optional cap on the number of output lines (read by
#                 generate_wordle_input_text); absent means no cap
sub new
{
    my( $class, %opts ) = @_;

    my( $plugin ) = $class->SUPER::new( %opts );

    $plugin->{name}        = "Wordle Link";
    $plugin->{accept}      = [ 'list/eprint', 'dataobj/eprint' ];
    $plugin->{visible}     = "all";
    $plugin->{mimetype}    = 'text/html; charset=utf-8';
    $plugin->{extension}   = '.html';
    $plugin->{wordle_data} = {};

    # One row per wordle; columns follow @columns. Rows without a trailing
    # limit value get no 'limit' key at all.
    my @columns = qw( id aggregate fields force_lower title limit );
    my @rows = (
        [ 'title',    'words',       ['title'],    1, 'Titles (word frequency)' ],
        [ 'titlew',   'concatenate', ['title'],    1, 'Titles (all text)' ],
        [ 'people',   'values',
          ['creators_name', 'editors_name', 'contributors_name'],
          0, 'People', 5000 ],
        [ 'abstract', 'words',       ['abstract'], 1, 'Abstracts (word frequency)', 5000 ],
    );

    my @configs;
    foreach my $row (@rows)
    {
        my %config;
        @config{ @columns[ 0 .. $#{$row} ] } = @{$row};
        push @configs, \%config;
    }
    $plugin->{wordle_configs} = \@configs;

    return $plugin;
}
# Assemble the complete export page: a fixed preamble followed by one box
# per wordle configuration (built by generate_wordle_input), laid out two
# boxes per row.
#
# Returns the page as a string.
sub generate_html_output
{
    my ($self) = @_;

    # Fixed preamble, reproduced verbatim.
    my $page = '
Wordle Links
Generate a Wordle
Please submit one of the textboxes below to wordle. Feel free to tweak the data if you want to merge synonyms or remove stop words.
';

    my @boxes;
    foreach my $config (@{$self->{wordle_configs}})
    {
        push @boxes, $self->generate_wordle_input($config);
    }

    # Walk the boxes in pairs; stop at the first missing (false) box.
    for (my $left = 0; $boxes[$left]; $left += 2)
    {
        my $right = $left + 1;
        $page .= '' . $boxes[$left] . ' | ';
        $page .= '' . $boxes[$right] . ' | ' if $boxes[$right];
        $page .= '
';
    }

    $page .= '
';
    return $page;
}
# Build the box for a single wordle configuration.
#
# Parameters:
#   $wordle - a wordle configuration hash; only its 'title' is read here
#
# Returns the title followed by a newline.
sub generate_wordle_input
{
    my ($self, $wordle) = @_;

    my $heading = $wordle->{title};

    return join('', "$heading
", '');
}
# Render the aggregated data of one wordle configuration as plain text.
#
# Parameters:
#   $wordle - a wordle configuration hash. 'id' selects the data in
#             $self->{wordle_data}, 'aggregate' selects the output format,
#             and the optional 'limit' (expected to be a positive integer)
#             caps the number of lines produced.
#
# Returns a string, by aggregate type:
#   concatenate - the accumulated text, unchanged
#   values      - one "value:count" line per distinct value
#   words       - one line per word set, formatted by wordset_to_wordle_val
# An empty string is returned when no data was collected for the id or when
# the aggregate type is not recognised.
#
# Lines are ordered by descending count; equal counts are ordered by key so
# that the output, and the set of entries that survive 'limit', is the same
# on every run instead of depending on hash iteration order.
sub generate_wordle_input_text
{
    my ($self, $wordle) = @_;

    my $agg  = $wordle->{aggregate};
    my $data = $self->{wordle_data}->{ $wordle->{id} };
    return '' unless defined $agg && defined $data;

    return $data if $agg eq 'concatenate';

    my @keys;
    if ($agg eq 'values')
    {
        @keys = sort {
            $data->{$b} <=> $data->{$a} or $a cmp $b
        } keys %{$data};
    }
    elsif ($agg eq 'words')
    {
        @keys = sort {
            ($data->{$b}->{'total count'} || 0) <=> ($data->{$a}->{'total count'} || 0)
                or $a cmp $b
        } keys %{$data};
    }
    else
    {
        return ''; #should never happen
    }

    # Truncate before formatting so that no work is spent on dropped entries.
    my $limit = $wordle->{limit};
    if ($limit && $limit > 0 && @keys > $limit)
    {
        splice(@keys, $limit);
    }

    my $text = '';
    foreach my $key (@keys)
    {
        my $line = $agg eq 'values'
            ? $key . ':' . $data->{$key}
            : $self->wordset_to_wordle_val($data->{$key});
        $text .= $line . "\n";
    }
    return $text;
}
# Collapse one word set into a single "word:count" entry.
#
# Parameters:
#   $wordset - hash of the spellings seen for one word, plus their total:
#     {
#       'total count' => 53,
#       'MIT' => 20,
#       'M.I.T' => 23,
#       'mit.' => 5,
#       'mit,' => 5,
#     }
#
# Returns the most frequent spelling, a colon, and the 'total count' value.
# When several spellings share the highest count the one that sorts first
# is used, so the result does not depend on hash iteration order. If the
# set holds no spelling with a positive count the word part is empty.
sub wordset_to_wordle_val
{
    my ($self, $wordset) = @_;

    my $word_actual = '';
    my $max = 0;

    # Iterating in sorted order together with the strict '>' comparison
    # makes the first (smallest) of any tied spellings win.
    foreach my $word (sort keys %{$wordset})
    {
        next if $word eq 'total count';
        if ($wordset->{$word} > $max)
        {
            $word_actual = $word;
            $max = $wordset->{$word};
        }
    }

    return "$word_actual:" . $wordset->{'total count'};
}
# Feed one eprint into every configured wordle: for each wordle
# configuration, each of its fields is handed to generate_field_data.
#
# Parameters:
#   $eprint - the eprint whose field values are to be aggregated
sub generate_eprint_data
{
    my ($self, $eprint) = @_;

    foreach my $config (@{$self->{wordle_configs}})
    {
        my $field_ids = $config->{fields};
        foreach my $field_id (@{$field_ids})
        {
            $self->generate_field_data($eprint, $field_id, $config);
        }
    }
}
# Add the content of one field of one eprint to the data of one wordle.
#
# Parameters:
#   $eprint  - the eprint being processed
#   $fieldid - id of the field to read
#   $wordle  - the wordle configuration the field belongs to
#
# What is stored in $self->{wordle_data}->{<wordle id>} depends on the
# configuration's 'aggregate' setting:
#   values      - hash of value => number of occurrences
#   concatenate - one string; each field text is appended after a space
#   words       - hash of compare-value => { spelling => count, ...,
#                 'total count' => n }
# Nothing happens when the eprint has no value for the field.
sub generate_field_data
{
    my ($self, $eprint, $fieldid, $wordle) = @_;

    return unless $eprint->exists_and_set($fieldid);

    my $mode      = $wordle->{aggregate};
    my $id        = $wordle->{id};
    my $lowercase = $wordle->{force_lower};

    if ($mode eq 'values')
    {
        my $values = $self->get_field_values($eprint, $fieldid);
        foreach my $value (@{$values})
        {
            $value = lc($value) if $lowercase;
            $self->{wordle_data}->{$id}->{$value}++;
        }
    }
    elsif ($mode eq 'concatenate' or $mode eq 'words')
    {
        # Both text modes start from the rendered field flattened to text.
        my $dom  = $eprint->render_value($fieldid);
        my $text = EPrints::Utils::tree_to_utf8($dom);
        $text = lc($text) if $lowercase;

        if ($mode eq 'concatenate')
        {
            $self->{wordle_data}->{$id} .= ' ' . $text;
        }
        else
        {
            # Skip tokens without a single word character.
            foreach my $word (grep { m/\w/ } split(/\s+/, $text))
            {
                my $compare = $self->generate_compareval($word);
                my $set = $self->{wordle_data}->{$id}->{$compare} ||= {};
                $set->{$word}++;
                # Tokens never contain whitespace, so this key cannot
                # collide with a real spelling.
                $set->{'total count'}++;
            }
        }
    }
}
# Reduce a word to the key used for grouping different spellings:
# lowercased, with every non-word character removed.
#
# Parameters:
#   $word - the word as it appeared in the text
#
# Returns the normalised key.
sub generate_compareval
{
    my ($self, $word) = @_;

    (my $key = lc($word)) =~ s/\W//g;

    return $key;
}
# Collect the rendered text of a field, one entry per value.
#
# Parameters:
#   $eprint  - the eprint to read from
#   $fieldid - id of the field
#
# Returns an array reference of strings. For a field whose 'multiple'
# property is set, every value is rendered separately through a clone of
# the field with 'multiple' switched off; otherwise the list holds the
# single rendered value of the field.
sub get_field_values
{
    my ($self, $eprint, $fieldid) = @_;

    # Work on a clone so the property change below stays local to it.
    my $field = $eprint->dataset->field($fieldid)->clone;

    unless ($field->property('multiple'))
    {
        my $dom = $eprint->render_value($fieldid);
        return [ EPrints::Utils::tree_to_utf8($dom) ];
    }

    $field->set_property('multiple', 0);

    my @texts;
    my $values = $eprint->value($fieldid);
    foreach my $value (@{$values})
    {
        my $dom = $field->render_value($self->repository, $value);
        push @texts, EPrints::Utils::tree_to_utf8($dom);
    }
    return \@texts;
}
# Export a list of eprints as one page.
#
# Options (%opts):
#   list - object whose map() method is called with a callback that
#          receives ( $session, $dataset, $item ) for every item
#   fh   - optional filehandle; when defined the page is printed to it
#
# Returns the page as a string, or nothing when it was written to 'fh'.
sub output_list
{
    my( $self, %opts ) = @_;

    # The aggregated counts live on the plugin object. Clear them first so
    # that a second export on the same instance does not include (and
    # double-count) the data gathered by an earlier one.
    $self->{wordle_data} = {};

    $opts{list}->map( sub {
        my( $session, $dataset, $item ) = @_;
        $self->generate_eprint_data( $item, %opts );
    } );

    my $html = $self->generate_html_output;

    if( defined $opts{fh} )
    {
        print {$opts{fh}} $html;
        return;
    }

    return $html;
}
# Export a single data object as one page.
#
# Parameters:
#   $dataobj - the object handed to generate_eprint_data
#   %opts    - 'fh': optional filehandle; when defined the page is printed
#              to it
#
# Returns the page as a string, or nothing when it was written to 'fh'.
sub output_dataobj
{
    my( $self, $dataobj, %opts ) = @_;

    # The aggregated counts live on the plugin object. Clear them first so
    # that repeated calls on the same instance each describe only their own
    # object instead of accumulating the data of earlier calls.
    $self->{wordle_data} = {};

    $self->generate_eprint_data($dataobj);

    my $html = $self->generate_html_output;

    if( defined $opts{fh} )
    {
        print {$opts{fh}} $html;
        return;
    }

    return $html;
}
1;