package EPrints::Plugin::Export::WordleLink;

# Export plugin that renders the eprints it is given as an HTML page of
# forms, one form per entry in $self->{wordle_configs}.  Each form carries a
# textarea pre-filled with text or "word:count" lines aggregated from the
# configured metadata fields, ready to be submitted to Wordle.
#
# NOTE(review): the HTML markup and the Wordle endpoint/parameter names used
# below are a reconstruction; confirm them against the deployed plugin and
# the Wordle documentation before release.

use strict;
use warnings;

use EPrints::Plugin::Export;

our @ISA = ( "EPrints::Plugin::Export" );

# Constructor.
#
# Sets the plugin metadata and the list of wordle configurations.  Each
# configuration is a hash with:
#   id          - key under which aggregated data is kept in {wordle_data}
#   aggregate   - 'words', 'concatenate' or 'values' (see generate_field_data)
#   fields      - metadata field ids to read from every eprint
#   force_lower - lower-case the text before aggregating when true
#   limit       - optional cap on the number of output lines
#   title       - heading shown above the form
sub new
{
	my( $class, %opts ) = @_;

	my $self = $class->SUPER::new( %opts );

	$self->{name} = "Wordle Link";
	$self->{accept} = [ 'list/eprint', 'dataobj/eprint' ];
	$self->{visible} = "all";
	$self->{mimetype} = 'text/html; charset=utf-8';
	# NOTE(review): other EPrints export plugins appear to use 'suffix' for
	# the file extension - confirm whether 'extension' is actually read.
	$self->{extension} = '.html';

	$self->{wordle_data} = {};

	$self->{wordle_configs} = [
		{
			id => 'title',
			aggregate => 'words',
			fields => ['title'],
			force_lower => 1,
			title => 'Titles (word frequency)',
		},
		{
			id => 'titlew',
			aggregate => 'concatenate',
			fields => ['title'],
			force_lower => 1,
			title => 'Titles (all text)',
		},
		{
			id => 'people',
			aggregate => 'values',
			fields => ['creators_name', 'editors_name', 'contributors_name'],
			force_lower => 0,
			limit => 5000,
			title => 'People',
		},
		{
			id => 'abstract',
			aggregate => 'words',
			fields => ['abstract'],
			force_lower => 1,
			limit => 5000,
			title => 'Abstracts (word frequency)',
		},
	];

	return $self;
}

# Private helper: escape the characters that are significant in HTML text
# and attribute values.  Returns '' for undef so callers can concatenate
# the result unconditionally.
sub _escape_html
{
	my( $text ) = @_;

	return '' unless defined $text;

	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;

	return $text;
}

# Build the complete HTML page from the data aggregated so far.
#
# The per-configuration forms are laid out in a table, two per row; an odd
# final form gets a row of its own.  Returns the page as a string.
sub generate_html_output
{
	my( $self ) = @_;

	my @boxes = map { $self->generate_wordle_input( $_ ) } @{ $self->{wordle_configs} };

	my $html = join( "\n",
		'<html>',
		'<head>',
		'<title>Wordle Links</title>',
		'</head>',
		'<body>',
		'<h1>Generate a Wordle</h1>',
		'<p>Please submit one of the textboxes below to wordle. Feel free to tweak the data if you want to merge synonyms or remove stop words.</p>',
		'<table>',
		'',
	);

	# splice() consumes the boxes pairwise, so nothing is read past the end
	# of the list.
	while( my @pair = splice( @boxes, 0, 2 ) )
	{
		$html .= '<tr>' . join( '', map { '<td>' . $_ . '</td>' } @pair ) . "</tr>\n";
	}

	$html .= join( "\n",
		'</table>',
		'</body>',
		'</html>',
		'',
	);

	return $html;
}

# Build the heading and submission form for one wordle configuration.
#
# $wordle is one entry of $self->{wordle_configs}.  Returns an HTML
# fragment.  The title and the aggregated data are escaped because the data
# originates from repository metadata.
sub generate_wordle_input
{
	my( $self, $wordle ) = @_;

	my $is_raw_text = $wordle->{aggregate} eq 'concatenate';

	# NOTE(review): endpoint and parameter names ('text' for raw text,
	# 'wordcounts' for word:count lines) are assumed - confirm against the
	# Wordle advanced-form documentation.
	my $param = $is_raw_text ? 'text' : 'wordcounts';

	my $html = '<h2>' . _escape_html( $wordle->{title} ) . "</h2>\n";
	$html .= '<form action="http://www.wordle.net/advanced" method="post">' . "\n";

	if( $is_raw_text )
	{
		$html .= '<p>Wordle will automatically remove stop words.</p>' . "\n";
	}

	$html .= '<textarea name="' . $param . '" rows="10" cols="60">'
		. _escape_html( $self->generate_wordle_input_text( $wordle ) )
		. "</textarea>\n";
	$html .= '<br /><input type="submit" value="Submit to Wordle" />' . "\n";
	$html .= "</form>\n";

	return $html;
}

# Turn the aggregated data for one configuration into textarea content.
#
#   concatenate - the accumulated text, unchanged
#   values      - one "value:count" line per distinct value
#   words       - one "word:count" line per word set
#
# Lines are ordered by descending count; equal counts are ordered by key so
# the output (and which lines survive {limit}) does not depend on hash
# ordering.  Returns '' when nothing was collected for the configuration.
sub generate_wordle_input_text
{
	my( $self, $wordle ) = @_;

	my $agg = $wordle->{aggregate};
	my $data = $self->{wordle_data}->{ $wordle->{id} };

	return '' unless defined $data;

	if( $agg eq 'concatenate' )
	{
		return $data;
	}

	my @lines;
	if( $agg eq 'values' )
	{
		@lines = map { $_ . ':' . $data->{$_} }
			sort { $data->{$b} <=> $data->{$a} || $a cmp $b }
			keys %{$data};
	}
	elsif( $agg eq 'words' )
	{
		@lines = map { $self->wordset_to_wordle_val( $data->{$_} ) }
			sort {
				$data->{$b}->{'total count'} <=> $data->{$a}->{'total count'}
					|| $a cmp $b
			}
			keys %{$data};
	}
	else
	{
		return ''; # unknown aggregate type; should never happen
	}

	my $limit = $wordle->{limit};
	if( $limit && @lines > $limit )
	{
		splice( @lines, $limit );
	}

	return join( '', map { $_ . "\n" } @lines );
}

# Convert one word set into a "word:count" line (without the newline).
#
# A word set looks like this:
#   {
#       'total count' => 53,
#       'MIT'   => 20,
#       'M.I.T' => 23,
#       'mit.'  => 5,
#       'mit,'  => 5,
#   }
# The most frequent spelling is used as the label and 'total count' as the
# weight.  Spellings are visited in sorted order and only a strictly larger
# count replaces the current choice, so ties resolve to the same spelling
# on every run.  A set with no spellings yields an empty label.
sub wordset_to_wordle_val
{
	my( $self, $wordset ) = @_;

	my $word_actual = '';
	my $max = 0;
	foreach my $word ( sort keys %{$wordset} )
	{
		next if $word eq 'total count';
		if( $wordset->{$word} > $max )
		{
			$word_actual = $word;
			$max = $wordset->{$word};
		}
	}

	return $word_actual . ':' . $wordset->{'total count'};
}

# Aggregate one eprint into $self->{wordle_data} for every configuration
# and every field that configuration lists.
sub generate_eprint_data
{
	my( $self, $eprint ) = @_;

	foreach my $wordle ( @{ $self->{wordle_configs} } )
	{
		foreach my $fieldid ( @{ $wordle->{fields} } )
		{
			$self->generate_field_data( $eprint, $fieldid, $wordle );
		}
	}

	return;
}

# Aggregate a single field of a single eprint according to the
# configuration's aggregate type:
#
#   values      - count each rendered value of the field
#   concatenate - append the rendered text to one long string
#   words       - split the rendered text on whitespace and count each word,
#                 grouping spellings that share a compare value
#
# Does nothing when the field is not set on the eprint.
sub generate_field_data
{
	my( $self, $eprint, $fieldid, $wordle ) = @_;

	return unless $eprint->exists_and_set( $fieldid );

	my $agg = $wordle->{aggregate};
	my $id = $wordle->{id};

	if( $agg eq 'values' )
	{
		foreach my $v ( @{ $self->get_field_values( $eprint, $fieldid ) } )
		{
			my $key = $wordle->{force_lower} ? lc( $v ) : $v;
			$self->{wordle_data}->{$id}->{$key}++;
		}
		return;
	}

	return unless $agg eq 'concatenate' || $agg eq 'words';

	my $text = EPrints::Utils::tree_to_utf8( $eprint->render_value( $fieldid ) );
	$text = lc( $text ) if $wordle->{force_lower};

	if( $agg eq 'concatenate' )
	{
		$self->{wordle_data}->{$id} .= ' ' . $text;
		return;
	}

	foreach my $word ( split( /\s+/, $text ) )
	{
		next unless $word =~ m/\w/;

		my $cv = $self->generate_compareval( $word );
		$self->{wordle_data}->{$id}->{$cv}->{$word}++;
		# 'total count' contains a space and words are split on whitespace,
		# so this key can never collide with a real word.
		$self->{wordle_data}->{$id}->{$cv}->{'total count'}++;
	}

	return;
}

# Reduce a word to the key used to group its spellings: lower-cased with
# every non-word character removed.
sub generate_compareval
{
	my( $self, $word ) = @_;

	my $val = lc( $word );
	$val =~ s/\W//g;

	return $val;
}

# Return an array ref holding the rendered text of each value of a field.
#
# For a multiple field every value is rendered on its own through a clone
# of the field with 'multiple' switched off; otherwise the single rendered
# value is returned.
sub get_field_values
{
	my( $self, $eprint, $fieldid ) = @_;

	my $field = $eprint->dataset->field( $fieldid )->clone;

	if( !$field->property( 'multiple' ) )
	{
		return [ EPrints::Utils::tree_to_utf8( $eprint->render_value( $fieldid ) ) ];
	}

	$field->set_property( 'multiple', 0 );

	my @rendered;
	foreach my $val ( @{ $eprint->value( $fieldid ) } )
	{
		my $html = $field->render_value( $self->repository, $val );
		push @rendered, EPrints::Utils::tree_to_utf8( $html );
	}

	return \@rendered;
}

# Private helper: render the page and either print it to $opts{fh}
# (returning nothing) or return it as a string.
sub _deliver_html
{
	my( $self, %opts ) = @_;

	my $html = $self->generate_html_output;

	if( defined $opts{fh} )
	{
		print {$opts{fh}} $html;
		return;
	}

	return $html;
}

# Export every item of $opts{list} as one HTML page.
#
# Aggregated data is reset first so that calling the plugin instance again
# does not count earlier items twice.  Prints to $opts{fh} when it is
# defined, otherwise returns the page.
sub output_list
{
	my( $self, %opts ) = @_;

	$self->{wordle_data} = {};

	$opts{list}->map( sub {
		my( $session, $dataset, $item ) = @_;

		$self->generate_eprint_data( $item );
	} );

	return $self->_deliver_html( %opts );
}

# Export a single data object as one HTML page.
#
# Aggregated data is reset first, for the same reason as in output_list.
# Prints to $opts{fh} when it is defined, otherwise returns the page.
sub output_dataobj
{
	my( $self, $dataobj, %opts ) = @_;

	$self->{wordle_data} = {};

	$self->generate_eprint_data( $dataobj );

	return $self->_deliver_html( %opts );
}

1;