#!/usr/bin/perl -w 
# -*-cperl-*-
## Filename: ucs-list-am
## Modified: Wed Aug  4 02:41:45 2004 (evert)   
##   Author: Stefan Evert
##  Purpose: list built-in association measures (with or without add-on packages)

use UCS;
use UCS::AM;
use UCS::Expression;

use Getopt::Long;

# Command-line options.  These are package globals on purpose: the remainder
# of the script reads them directly.
$Opt_Help = 0;					# --help
$Opt_Extra = "";				# --extra
$Opt_Package = "";				# --package
$Opt_Terse = 0;					# --terse
$Opt_Verbose = 0;				# --verbose
$Opt_Code = 0;					# --code
$Opt_List = 0;					# --list
$Opt_Frequencies = "";				# --frequencies

# parse command-line options (GetOptions() returns false if it found an error)
$ok = GetOptions(
		 "verbose|v" => \$Opt_Verbose,
		 "code|c" => \$Opt_Code,
		 "terse|t" => \$Opt_Terse,
		 "extra|x=s" => \$Opt_Extra,
		 "package|p=s" => \$Opt_Package,
		 "list|l" => \$Opt_List,
		 "frequencies|f=s" => \$Opt_Frequencies,
		 "help|h" => \$Opt_Help,
		 );

# usage summary, shown after an option error and on request (--help)
my $usage = "Usage:  ucs-list-am [-v | -c | -t | -f <f,f1,f2,N>]\n"
  . "                    [-x <package> | -p <package>] [<am1> <am2> ...]\n"
  . "        ucs-list-am --list\n"
  . "[type 'ucsdoc ucs-list-am' for more information]\n";

# invalid options are an error: usage goes to STDERR, exit status is non-zero
die $usage
  unless $ok;

# --help is a regular request, not an error: usage goes to STDOUT, exit status is 0
if ($Opt_Help) {
  print $usage;
  exit 0;
}

# sanity checks
use Scalar::Util qw(looks_like_number);		# core module, used to validate the frequency signature

# Parse a frequency signature given as a string of the form "f,f1,f2,N".
# Returns the four values as a list, or the empty list unless the string
# consists of exactly four comma-separated positive numbers.
sub _parse_frequency_signature {
  my ($spec) = @_;
  return
    unless defined $spec;
  my @values = split /,/, $spec, -1;		# limit -1 keeps trailing empty fields, so "1,2,3,4," is rejected
  return
    unless @values == 4;
  foreach my $value (@values) {
    # test the format first, so that junk like "abc" or "" is never compared numerically
    return
      unless looks_like_number($value) and $value > 0;
  }
  return @values;
}

# --frequencies counts as given whenever its argument is non-empty, even if the
# argument is a false value in Perl (e.g. "-f 0", which must be reported as invalid)
my $have_frequencies = (defined $Opt_Frequencies and $Opt_Frequencies ne "") ? 1 : 0;

die "Error: please specify only one of --verbose (-v), --code (-c), --terse (-t), or --frequencies (-f).\n"
  if ($Opt_Terse + $Opt_Verbose + $Opt_Code + $have_frequencies) > 1;
die "Error: please use either --extra (-x) or --package (-p), not both.\n"
  if $Opt_Extra and $Opt_Package;

if ($have_frequencies) {
  @fsig = _parse_frequency_signature($Opt_Frequencies);
  die "Error: '$Opt_Frequencies' is not a valid frequency signature.\n"
    unless @fsig;
  @fsig{qw<f f1 f2 N>} = @fsig;			# store frequency signature in hash
}

# --list mode: show the add-on packages found in the package directory, then quit
if ($Opt_List) {
  # add-on packages are discovered by looking for Perl module files (a hack)
  my $am_dir = "$UCS::PerlDir/lib/UCS/AM";
  unless (chdir $am_dir) {
    die "Internal error: can't find add-on packages. Aborted.\n";
  }

  # package name = module file name without its .pm suffix
  my @packages = ();
  foreach my $file (glob "*.pm") {
    (my $package = $file) =~ s/\.pm$//
      or next;					# skip anything that did not carry the suffix
    push @packages, $package;
  }
  if (not @packages) {
    print "No add-on packages found. (??)\n";
  }

  # --terse prints bare names, otherwise each name comes with the matching "use" statement
  foreach my $package (@packages) {
    my $line = ($Opt_Terse)
      ? "$package\n"
      : sprintf("%-20s %s\n", $package, "[ use UCS::AM::$package; ]");
    print $line;
  }
  exit 0;
}

# add-on packages requested on the command line
if ($Opt_Package) {
  # -p: show only the extension package, so the built-in measures are removed
  # first (by emptying the AM registry, which reaches into UCS internals) ...
  %UCS::AM = ();
  # ... and the package itself is then loaded by the --extra code below
  $Opt_Extra = $Opt_Package;
}
if ($Opt_Extra) {
  # -x: extension package(s), given as a comma-separated list, on top of whatever is registered
  my @requested = split /,/, $Opt_Extra;
  my @loaded = UCS::Load_AM_Package(@requested);
  @loaded
    or die "Error: no match for requested AM package(s) '$Opt_Extra'.\n";
}

# choose the association measures to show: everything by default, or only those
# matching the names / patterns given as command-line arguments
if (not @ARGV) {
  @keys = UCS::AM_Keys();
}
else {
  my @selected = ();
  for my $pattern (@ARGV) {
    my @found = UCS::Match($pattern, UCS::AM_Keys());
    if (@found) {
      push @selected, @found;
    }
    else {
      # an argument without any match is reported, but does not abort the listing
      print STDERR "Warning: no association measure found for '$pattern' (ignored).\n";
    }
  }
  @keys = UCS::Unique(@selected);
}

# output: one entry per selected association measure, laid out according to the display mode
if ($Opt_Verbose) {
  print "\n";					# blank line ahead of the first verbose entry
}
if ($Opt_Frequencies) {
  # echo the frequency signature that the scores below are computed on
  printf "f=%d, f1=%d, f2=%d, N=%d\n", $fsig{f}, $fsig{f1}, $fsig{f2}, $fsig{N};
}

for my $measure (@keys) {
  # --verbose: header line with the long name, optional description, blank line
  if ($Opt_Verbose) {
    print "== $measure (", UCS::AM_Name($measure), ")\n";
    my $description = UCS::AM_Description($measure);
    if ($description) {
      print "$description\n";
    }
    print "\n";
    next;
  }

  # --code: name, parameter line (only if the expression has parameters), expression, blank line
  if ($Opt_Code) {
    print "$measure\n";
    my $expression = UCS::AM_Expression($measure);
    my @param_names = $expression->params;
    if (@param_names) {
      print "Parameters:";
      for my $param_name (@param_names) {
	print "  $param_name=", $expression->param($param_name);
      }
      print "\n";
    }
    print $expression->string, "\n\n";
    next;
  }

  # --frequencies: evaluate the measure on the frequency signature from the command line
  if ($Opt_Frequencies) {
    my $expression = UCS::AM_Expression($measure);
    my $score = $expression->eval(\%fsig);
    printf "%30s = %.8g\n", $measure, $score;
    next;
  }

  # normal and --terse mode: internal name, plus the long name on an indented line unless --terse
  print "$measure\n";
  if (not $Opt_Terse) {
    print "\t", UCS::AM_Name($measure), "\n";
  }
}


__END__

=head1 NAME

ucs-list-am - List built-in association measures and add-on packages


=head1 SYNOPSIS

  ucs-list-am [-v | -c | -t | -f <f,f1,f2,N>] 
              [-x <package> | -p <package>] [<am1> <am2> ...]

  ucs-list-am --list


=head1 DESCRIPTION

This program is a convenient front-end to the registry of association measures
maintained by the B<UCS> module.  It can be used to list the built-in
association measures and the available add-on packages, and to display
additional information about the measures (where available).  Detailed
information about the measures
can be found in the L<UCS::AM|UCS::AM> manpage and the respective manpages of
the extension packages.  See the L<ucsam|ucsam> manpage for an introduction
and overview.

  ucs-list-am --list 

With the C<--list> (or C<-l>) option, B<ucs-list-am> lists all available 
add-on packages.

  ucs-list-am [<options>] [<am1> <am2> ...]

When B<ucs-list-am> is called without arguments, it prints the names of all
built-in association measures on stdout, each one followed by a short one-line
description of the measure.  Specific association measures can be selected by
giving their names as command-line arguments.  UCS wildcard patterns (see the
L<ucsexp|ucsexp> manpage) will list all matching measures.

The C<--extra> (or C<-x>) option can be used to load one or more add-on
packages so that the association measures from these packages will be included
in the listing (in addition to the built-in measures).  Its argument is a
comma-separated list of package names, which are case-insensitive and may be
abbreviated to unique prefixes.  For instance, both
C<--extra=HTest,Parametric> and C<-x htest,param> will load the
B<UCS::AM::HTest> and B<UCS::AM::Parametric> packages.  The special keyword
C<ALL> loads all available AM packages.

The C<--package> (or C<-p>) option is used to list the association measures
from a single package (I<without> the built-in measures).  Again, the package
name is case-insensitive and may be abbreviated to a unique prefix.  Note that
the C<--package> option cannot be used to load multiple packages.

The amount of information provided can be controlled with the C<--verbose> (or
C<-v>), C<--code> (or C<-c>), and C<--terse> (or C<-t>) options.  In
C<--terse> mode, only the names of the association measures (or, together
with C<--list>, the names of the add-on packages) are printed, so that the
output can be easily processed by other programs.  In C<--verbose> mode, the
name of
each association measure is immediately followed by a one-line description (in
parentheses).  When available, one or more lines of additional comments will
also be shown.  In C<--code> mode, the output consists of the name of each
measure, followed by its implementation (as a UCS expression), followed by a
blank line.  For parametric measures, a list of parameters and their default
values is shown on a separate line between the name and the implementation.

Alternatively, a frequency signature can be specified as an argument to the
C<--frequencies> (or C<-f>) option.  The expected format is a comma-separated
list of four integers, representing the variables C<f>, C<f1>, C<f2> and C<N>.
In this case, association scores for all selected measures are computed on the
specified frequency signature.  Note that it is not possible to compute scores
for different frequency signatures with a single invocation of the
B<ucs-list-am> tool.


=head1 COPYRIGHT

Copyright 2004 Stefan Evert.

This software is provided AS IS and the author makes no warranty as to
its use and performance. You may use the software, redistribute and
modify it under the same terms as Perl itself.

=cut
