#!/usr/bin/perl -w 
# -*-cperl-*-
## Filename: ucs-print
## Modified: Sun Aug 22 00:59:26 2004 (evert)   
##   Author: Stefan Evert
##  Purpose: ASCII-format UCS data set for viewing or printing
# Load IO::Handle explicitly: perls older than 5.14 do not load it on demand,
# so the autoflush() method call below would otherwise depend on some other
# module having pulled it in.
use IO::Handle;
# Flush STDERR after every write, so that the partial "Loading data set ..."
# progress line (printed without a trailing newline) is not held back.
STDERR->autoflush(1);

use UCS;
use UCS::DS::Memory;
use UCS::DS::Format;

use Getopt::Long;

# Command-line options.  These are package globals that the rest of the
# script reads (and, for $Opt_Output / $Opt_Pagesize, later rewrites).
$Opt_Help = 0;					# --help
$Opt_Verbose = 0;				# --verbose
$Opt_Digits = 0;				# --digits      (0 = keep formatter default)
$Opt_Interactive = 0;				# --interactive
$Opt_Pagesize = -1;				# --pagesize    (-1 = not specified)
$Opt_Output = "";				# --output      ("" = not specified)
$Opt_PostScript = 0;				# --postscript
$Opt_Landscape = 0;				# --landscape
$Opt_Two = 0;					# -2 (as in a2ps)

# $ok records whether option parsing succeeded; it is checked (together with
# --help) once the positional arguments have been parsed as well.
$ok = GetOptions(
		 "help|h" => \$Opt_Help,
		 "verbose|v" => \$Opt_Verbose,
		 "digits|d=i" => \$Opt_Digits,
		 "interactive|i" => \$Opt_Interactive,
		 "pagesize|p=i" => \$Opt_Pagesize,
		 "output|o=s" => \$Opt_Output,
		 "postscript|ps" => \$Opt_PostScript,
		 "landscape|l" => \$Opt_Landscape,
		 "two-up|2" => \$Opt_Two,
		);
# NOTE(review): presumably this silences progress messages of the UCS
# modules unless --verbose was given -- confirm against the UCS module.
$UCS::Verbose = 0 unless $Opt_Verbose;

# Reject contradictory option combinations.  The output name is compared
# against "" rather than tested for truth, so that a file named "0" counts.
die "Error: can't specify both --output and --interactive.\n"
  if $Opt_Interactive and $Opt_Output ne "";
die "Error: can't specify both --postscript and --interactive.\n"
  if $Opt_Interactive and $Opt_PostScript;

# Options that only affect the a2ps call are ignored without --postscript;
# tell the user instead of dropping them silently.
warn "Warning: --landscape option ignored (can only be used in --postscript mode).\n"
  if $Opt_Landscape and not $Opt_PostScript;
warn "Warning: --two-up option ignored (can only be used in --postscript mode).\n"
  if $Opt_Two and not $Opt_PostScript;

# Split the remaining command-line arguments into the list of variable
# patterns (@varspec) and the name of the data set ($dsfile).
@varspec = ();
if (@ARGV <= 1) {
  # short form: at most one argument, which names the data set ("-" = STDIN
  # if it is omitted); all variables are selected with the '%' wildcard
  $dsfile = @ARGV ? shift @ARGV : "-";
  push @varspec, '%';
}
else {
  # long form: <pattern> ... FROM <data set>
  $dsfile = undef;
  until (@ARGV == 0) {
    $arg = shift @ARGV;
    unless (uc($arg) eq "FROM") {
      push @varspec, $arg;
      next;
    }
    # FROM must be followed by exactly one more argument
    $ok = 0 if @ARGV != 1;
    $dsfile = shift @ARGV;
  }
  # the FROM clause is mandatory in this form
  $ok = 0 if not defined $dsfile;
}

# Show the usage reminder on --help or after any command-line error.
die join("",
	 "Usage:  ucs-print [-i] [-d <digits>] [-p <lines>] data.ds.gz\n",
	 "        ucs-print [-o <file>] [-ps [-2] [-l]] [-d <digits>] [-p <lines>] data.ds.gz\n",
	 "        ucs-print [<options>] '*' 'am.%.pv' FROM data.ds.gz\n",
	 "[type 'ucsdoc ucs-print' for more information]\n")
  if $Opt_Help or not $ok;

# Decide where the formatted table is sent.  For --interactive and
# --postscript, $Opt_Output is rewritten into a "| command" string that is
# later handed to the formatter's print method; otherwise it is left as
# given on the command line.
if ($Opt_Interactive) {				# --interactive: send output to less pager
  $Opt_Output = "| less --chop-long-lines --tilde  --max-forw-scroll=10  --clear-screen";
  if ($Opt_Pagesize < 0) {		# automatic paging in interactive mode
    $lines = get_lines();
    # only derive a page size from a plausible screen height; anything else
    # (undef, non-numeric, 4 lines or fewer) leaves paging switched off
    $Opt_Pagesize = $lines - 4
      if defined $lines and $lines =~ /^[0-9]+$/ and $lines > 4;
  }
}
elsif ($Opt_PostScript) {			# --postscript: use a2ps to format output
  die "Sorry, --postscript is not supported on this system (a2ps seems to be missing).\n"
    unless defined $UCS::a2ps;

  # Quote a string for the shell: wrap it in single quotes and escape any
  # embedded single quote, so file names containing spaces, quotes or shell
  # metacharacters reach a2ps unchanged.
  my $shell_quote = sub {
    my $text = shift;
    $text =~ s/'/'\\''/g;
    return "'$text'";
  };

  $Opt_Output = "-"				# default output is on STDOUT
    unless length $Opt_Output;
  $Opt_Output = "| $UCS::a2ps -o " . $shell_quote->($Opt_Output)
    . " -i --pretty-print=plain ";		# construct a2ps call
  $Opt_Output .= ($Opt_Two) ? "-2 " : "-1 ";
  $Opt_Output .= "-r "
    if $Opt_Landscape;
  # the --stdin value is the data set name, or a generic label for STDIN input
  $Opt_Output .= "--stdin=" . $shell_quote->(($dsfile eq "-") ? "Data Set" : $dsfile) . " ";
  $Opt_Output .= "-c -L ".($Opt_Pagesize + 3)." "
    if $Opt_Pagesize > 0;
}

# load data set into memory
print STDERR "Loading data set $dsfile ... "
  if $Opt_Verbose;
# direct method-call syntax instead of the ambiguous "new Class ARGS" form
$ds = UCS::DS::Memory->new($dsfile);
$size = $ds->size;
print STDERR "$size rows\n"
  if $Opt_Verbose;

# configure data set formatter; digits and page length are only overridden
# when a positive value was requested
$formatter = UCS::DS::Format->new($ds);
$formatter->digits($Opt_Digits) if $Opt_Digits > 0;
$formatter->pagelength($Opt_Pagesize) if $Opt_Pagesize > 0;
$formatter->vars(@varspec);

# print formatted data set, either to the requested destination or (when
# no destination was set) with the formatter's default.  The comparison with
# "" rather than a truth test keeps an output file named "0" working.
if ($Opt_Output ne "") {
  $formatter->print($Opt_Output);
}
else {
  $formatter->print;
}


##
##  subroutines
##

# Try to guess the number of text lines on the screen, using different
# methods in turn.  Takes no arguments.  Returns a positive integer, or
# undef (in scalar context) if none of the methods produced a usable number.
sub get_lines {
  # accept only positive decimal integers, so the caller can safely do
  # arithmetic on the result
  my $is_line_count = sub {
    my $value = shift;
    return (defined $value and $value =~ /^[0-9]+$/ and $value > 0);
  };

  # best solution, if Term::ReadKey is installed; the eval block swallows
  # both a missing module and any failure to query the terminal
  my $height = eval {
    require Term::ReadKey;
    (Term::ReadKey::GetTerminalSize(\*STDOUT))[1];
  };
  return $height if $is_line_count->($height);

  # Linux BASH (seems to need "export LINES")
  return $ENV{LINES} if $is_line_count->($ENV{LINES});

  # try to get at $LINES when it isn't exported
  my $output = `echo \$LINES`;
  return $1
    if defined $output and $output =~ /^\s*([0-9]+)/ and $1 > 0;

  return;
}


__END__

=head1 NAME

ucs-print - ASCII-format UCS data set for viewing and printing


=head1 SYNOPSIS

  ucs-print [-i] [-p <lines>] [-d <digits>] data.ds.gz

  ucs-print [-o <file>] [-ps [-2] [-l]] [-p <lines>] [-d <digits>] data.ds.gz

  ucs-print [<options>] '*' 'am.%.pv' FROM data.ds.gz


=head1 DESCRIPTION

Format data set as ASCII table for inclusion in text files, on-line viewing
(in a terminal window, with C<--interactive> option), and printing (in
PostScript format, with C<--postscript> option).  The B<ucs-print> utility
automatically adjusts column widths and chooses an appropriate format for
floating-point numbers.  Boolean attributes are displayed as C<yes> and C<no>,
while missing values are shown as C<NA>.

In the first form of the command (used in the first two examples above), all
variables are displayed (which usually results in a very wide table).  The
name of the data set may be omitted, in which case data is read from C<STDIN>.

In the second form, variables can be selected with a whitespace-separated list
of UCS wildcard patterns (see L<the ucsexp manpage|ucsexp>) or by explicitly
specifying the variable names. This feature can also be used to re-order the
columns or display a variable in multiple columns.  The C<FROM> clause is
mandatory in this mode, but data can be read from C<STDIN> by using C<-> as
the name of the data set.

Note that there may be some delay while the data set is read into memory and
analysed, especially without the C<--pagesize> option.


=head1 OPTIONS

=over 4

=item *

C<--help>, C<-h>

Prints short usage reminder.

=item *

C<--verbose>, C<-v>

Prints some (minimal) progress information on C<STDERR>.

=item *

C<--output> I<file>, C<-o> I<file>

Write output to I<file>, rather than printing it on C<STDOUT>.

=item *

C<--postscript>, C<-ps>

Uses the B<a2ps> program (see the L<a2ps(1)|a2ps(1)> manpage) to create a
PostScript version of the formatted table for printing.  By default, the
PostScript code will be shown on C<STDOUT> (and I<not> be sent to a printer).
It can be saved into a file with the C<--output> option.  If the C<--pagesize>
option is used, each page will contain the specified number of rows and the
table will be truncated if it is too wide.  If this happens, try increasing
the number of rows on the page or use C<--landscape>.  If the table still
fails to fit, split the variables into two or more groups that are printed
separately.

=item *

C<--landscape>, C<-l>

[In C<--postscript> mode only.]  Print pages in landscape orientation rather
than portrait.  Especially useful for wide tables.

=item *

C<--two-up>, C<-2>

[In C<--postscript> mode only.]  Print two pages on a single sheet, same as
the C<-2> option in B<a2ps>.  This option may give a more satisfactory result
for very narrow tables (e.g. when showing only the pair types).

=item *

C<--interactive>, C<-i>

Send output to terminal pager (B<less>) for interactive viewing.  This option
may not be used together with C<--output> or C<--postscript>.  The data will
automatically be displayed in paged mode, with the page size adjusted to the
height of the terminal window.  If the screen size cannot be automatically
determined, use the C<--pagesize> option to activate paging explicitly.  The
page size should be set to the screen height (number of text lines) minus 4
for optimal results.  Use C<-p 0> to deactivate paging in interactive mode.

=item *

C<--pagesize> I<n>, C<-p> I<n>

Split data set into smaller tables of (up to) I<n> rows each, which are
separated by blank lines.  Use of this option may improve the formatting
quality, helps to avoid excessive column widths, and reduces the delay before
(partial) results can be displayed (especially for large data sets).  By
default, the entire data set is formatted as a single large table (unless
C<--interactive> was specified).

=item *

C<--digits> I<n>, C<-d> I<n>

Display floating-point numbers with a precision of approximately I<n>
significant digits.  The actual number of digits shown may differ slightly
when a fixed-point format is chosen by the formatter.  The default is I<n> =
8.

=back


=head1 BUGS

The code used to determine the screen height in C<--interactive> mode may not
work on some platforms.  It has only been tested under Linux so far.  If you
are using the B<bash> shell, you might try C<export LINES> before running the
B<ucs-print> tool.


=head1 COPYRIGHT

Copyright 2004 Stefan Evert.

This software is provided AS IS and the author makes no warranty as to
its use and performance. You may use the software, redistribute and
modify it under the same terms as Perl itself.

=cut
