#!/usr/bin/perl -- -*- cperl -*-
#(note -- -w removed after corrupt database caused much yelling.)
# 
#  makepbook -- generate LaTeX for telephone directory from flatfile database
#
#  1999/09/18 Rich Lafferty <rich@alcor.concordia.ca>
# 

#
# Don't touch the 'require' and 'use' and 'setlocale' below here!
#
require 5.004;                            # minimum perl version
use strict;                               # extra compiler checks
use Text::ParseWords;                     # break up TSV/CSV
use locale;                               # locale support
use POSIX qw(locale_h);                   # locale: macros
setlocale(LC_CTYPE, "fr_CA.ISO8859-1");   # locale: characters
setlocale(LC_COLLATE, "fr_CA.ISO8859-1"); # locale: sorting
use Convert::Recode qw(mac_to_latin1);    # charset conversion
use File::Copy;                           # atomic file copying routine
use File::Basename;                       # basename(1) function
my $version = "1.0";                      # current version. Don't touch.

############################################################################
### CONFIGURATION VARIABLES ################################################
############################################################################

# CHANGE the following to reflect the environment in which makepbook runs.

## OPERATIONAL SETTINGS ####################################################
## Each switch is 1 to enable, 0 to disable.
my $VERBOSE         = 1;   # 1 to print "+++" progress reports on STDERR
my $RUN_LATEX       = 0;   # 1 to run pdflatex, rotate backups and publish
                           # the PDF; 0 only writes the .tex fragments
my $TRUNCATE_FIELDS = 1;   # 1 to cut department names longer than
                           # $maxlength and append \ldots{}
my $USE_MAC_CHARSET = 1;   # 1 if input file uses Macintosh character set
                           # (converted to Latin-1 before parsing)

## FILE LOCATIONS ##########################################################
# Location of the exported database. A cleaned copy is written next to
# it as "$datafile.clean".
my $datafile = "/home/telesis/data/WEBTEL.TXT";

# Directory holding $texmain; the generated pb*.tex fragments are
# written here as well, and pdflatex is run from here.
my $texfileroot = "/home/telesis/tex";          # root directory of tex output

# Location of the published PDF and of its rotated backups.
my $pdffile = "/home/telesis/www/pbook.pdf";            # current file
my $pdfbackupdir = "/home/telesis/www/old-phonebooks";  # dir for old files

# Location of the LaTeX compiler (only used when $RUN_LATEX is set).
my $latexpath = "/local/paths/pdflatex";

# Name of the top-level file handed to LaTeX, relative to $texfileroot.
# This file should pull in the generated files with \input.
my $texmain = "pbook.tex";

## MISCELLANEOUS ##########################################################
# Field separator (probably either "," for CSV or "\t" for TSV).
my $fieldsep = "\t";

# Number of backup PDF files to keep in $pdfbackupdir; backups are
# numbered .1 (newest) through .$pdfbacknum (oldest).
my $pdfbacknum = 3;     

# Maximum length, in characters, of a department name in the Department
# and Fax lists before it is truncated. 100 seems to work well here;
# don't change this unless you know what you're doing (and the output
# needs tuning)!
my $maxlength = 100;


############################################################################
## Nothing to change below here. ###########################################
############################################################################

# Derive the full path of every generated LaTeX fragment: each list
# ("people", "contact", "dept", "fax") goes to $texfileroot/pb<list>.tex.
my %texfiles = map { ($_ => "$texfileroot/pb$_.tex") }
               qw(people contact dept fax);

# Banner is always shown; the configuration summary only when verbose.
print STDERR "This is makepbook, Version $version, ",
             "by Rich Lafferty <rich\@alcor.concordia.ca>\n";

if ($VERBOSE) {
    # Human-readable name for the input format implied by $fieldsep.
    my $format;
    if    ($fieldsep eq ",")  { $format = "CSV"; }
    elsif ($fieldsep eq "\t") { $format = "TSV"; }
    else                      { $format = "'$fieldsep' separated values"; }

    print STDERR "+++ Start:  ", scalar(localtime()), "\n";
    print STDERR "+++ Configuration data:\n",
                 "+++     LaTeX main: $texfileroot/$texmain\n",
                 "+++     Output file: $pdffile\n";

    for my $list (keys %texfiles) {
        print STDERR "+++     $list output to $texfiles{$list}\n";
    }

    print STDERR "+++     Input format: $format\n";

    # VERBOSE is necessarily on here; the others are listed only if set.
    print STDERR "+++     Options: VERBOSE\n";
    if ($RUN_LATEX) {
        print STDERR "+++              RUN_LATEX[$latexpath]\n";
    }
    if ($TRUNCATE_FIELDS) {
        print STDERR "+++              TRUNCATE_FIELDS[$maxlength]\n";
    }
    if ($USE_MAC_CHARSET) {
        print STDERR "+++              USE_MAC_CHARSET\n";
    }
}

my $today = localtime(time);  # date string stamped into generated files
my $tstart = time();          # start of run, for the elapsed-time report

# Normalise the raw export into "$datafile.clean" before any parsing:
# every carriage return becomes a linefeed and, when the input is in the
# Macintosh character set, the text is recoded to Latin-1.
#
# This pass knows nothing about the record format; it treats the input
# purely as text.

open (RAWDATA, $datafile) or die "Can't open $datafile: $!\n";
open (CLEANDATA, ">$datafile.clean") or die "Can't write cleaned data: $!\n";

print STDERR "+++ Fixing carriage returns in $datafile...\n" if $VERBOSE;

while (defined(my $chunk = <RAWDATA>)) {
    $chunk =~ tr/\015/\012/;               # carriage returns -> linefeeds

    # Recode only when the input is declared to be Macintosh-encoded;
    # Latin-1 input is passed through untouched.
    print CLEANDATA ($USE_MAC_CHARSET ? mac_to_latin1($chunk) : $chunk);
}

close RAWDATA or warn;
close CLEANDATA or die "Can't close cleaned data: $!\n";

# Restrict the cleaned copy to its owner.
chmod 0600, "$datafile.clean";

## Parse the cleaned datafile into @data, one hash reference per line.
open (DATAFILE, "$datafile.clean") or die "Can't open cleaned data: $!\n";
my @data; # list of records; each record is a hash keyed by field name

print STDERR "+++ Reading datafile...\n" if $VERBOSE;

{
    # Field names in column order: entry N names column N of the file.
    my @columns = qw(
        ARR_COLS  R_NUMBER  AREA      EXCH      PHONE     FAX
        FIRSTNAM  LASTNAME  PREFIX    TITLE     REV_SEQ   REV_TITL
        EMAIL     CAMPUS    BUILDING  ROOM      SROOM     DEP_NAME
        DEP_MAIL  DEPT_NAME ABRV_NAME GROUPMEM  DEPARTM   N_LIST
        GR_SEQ    ABRV_DEPT
    );

    while (defined(my $line = <DATAFILE>)) {
        # Split one line into its fields (quotes removed by quotewords).
        my @fields = quotewords($fieldsep, 0, $line);

        # Tidy every field in place.
        foreach my $field (@fields) {
            chomp $field;                  # strip trailing \n
            $field =~ s/^\s+//;            # strip leading whitespace
            $field =~ s/\s+$//;            # strip trailing whitespace
            $field =~ s/^\W$//;            # get rid of ^@'s
            $field =~ s/([&_])/\\$1/g;     # escape ampersands and underscores
        }

        # "Office of the Dean" is ambiguous on its own, so DEPARTM (22) is
        # replaced by ABRV_DEPT (25) if set, otherwise by DEPT_NAME (19).
        if ($fields[22] eq "Office of the Dean") {
            $fields[22] = $fields[25] || $fields[19];
        }

        # Every record gets all the keys; columns missing from a short
        # line are simply undef.
        my %record;
        @record{@columns} = @fields[0 .. $#columns];
        push @data, \%record;
    }
}

print STDERR "+++     Read " . scalar(@data) . " records\n" if $VERBOSE; 
close DATAFILE;

#############################################################################
## generate 'People' directory                                       ZPEOPLE
##

print STDERR qq(+++ Making "Faculty and Staff" list\n) if $VERBOSE;

# Write the Faculty and Staff fragment to $texfiles{people}: one
# \pbpEntry per record that has a LASTNAME and whose N_LIST flag is not
# "N", with a \pbpSection heading whenever the initial letter changes and
# a \markboth (running header) before every entry.
open (TEXPEOPLE, ">$texfiles{people}") 
    or die "Can't open $texfiles{people}: $!\n";

print TEXPEOPLE "%% Concordia phone directory Faculty and Staff List\n";
print TEXPEOPLE "%% Generated: $today with makepbook $version\n";
print TEXPEOPLE "\\pbpBeginTable\n";

{ # don't want to keep @sorted or $letter around, so put this in its own block

    my $letter = "";  # initial letter of the most recent section heading

    print STDERR "+++     Sorting data...\n" if $VERBOSE;

    # Order by surname, using the first name only to break ties. Comparing
    # the two fields separately (rather than as one concatenated string)
    # keeps a short surname ahead of every longer surname it is a prefix
    # of, whatever the first names are.
    my @sorted = sort {
        $a->{LASTNAME} cmp $b->{LASTNAME}
            or
        $a->{FIRSTNAM} cmp $b->{FIRSTNAM}
    } @data;
    print STDERR "+++     Finished sort.\n" if $VERBOSE;

    print STDERR "+++     Creating LaTeX source...\n" if $VERBOSE;
    foreach my $record (@sorted) {
        # Skip non-person records and people flagged for exclusion.
        next unless ($record->{LASTNAME} and ($record->{N_LIST} ne "N"));

        my $phone = $record->{PHONE};

        # "Surname, Prefix Firstname"; the surname alone if there is no
        # first name.
        my $name = $record->{LASTNAME};
        if ($record->{FIRSTNAM}) {
            $name .=  ", " . $record->{PREFIX} . " " . $record->{FIRSTNAM};
        }

        # Update alphabet-letter headings.
        if ( uc(substr($name,0,1)) ne $letter ) {
            $letter = uc(substr($name,0,1));
            print TEXPEOPLE "\\pbpSection{$letter}\n";
        }

        # Update dictionary-style headers
        print TEXPEOPLE '\markboth{' . $record->{LASTNAME} . '}{' . 
                                       $record->{LASTNAME} . "}\n";

        my $title = $record->{TITLE};
        my $dept = $record->{ABRV_DEPT};
        my $location = $record->{CAMPUS} . "-" .
                       $record->{BUILDING} . " " .
                       $record->{ROOM} . " " .
                       $record->{SROOM};
        my $email = $record->{EMAIL};

        my $entry = "\\pbpEntry{$phone}{$name}{$title}{$dept}{$email}{$location}\n";
        $entry =~ s/\. /\.~/g;  # fix spacing after abbreviations

        print TEXPEOPLE $entry;
    }
} # @sorted reclaimed here

print TEXPEOPLE "\\pbpEndTable\n";

# Buffered write errors (e.g. a full disk) only surface at close.
close TEXPEOPLE or die "Can't close $texfiles{people}: $!\n";

############################################################################
## generate 'Departments' directory                                   ZDEPT
##

print STDERR qq(+++ Making "Department List" list\n) if $VERBOSE;

# Write the Department List fragment to $texfiles{dept}: one \pbdEntry
# per record that has a DEPARTM but neither a LASTNAME nor a FAX, with a
# \pbdSection heading whenever the initial letter changes and a \markboth
# (running header) carrying the first word of the department name.
open (TEXDEPTS, ">$texfiles{dept}") 
    or die "Can't open $texfiles{dept}: $!\n";

print TEXDEPTS "%% Concordia phone directory Department List\n";
print TEXDEPTS "%% Generated: $today with makepbook $version\n";
print TEXDEPTS "\\pbdBeginTable\n";

{ # don't want to keep @sorted around, so we'll put this in its own block

    my $letter = "";       # Alphabetic headers -- current letter.
    my $truncnum = 0;      # Number of truncated records.

    print STDERR "+++     Sorting data...\n" if $VERBOSE;

    # Sort by entire department name (cat DEPARTM and DEP_NAME)
    my @sorted = sort {($a->{DEPARTM} . $a->{DEP_NAME}) cmp 
                       ($b->{DEPARTM} . $b->{DEP_NAME})} @data;
    print STDERR "+++     Finished sort.\n" if $VERBOSE;

    print STDERR "+++     Creating LaTeX source...\n" if $VERBOSE;

    foreach my $record (@sorted) {
        # Department phone lines only: people and fax lines are listed
        # in their own sections.
        next unless ($record->{DEPARTM}
                     and (!$record->{LASTNAME})
                     and (!$record->{FAX}));

        my $phone = $record->{PHONE};

        # Displayed name is "DEPARTM - DEP_NAME" when there is a DEP_NAME.
        my $dept = $record->{DEPARTM};
        if ($record->{DEP_NAME}) {
            $dept .= " - " . $record->{DEP_NAME};
        }

        # Only append abbreviation if there's plenty of room left.
        if ((length($dept) < $maxlength) and $record->{ABRV_NAME}) {
            my $abdept = $dept . " - " . $record->{ABRV_NAME};
            $dept = $abdept if length($abdept) < $maxlength;
        }

        # Cut it off if it's too long, so we can at least see our
        # output.
        if (length($dept) > $maxlength and $TRUNCATE_FIELDS) {
            $truncnum++;
            $dept = substr($dept,0,$maxlength);

            # Fields were escaped ("\&", "\_") when they were read, so the
            # cut can land between the backslash and its character. Drop
            # such a dangling backslash; otherwise it would pair up with
            # the one in \ldots and LaTeX would see "\\" (a line break).
            $dept =~ s/\\$//;

            $dept .= '\ldots{}';
        }

        # Update alphabet-letter headings.
        if ( uc(substr($dept,0,1)) ne $letter ) {
            $letter = uc(substr($dept,0,1));
            print TEXDEPTS "\\pbdSection{$letter}\n";
        }

        my $location = $record->{CAMPUS} . "-" .
                       $record->{BUILDING} . " " .
                       $record->{ROOM} . " " .
                       $record->{SROOM};

        my $email = $record->{DEP_MAIL};

        my $entry = "\\pbdEntry{$phone}{$dept}{$email}{$location}\n";
        $entry =~ s/\. /\.~/g;  # fix spacing after abbreviations

        # Update dictionary-style headers (first word of the name only)
        my ($hdept) = (split(" ", $dept))[0];
        print TEXDEPTS '\markboth{' . $hdept . '}{' . 
                                      $hdept . "}\n";
        print TEXDEPTS $entry;
    }

    if ($VERBOSE and $TRUNCATE_FIELDS and $truncnum) {
        print STDERR "+++         $truncnum records truncated!\n";
    }

} # @sorted reclaimed here

print TEXDEPTS "\\pbdEndTable\n";

# Buffered write errors (e.g. a full disk) only surface at close.
close TEXDEPTS or die "Can't close $texfiles{dept}: $!\n";

############################################################################
## generate 'Fax Numbers' directory                                    ZFAX
##

print STDERR qq(+++ Making "Fax Numbers" list\n) if $VERBOSE;

# Write the Fax Numbers fragment to $texfiles{fax}: one \pbfEntry per
# record that has a FAX but no LASTNAME, with a \pbfSection heading
# whenever the initial letter changes and a \markboth (running header)
# carrying the first word of the department name.
open (TEXFAX, ">$texfiles{fax}") 
    or die "Can't open $texfiles{fax}: $!\n";

print TEXFAX "%% Concordia phone directory Fax Numbers\n";
print TEXFAX "%% Generated: $today with makepbook $version\n";
print TEXFAX "\\pbfBeginTable\n";

{ # don't want to keep @sorted around, so we'll put this in its own block

    my $letter = "";    # current letter for alphabet headers
    my $truncnum = 0;   # number of truncated records

    print STDERR "+++     Sorting data...\n" if $VERBOSE;

    # Sort by concatenation of DEPARTM and DEP_NAME.
    my @sorted = sort {($a->{DEPARTM} . $a->{DEP_NAME}) cmp 
                       ($b->{DEPARTM} . $b->{DEP_NAME})} @data;

    print STDERR "+++     Finished sort.\n" if $VERBOSE;

    print STDERR "+++     Creating LaTeX source...\n" if $VERBOSE;
    foreach my $record (@sorted) {
        # Fax lines that do not belong to a person.
        next unless ((! $record->{LASTNAME}) and $record->{FAX});

        my $phone = $record->{PHONE};

        my $dept = $record->{DEPARTM};

        # Ignore DEP_NAME if it only says "Fax". Caution required!
        # Can't match /fax/i because some records have DEP_NAME
        # with "Fax - " prepended!
        if ($record->{DEP_NAME} and ($record->{DEP_NAME} ne "Fax") ) {
            $dept .= " - " . $record->{DEP_NAME};
            # We already know it's a fax
            $dept =~ s/Fax( - )?//;
        }

        # Only append abbreviation if there's plenty of room left.
        if ((length($dept) < $maxlength) and $record->{ABRV_NAME}) {
            my $abdept = $dept . " - " . $record->{ABRV_NAME};
            $dept = $abdept if length($abdept) < $maxlength;
        }

        # Cut it off if it's too long, so we can at least see our
        # output.
        if (length($dept) > $maxlength and $TRUNCATE_FIELDS) {
            $truncnum++;
            $dept = substr($dept,0,$maxlength);

            # Fields were escaped ("\&", "\_") when they were read, so the
            # cut can land between the backslash and its character. Drop
            # such a dangling backslash; otherwise it would pair up with
            # the one in \ldots and LaTeX would see "\\" (a line break).
            $dept =~ s/\\$//;

            $dept .= '\ldots{}';
        }

        # Update alphabet-letter headings.
        if ( uc(substr($dept,0,1)) ne $letter ) {
            $letter = uc(substr($dept,0,1));
            print TEXFAX "\\pbfSection{$letter}\n";
        }

        # Update dictionary-style headers (first word of the name only)
        my ($hdept) = (split(" ", $dept))[0];
        print TEXFAX '\markboth{' . $hdept . '}{' . 
                                    $hdept . "}\n";

        my $location = $record->{CAMPUS} . "-" .
                       $record->{BUILDING} . " " .
                       $record->{ROOM} . " " .
                       $record->{SROOM};

        my $entry = "\\pbfEntry{$phone}{$dept}{$location}\n";
        $entry =~ s/\. /\.~/g;  # fix spacing after abbreviations

        print TEXFAX $entry;
    }

    if ($VERBOSE and $TRUNCATE_FIELDS and $truncnum) {
        print STDERR "+++         $truncnum records truncated!\n";
    }

} # @sorted reclaimed here

print TEXFAX "\\pbfEndTable\n";

# Buffered write errors (e.g. a full disk) only surface at close.
close TEXFAX or die "Can't close $texfiles{fax}: $!\n";

############################################################################
## Generate "Contact List" directory                               ZCONTACT
##

print STDERR qq(+++ Making "Contact List" list\n) if $VERBOSE;

# Write the Contact List fragment to $texfiles{contact}: people grouped
# by department (\pbcHeader), then by group within the department
# (\pbcSubhead), each person as a \pbcEntry.
open (TEXCONTACT, ">$texfiles{contact}") 
    or die "Can't open $texfiles{contact}: $!\n";

print TEXCONTACT "%% Concordia phone directory Contact List\n";
print TEXCONTACT "%% Generated: $today with makepbook $version\n";
print TEXCONTACT "\\pbcBeginTable\n";

{ # Localize variables to this section (block).
  
    # Make list of DEPARTMs and GROUPMEMs.
    # %depts is a hash (with DEPARTM keys) 
    #        of hashes (with GR_SEQ . GROUPMEM keys)
    #        of arrays (of people's records)
    #        of hashes (of fields within a record).
    # 
    # This will not be on the exam. :-) Reading "man perlref" might help.

    ## gather data
    my %depts;
    my $gr_seq_len = 0;    # Length of GR_SEQ field must be constant 
                           # through whole datafile (asserted below).

    print STDERR "+++     Grouping data...\n" if $VERBOSE;

    foreach my $record (@data) {
        # Only people who belong to a department, and whose REV_SEQ is
        # not "Z", appear in the contact list.
        next unless ($record->{LASTNAME} and 
                     ($record->{REV_SEQ} ne "Z") and 
                     $record->{DEPARTM});

        # use GR_SEQ and GROUPMEM concatenated as hash key --
        # solves sorting problems later (can just sort keys) and
        # we can easily strip off the fixed-width GR_SEQ.
        my $grouplabel = $record->{GR_SEQ} . $record->{GROUPMEM};

        # assertion: GR_SEQ is of constant length. Required to
        #            strip off GR_SEQ later to get the group name.
        # (The check is skipped while the recorded length is still 0.)
        if ((length($record->{GR_SEQ}) != $gr_seq_len) and 
            ($gr_seq_len != 0) ) {
            die "Assertion failed: GR_SEQ not of constant length";
        }
        $gr_seq_len = length($record->{GR_SEQ});

        # Append this record to the list for its department and group;
        # the intermediate hash and array are autovivified on first use.
        push @{ $depts{$record->{DEPARTM}}{$grouplabel} }, $record;
    } 

    if ($VERBOSE) {
        print STDERR "+++         Using $gr_seq_len-character GR_SEQ.\n";
        print STDERR "+++     Finished grouping.\n";
    }

    #
    # output TeX from data generated above
    #

    print STDERR "+++     Creating LaTeX source...\n" if $VERBOSE;

    my $letter = "";  # Current letter for alphabet headers

    foreach my $thisdept (sort keys %depts) { # cycle through departments

        # Update alphabet-letter headings.
        if ( uc(substr($thisdept,0,1)) ne $letter ) {
            $letter = uc(substr($thisdept,0,1));
            print TEXCONTACT "\\pbcSection{$letter}\n";
        }

        # Update dictionary headings with *first words* of dept names.
        my $hdept = $thisdept;    
        $hdept =~ s/(.*?) .*/$1/;  # grab first word only
        print TEXCONTACT "\\markboth{$hdept}{$hdept}\n";

        print TEXCONTACT "\\pbcHeader{$thisdept}\n";

        foreach my $grouplabel (sort keys %{$depts{$thisdept}}) { # and groups
            # $grouplabel has GR_SEQ prepended, only for sorting

            my $groupname = substr($grouplabel, $gr_seq_len); # strip GR_SEQ

            # A group named after its department needs no subheading.
            print TEXCONTACT "\\pbcSubhead{$groupname}\n" 
                if $groupname ne $thisdept;

            # Sort by REV_SEQ, then REV_TITL, then name (concatenated)
            my @people = sort { 
                ($a->{REV_SEQ} . $a->{REV_TITL} . $a->{LASTNAME} . $a->{FIRSTNAM})
                    cmp
                ($b->{REV_SEQ} . $b->{REV_TITL} . $b->{LASTNAME} . $b->{FIRSTNAM}) 
            } @{ $depts{$thisdept}{$grouplabel} };

            foreach my $person ( @people ) { # and people

                my $phone = $person->{PHONE};
                my $title = $person->{REV_TITL};

                # "Surname, Prefix Firstname"; the surname alone if
                # there is no first name.
                my $name = $person->{LASTNAME};
                if ($person->{FIRSTNAM}) {
                    $name .=  ", ".$person->{PREFIX}." ".$person->{FIRSTNAM};
                }

                my $email = $person->{EMAIL};

                my $location = $person->{CAMPUS} . "-" .
                               $person->{BUILDING} . " " .
                               $person->{ROOM} . " " .
                               $person->{SROOM};

                my $entry = "\\pbcEntry{$phone}{$title}{$name}{$email}{$location}\n";

                $entry =~ s/\. /\.~/g;  # fix spacing after abbreviations

                print TEXCONTACT $entry;
            }
        }
    }
}

print TEXCONTACT "\\pbcEndTable\n";

# Buffered write errors (e.g. a full disk) only surface at close.
close TEXCONTACT or die "Can't close $texfiles{contact}: $!\n";

############################################################################
## Run pdflatex to make PDF phonebook                                  ZPDF
##

# When $RUN_LATEX is set: typeset $texmain with pdflatex until it stops
# asking for another pass, rotate the PDF backups in $pdfbackupdir, then
# publish the new PDF as $pdffile. Dies if $texfileroot cannot be entered
# or the new PDF cannot be copied; backup failures only produce warnings.
if ($RUN_LATEX) {
    my $ttex = time();   # timestamp: end of parsing, start of TeXing

    print STDERR "+++ Running pdflatex:\n" if $VERBOSE;
    chdir($texfileroot) or die "Can't chdir to $texfileroot: $!\n";

    # longtable prints a warning if it needs another pass (LaTeX is not
    # always a one-pass system). We start with needing another run, since
    # it's not TeXed yet at all.
    my $texoutput = "Rerun LaTeX";  # how LaTeX warns that we need to reprocess
    my $texit = 0;                  # number of passes so far

    while ($texoutput =~ /Rerun LaTeX/) {
        $texit++;
        print STDERR "+++     Iteration $texit\n" if $VERBOSE;

        # Capture stdout and stderr together; only the two patterns
        # checked here are ever looked at.
        $texoutput = `$latexpath $texmain 2>&1`;

        if ($VERBOSE and $texoutput =~ /Overfull \\hbox/) {
            print STDERR "+++         Overfull hboxen!\n";
        }
    }
    my $tdone = time();  # Timestamp for elapsed-time

    print STDERR "+++ Backing up old PDF files...\n" if $VERBOSE;

    # Rotate the backups, oldest first: .(N-1) overwrites .N, down to .1
    # overwriting .2. A generation that does not exist yet is skipped
    # quietly; a copy that fails for any other reason is reported.
    my $pdfbasename = basename($pdffile);
    for (my $i = $pdfbacknum; $i > 1; $i--) {
        my $oldfile = "$pdfbackupdir/$pdfbasename." . ($i-1);
        my $newfile = "$pdfbackupdir/$pdfbasename.$i";
        next unless -e $oldfile;
        copy($oldfile, $newfile)
            or warn "WARNING: Can't make backup of $oldfile: $!\n";
    }

    # The currently published PDF becomes backup .1 (if there is one).
    if (-e $pdffile) {
        copy($pdffile, "$pdfbackupdir/$pdfbasename.1")
            or warn "WARNING: Can't make backup of $pdffile: $!\n";
    }

    # Copy output to www-accessible directory.
    my $newpdf = "$texfileroot/$texmain";       # Location of TeX's output
    $newpdf =~ s/\.tex$/.pdf/;                  # same name, .pdf extension
    print STDERR "+++ Copying output to $pdffile...\n" if $VERBOSE;
    copy($newpdf, $pdffile) 
        or die "Couldn't copy output to $pdffile: $!\n";

    # Summarize.
    if ($VERBOSE) {
        my $tparse = $ttex - $tstart + 1; # perl share of elapsed time
        my $tpdf = $tdone - $ttex + 1;    # TeX share of elapsed time
        my $pdfsize = (stat($pdffile))[7];
        print STDERR "+++ Elapsed time: ${tparse}s processing\n";
        print STDERR "+++               ${tpdf}s TeXing in $texit pass";
        print STDERR (($texit == 1) ? "\n" : "es\n");
        print STDERR "+++ Output file was $pdfsize bytes\n";
        print STDERR "+++ Done.\n";
    }
}

__END__

=pod

=head1 NAME

F<makepbook> - telephone directory generator
 

=head1 DESCRIPTION

The F<makepbook> system is a set of programs which work together to
generate Concordia's telephone directory directly from the directory
database with little to no human intervention. The directory created
by F<makepbook> is in Adobe's PDF format, suitable for on-screen viewing and
printing.

=head1 CONFIGURATION

Configuring F<makepbook> mostly consists of telling the program where it
can find input files, where it should put output files, and what
environment it is running in. Once configured, F<makepbook> should run
happily on its own.

I<This section assumes that> F<makepbook> I<and its supporting files have already
been installed; if this is not the case, then your Unix administrator
should follow the instructions at the beginning of>
L<"Programmer's Notes"> I<below>.

The configuration for F<makepbook> is stored in the program file itself
(usually installed as F<$HOME/bin/makepbook>). It must
be edited either on the Unix machine on which the program resides, or
with a text editor that understands Unix text files (such as BBEdit
in Mac OS, or UltraEdit in Windows).

The configuration variables (found below the initial C<require>
and C<use> commands) are as follows:

=head2 Operational settings

I<$VERBOSE> (Default: 1)

Setting I<$VERBOSE> to 1 will cause F<makepbook> to output a
detailed status report as it runs. Setting I<$VERBOSE> to 0 will
suppress the status report. (In the standard F<makepbook> installation,
this report is mailed to the user whenever a directory is generated.)

I<$RUN_LATEX> (Default: 1)

Setting I<$RUN_LATEX> to 1 will cause F<makepbook> to run the C<LaTeX>
typesetter on the data generated by F<makepbook>, generating the directory in
PDF format. Setting I<$RUN_LATEX> to 0 will prevent F<makepbook> from
actually generating the directory. This should always be set to 1
except when debugging.

I<$TRUNCATE_FIELDS> (Default: 1)

Setting I<$TRUNCATE_FIELDS> to 1 will allow F<makepbook> to truncate
department names in the B<Department List> and B<Fax
Numbers List> in order to fit the results on a page. Setting
I<$TRUNCATE_FIELDS> to 0 will prevent F<makepbook> from truncating
overlong department names. This will usually result in tables being
produced that are wider than a page. (See also L<"Diagnostics">.)

I<$USE_MAC_CHARSET> (Default: 1)

Set I<$USE_MAC_CHARSET> to 1 if the data file is using the
Macintosh extended character set, or to 0 if the data file is using
the ISO-8859-1 standard Latin-1 character set. An incorrect character
set will cause abnormal program termination.

=head2 File locations

I<$datafile> (Default: F</home/telesis/data/WEBTEL.TXT>)

This variable points to the database file from which F<makepbook>
generates its directory. (The format of this datafile is documented in
L<"Programmer's Notes">, below. Be sure to also update
I<DATAFILE> in the F<autorun> script.)

I<$texfileroot> (Default: F</home/telesis/tex>)

This variable must be set to the directory in which all of the C<LaTeX>
source code which comes with F<makepbook> can be found. Usually, the parts
generated by F<makepbook> will also end up in this directory.

I<$pdffile> (Default: F</home/telesis/www/pbook.pdf>)

This variable contains the fully-qualified path and filename of the
PDF file generated by F<makepbook>. It should usually go somewhere where the
Web server can access it. (Be sure to also update I<PDFFILE> in
the F<autorun> script.)

I<$pdfbackupdir> (Default: F</home/telesis/www/old-phonebooks/>)

This variable contains the name of a directory into which backup copies
of phonebooks should be stored. F<makepbook> always creates a backup copy of
the current phonebook before replacing it with the newly-generated one.

I<$latexpath> (Default: F</local/paths/pdflatex>)

This points to the location of the C<LaTeX> typesetting program.

I<$texmain> (Default: F<pbook.tex>)

This contains the name of the ``root'' C<LaTeX> file from which the
directory will be generated. The file is expected to be in
I<$texfileroot>.

=head2 Miscellaneous variables

I<$pdfbacknum> (Default: 3)

This variable controls the number of directories that are stored in
the backup directory, I<$pdfbackupdir>. F<makepbook> will rotate its
backups so that multiple previous phone directories are always
available (assuming that I<$pdfbacknum> is greater than 1).

I<$fieldsep> (Default: "\t")

This contains the field separator used in the database from which
F<makepbook> generates its directory. This will usually be I<"\t"> for
tab-separated values (TSV), or I<","> for comma-separated values
(CSV).

I<$maxlength> (Default: 100)

This contains the maximum number of characters that will be allowed in
a Department name in the B<Department List> or B<Fax Numbers>
section of the Directory. Department names longer than I<$maxlength>
will be truncated at that point if I<$TRUNCATE_FIELDS> is set to 1.

=head1 OPERATION

Daily operation of F<makepbook> is simple---whenever a new telephone
directory needs to be generated, drop the new database into the "db
dropbox" folder over AppleShare. The F<makepbook> system checks the dropbox
periodically (usually, daily); when it notices an update, it will generate
a new phone directory automatically. After the directory has been generated
and copied to its distribution location (I<$pdffile> in the
configuration), it will report on its status via email.

If a new directory needs to be generated right away, F<makepbook> can be
started manually by logging in to the Unix account and typing F<makepbook> at
the command prompt. In the case of manual operation, the status report
will be displayed on-screen.

=head1 DIAGNOSTICS

=head2 Table is wider than page

This problem, where a table protrudes past the right edge of the page,
will usually also be accompanied by an "Overfull hboxen!"  error on
the final C<LaTeX> iteration in the status report. C<LaTeX> generates
table columns as wide as the largest data that has to fit in that
column; if that pushes the table off the edge of the page, C<LaTeX>
will happily comply. This behavior is controlled in F<makepbook> by
the I<$TRUNCATE_FIELDS> option and I<$maxlength> variable; if it
occurs, ensure that I<$TRUNCATE_FIELDS> is enabled and I<$maxlength>
is sufficiently small, or shorten the offending data in the original
data source.

=head2 Fonts are too small on printed output

If printed copies of the generated PDF file seem to be using unusually
tiny fonts, then the PDF viewer (Acrobat or other) used to view and
print the directory probably has its "Shrink to Fit" or "Fit to
Page" option enabled; if the output has 1" margins, this is almost
certainly the case.  F<makepbook> generates output that will I<fit> on a
standard letter-size page, but which is slightly larger than what
Acrobat Reader I<thinks> will fit.  Disabling "Shrink to Fit" or
"Fit to Page" will solve this problem.

=head2 makepbook hangs while running pdflatex

Any errors generated by F<pdflatex> will be hidden by F<makepbook>. If
F<makepbook> seems to hang at this point, then there is something
wrong with the C<LaTeX> source files. Try rerunning F<makepbook> with
a fresh data source.  This error can also occur when the character set
of the data doesn't match the character set F<makepbook> expects;
check to see that I<$USE_MAC_CHARSET> is at the proper setting for the
character set used in the data.

If necessary, disable I<$RUN_LATEX> and then run F<pdflatex>
manually on I<$texmain> to see the C<LaTeX> error messages.

=head2 makepbook complains that it can't locate a file

Check to make sure that all of the file locations described above
point to the appropriate locations.

=head2 makepbook complains, "GR_SEQ not of constant length"

In order to group subgroups in the contact list in the proper order,
F<makepbook> relies on the C<GR_SEQ> field being of the same length in all
the data. C<GR_SEQ> is usually of length 1, but any length will work
as long as it is consistent between records.

If the length of C<GR_SEQ> hasn't changed, search the original data
for inconsistencies in that field and rerun F<makepbook> on a fresh datafile.

=head1 PROGRAMMER'S NOTES

=head2 Overview

The F<makepbook> system is a set of programs which work together to generate
Concordia's telephone directory directly from the directory database
with little to no human intervention. The directory created by F<makepbook>
is in Adobe's PDF format, suitable for on-screen viewing and printing.

The general behavior of the system is as follows: a shell script
called F<autorun> is called by F<cron> periodically (usually,
daily). This shell script compares the mtime of the directory database
and of the current PDF file, and if the database is newer (i.e., if
the database has been updated since the last time a PDF file was
generated), runs the Perl program F<makepbook> to generate the new
directory.

=head2 Requirements

The F<makepbook> system requires the following:

=over 4

=item C<pdfTeX> 3.14159 or later

=item Perl 5.004 or later

=item The following Perl modules: 

=over 4

=item Text::ParseWords 

=item locale 

=item POSIX

=item Convert::Recode 

=item File::Copy 

=item File::Basename

=back

=item The following C<LaTeX> packages:

=over 4

=item fancyhdr 

=item inputenc 

=item longtable

=item graphics

=back

=item The (free) URW Nimbus C<LaTeX> and Adobe Type 1 fonts

=item GNU F<recode> version 3.4

=back

=head2 Installation

Once the files are in place (usually in separate F<bin>, F<data>,
F<graphics>, and F<tex> directories) and F<makepbook> has been
configured as detailed in the User's Guide, there are two system
modifications to complete.

B<Netatalk:> 
The F<data> directory is intended to be accessible as an
AppleShare share point. Add the following line to the user's
F<$HOME/.AppleVolumes>:

    ~/data "Phonebook db dropbox" options=crlf,tolower

B<Crontab:>
The following crontab entry will run autorun daily during the week:

    30 5 * * 2-6 /home/telesis/bin/autorun

=head2 Implementation Notes

B<autorun:>
The F<autorun> program is a Bourne shell script which starts
F<makepbook> if the most recently generated PDF file is older than the
database (based on an mtime comparison, C<test -nt>). Putting
this in its own shell script rather than in the crontab simplifies
the crontab and allows easy maintenance of file locations.

B<Database format:>
The database around which the program was built was exported as
tab-separated values (TSV) from FoxPro under MS-DOS, then opened in
WordPerfect under Mac OS and saved as a text (TTXT) file. Variations
in input data during development led to the inclusion of an option to
switch from TSV to comma-separated values (CSV), and from the Macintosh
character set to ISO 8859-1 (Latin-1). The database contains the
following fields:

    0. ARR_COLS  
    1. R_NUMBER   Record number
    2. AREA       Telephone area code
    3. EXCH       Telephone exchange
    4. PHONE      Telephone extension number
    5. FAX        Fax number
    6. FIRSTNAM   First name
    7. LASTNAME   Surname
    8. PREFIX     Honorific title (Dr., Sr., but not Mr., Mrs.)
    9. TITLE      Job title
   10. REV_SEQ    Sequence order for people in contact list (1-9A-Z)
   11. REV_TITL   Job title for contact list
   12. EMAIL      Email address
   13. CAMPUS     Campus
   14. BUILDING   Building
   15. ROOM       Room number
   16. SROOM      Sub-room number
   17. DEP_NAME   Department name
   18. DEP_MAIL   Department email address
   19. DEPT_NAME  Official department name
   20. ABRV_NAME  Appropriate abbreviations of DEP_NAME or DEPARTM
   21. GROUPMEM   Group within department
   22. DEPARTM    Department name
   23. N_LIST     Flag for exclusion from faculty/staff list
   24. GR_SEQ     Sequence order for GROUPMEMs in contact list (1-9A-Z)
   25. ABRV_DEPT  Compressed DEPARTM name.

B<LaTeX:>
The telephone directory is generated from a mix of static and
dynamically-generated C<LaTeX> source. The dynamically-generated parts
are the directory tables themselves, and are documented in the
following section.

B<pbook.tex>:
F<pbook.tex> is the main C<LaTeX> source. It sets up a number of
parameters and commands, and then includes all of the other C<LaTeX>
files with C<\input>. Most of the parameters are standard C<LaTeX>.

The important commands are:

=over 4

=item C<\pbfont>: switches to the default font used in the
directory, which at the time of this writing was C<OT1/phv/mc/9>.

=item C<\pb[cdfp](section|header|entry|begintable|endtable|cols)>:
These control the output of the dynamically-generated C<LaTeX>. The
third letter of the command indicates the directory section: C<c> for
contact list, C<d> for department listing, C<f> for fax list, and C<p>
for faculty and staff (people) list. The part of the command name
following the third letter indicates its function. Each section will
begin with a C<begintable> and end with an C<endtable>; alphabetical
and sectional headers are marked with C<section> and C<header>; and
individual directory entries use C<entry>. C<cols> generates the
tables' column headers.

=item C<\sectionmark>: This is renewed to an empty
block, to prevent section headers from appearing in the left and right
page headers.

=back

The lexicon headers in the directories are accomplished with

  \rhead{\pbfont\textbc{\rightmark{}--\leftmark{}}}

as described in the "Dictionary Headers" section of the
C<fancyhdr> documentation.

F<pbook.tex> inputs the following:

=over 4

=item F<pbfront.tex>: The directory's cover page.

=item F<pbmaps.tex>: Campus maps. This file also I<contains> an
C<\eject> between pages, but the final C<\eject> occurs in
F<pbook.tex>. Since the map pages reset C<\rhead>, it must be set to
the 'Dictionary Headers' settings from the C<fancyhdr>
documentation immediately following the inclusion of
F<pbmaps.tex>.

=item F<pbptitle.tex>: The titlepage of the 'Faculty and Staff'
listing.

=item F<pbpeople.tex>: The 'Faculty and Staff' listing itself.

=item F<pbdtitle.tex>: The titlepage of the 'Department List'
listing.

=item F<pbdept.tex>: The 'Department List' listing itself.

=item F<pbctitle.tex>: The titlepage of the 'Departmental
Contacts' listing.

=item F<pbcontact.tex>: The 'Departmental Contacts' listing
itself.

=item F<pbftitle.tex>: The titlepage of the 'Fax Numbers' listing.

=item F<pbfax.tex>: The 'Fax Numbers' listing itself.

=item F<pbback.tex>: The back page (Colophon).

=back


B<makepbook>:
The F<makepbook> program itself is in Perl and can be logically broken into
seven parts. The first part is the configuration section described in
the User's Guide, which will not be explained further here. The
program compiles without warnings under the C<strict> pragma
and with full warnings enabled.

The second part takes care of obtaining the data. The datafile is
converted to the appropriate character set (either left alone or
converted from Macintosh to Latin-1) and carriage returns are
stripped; to avoid clobbering the original data, the cleaned-up
database is stored in a new file with F<.clean> appended to its
name. The data is then read from the file, cleaned up for C<TeX>ing,
and stored in an array, in which each array item is a reference to a
single-record hash of fieldnames and values.

The third, fourth, and fifth parts generate the C<LaTeX> source for
the people, department, and fax listings. For the most part, this is
just a case of sorting the array containing the data by the
appropriate hashrefs and extracting the fields we want. For each
record, the alphabetic header is updated if necessary, and a
C<\markboth> updates the lexicon headers. In the department and fax
lists, department names are truncated if necessary.  Note that the
C<\markboth> should I<follow> the update of the alphabetic header, or
an off-by-one error can occur in the lexicon headers.

The sixth part generates the contact list, and deserves further
explanation. The contact list in the phone directory is broken down
into departments, which are then broken down into groups, which
contain people; the data structures in the implementation of the
contact-list generator reflect this organization. Our original data
array is traversed and the data is copied into a hash of hashes of
arrays of hashes: the first hash uses C<DEPARTM>s as keys and references
to hashes as values; the hashes referred to from that use groups as
keys and array references as values; the arrays referred to from that
contain references to hashes, which in turn are the same hashes that
were referred to in the original data array (i.e., with field names as
keys and data as values).

The "groups" used as keys in the second hash are actually
concatenations of C<GR_SEQ> and C<GROUPMEM>; since C<GR_SEQ> can be
asserted to be of constant length throughout the entire database, it
can be stripped when we need to access the C<GROUPMEM>, and with it
prepended to C<GROUPMEM>, the keys of that hash will sort naively in the
correct order.

The output is generated by iterating through the sorted list of
C<DEPARTM>s, and then the sorted list of C<GROUPMEM>s within that, and then
the C<REV_SEQ>-ordered list of people within that.

The seventh part of F<makepbook> runs F<pdfLaTeX> on the output of the
preceding parts of the program. Since the C<longtable> package
usually requires multiple passes, we capture STDOUT and look for the
warning string telling us to run C<LaTeX> again, and continue rerunning
C<LaTeX> on F<pbook.tex> until that warning string is not
generated. The old PDF file is backed up and the new one put in place.

=head1 Author

The F<makepbook> system and this documentation were written by Rich Lafferty,
C<rich@alcor.concordia.ca>.

=cut