#!/usr/bin/perl
###########################################################################
#
#Copyright (c) 2002 by Ingo Schramm
#
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
#USA
#
###########################################################################
use v5.6.1;
use strict;
use warnings;
use Getopt::Long;
use Text::Template;
use HTML::Entities;
################################################################################
# global variables
our $Progname = "buildtext";
our $Invoked = $0;
# reduce the invoking path to its last component
$Invoked =~ s:^(.*/)(.*)$:$2:g;
# kept as a string: the number 1.00 would be printed as "1"
our $Version = '1.00';
# NOTE(review): the help text and the POD describe these two defaults as
# HTML paragraph tags; confirm '<p>' / '</p>' against the upstream release.
our $ParOpen = '<p>'; # paragraph opening tag
our $ParClose = '</p>'; # paragraph closing tag
our $TmplFile = "./templet"; # template file
our $Outfile = ""; # actual used output file
our $ActFile = ""; # actually processed html file
our $UnsafeChars = 'äÄöÖüÜß"'; # chars to convert to html entities
our $DelOpen = ""; # alternative delimiters for
our $DelClose = ""; # code fragments in templates
# flags
our $ParOneLine = 1; # join the lines of a paragraph into one line
our $CheckComments = 1; # strip #-comment lines from paragraphs
our $TeX = 0; # --tex: TeX delimiters, no entity encoding
our $Quiet = 0; # suppress warnings
our $Debug = 0; # dump the Infile symbol table to STDERR
################################################################################
# prototypes
# Forward declarations: they let the subs below be called before their
# definition has been compiled. The names must match the definitions.
sub action();
sub parse_command_line();
sub help(;$);
sub version();
sub docu();
sub process_input($);
sub merge();
sub debug($);
sub _warn($);
sub _die($);
################################################################################
# MAIN
action();
################################################################################
# declarations
#---help------------------------------#
#
# Replace the running process with "perldoc <this script>" to show the POD
# embedded below __END__. Exits with status 1 if perldoc cannot be started.
sub docu() {
    # list form: the script path goes to perldoc as one argument and is
    # never interpreted by a shell (spaces or metacharacters in $0 are safe)
    exec( 'perldoc', $0 ) or exit(1);
}
# Print the version banner and leave. Any arguments handed over by the
# option parser are ignored.
sub version() {
    # the "v" flag tells help() to exit right after the banner line
    return help("v");
}
# Print the program banner and, unless only the version was requested,
# the option overview. Never returns: the process exits with status 0.
#
#   $flag  optional; "v" stops after the banner (this is what version()
#          passes). Any other value, or none at all, prints the full help.
sub help(;$) {
    my $flag = shift;
    print $Progname, " ", $Version, " -- called as $Invoked\n";
    # $flag is undef on a plain help() call, so test definedness first
    exit(0) if ( defined $flag && $flag eq "v" );
    print <<"EOF";
$Invoked [options] [file ...]
Options:
  default values are given in parentheses
      --[no]check-comments   : check for #-comments in paragraphs
                               or not (yes)
      --debug                : output some debugging info to stderr (no)
      --del-open=STRING      : opening delimiter; change with care ('{')
      --del-close=STRING     : closing delimiter; change with care ('}')
      --doc                  : read enhanced documentation
  -h, --help                 : this screen
  -o, --out=FILE             : output to this file (stdout)
      --par-close=STRING     : paragraph closing tag ('$ParClose')
      --[no]par-one-line     : delete newlines inside of paragraphs
                               or not (yes)
      --par-open=STRING      : paragraph opening tag ('$ParOpen')
  -q, --quiet                : suppress warning messages
  -t, --templet=FILE         : specify template file
                               ('$TmplFile')
      --tex                  : work with TeX files; this option changes
                               the values of the delimiters for code fragments
                               in templates to '*~~' and '~~*' (no)
      --unsafe-chars=STRING  : define characters that shall be converted
                               to html entities ( '$UnsafeChars' )
  -v, --version              : version information
EOF
    exit 0;
}
#---parse_command_line----------------#
#
# parse command line
#
# Read the command line options into the global settings. Recognised
# options are removed from @ARGV; what remains are the input files.
# Dies if the option parser reports an invalid command line.
sub parse_command_line() {
    if ( @ARGV ) {
        # Configure is the current name of the deprecated config() alias
        Getopt::Long::Configure("no_ignorecase", "bundling");
        my $parsed = GetOptions( "help|h" , \&help,
            "version|v" , \&version,
            "o|out=s" , \$Outfile,
            "par-open=s" , \$ParOpen,
            "par-close=s" , \$ParClose,
            "t|templet=s" , \$TmplFile,
            "q|quiet" , \$Quiet,
            "debug" , \$Debug,
            "unsafe-chars=s" , \$UnsafeChars,
            "par-one-line!" , \$ParOneLine,
            "check-comments!" , \$CheckComments,
            "tex" , \$TeX,
            "del-open=s" , \$DelOpen,
            "del-close=s" , \$DelClose,
            "doc" , \&docu
        );
        # GetOptions has already reported the details on STDERR; do not
        # go on with a half-understood command line
        _die("invalid command line, try '$Invoked --help'") unless $parsed;
    }
    # --tex overrides the delimiters and switches entity encoding off
    if ( $TeX ) {
        $DelOpen = '*~~';
        $DelClose = '~~*';
        $UnsafeChars = "";
    }
}
#---action-----------------------------#
# Program driver: parse the options, make sure the template is usable,
# evaluate every input (or STDIN when none is named) and finally merge
# the collected variables with the template.
sub action() {
    parse_command_line();

    # the template must exist and be readable before any input is touched
    -e $TmplFile or _die("templet $TmplFile not found\n");
    -r $TmplFile or _die("can't read templet $TmplFile, check permissions\n");

    if ( !@ARGV ) {
        # no file arguments left: read the input from STDIN
        process_input( "" );
    }
    else {
        # every remaining argument is an input file
        for my $input ( @ARGV ) {
            process_input( $input );
        }
    }

    # merge input and template
    merge();
}
#---debug-------------------------------#
# Dump the scalar, array and hash slots of every symbol in package Infile
# to STDERR. Long values are truncated and at most five array elements or
# hash entries are shown per symbol.
#
#   $state  label describing the processing stage, printed in the header
sub debug($) {
    my $state = shift;
    print STDERR "\nDump symbol table of '$ActFile' $state:\n(long values are truncated)\n";
    foreach my $symname ( sort keys %Infile:: ) {
        # only the symbol table dereferences need relaxing; variable
        # declarations stay checked
        no strict 'refs';
        # scalar slot
        if ( defined ${$Infile::{$symname}} ) {
            print STDERR ( sprintf ( "\$%-16s = '%.48s'\n", $symname, ${$Infile::{$symname}} ) );
        }
        # array slot: first five elements, 16 characters each
        if ( @{$Infile::{$symname}} ) {
            print STDERR ( sprintf ( "\@%-16s = ", $symname ) );
            my $count = 0;
            foreach ( @{$Infile::{$symname}} ) {
                $count++;
                print STDERR ( sprintf ( "'%-.16s' ", $_ ) );
                last if ( $count == 5 );
            }
            print STDERR "\n";
        }
        # hash slot: first five keys in sorted order
        if ( %{$Infile::{$symname}} ) {
            print STDERR ( sprintf ( "%%%-16s =\n", $symname ) );
            my $count = 0;
            foreach my $key ( sort keys %{$Infile::{$symname}} ) {
                $count++;
                print STDERR ( sprintf ( "%-12s => %.48s\n", $key, ${$Infile::{$symname}}{$key} ) );
                last if ( $count == 5 );
            }
        }
    }
}
#---process_input----------------------#
#
# Read one input text and evaluate it as Perl code inside package Infile,
# so that the variables it declares end up in that package's symbol table.
#
#   $file  path of the input file; the empty string means "read STDIN"
#
# Dies if the file cannot be opened or closed, or if the evaluated code
# raises an error. Warns when no $CONTENT variable exists afterwards or
# when the reserved name $OUT is present.
sub process_input($) {
    my $file = shift;
    my $in = undef;
    # test for a non-empty name, so that a file called "0" is not
    # mistaken for "no file"
    my $from_file = ( defined $file && length $file ) ? 1 : 0;
    $ActFile = $from_file ? $file : "STDIN";
    if ( $from_file ) {
        open ( $in, "<", $file ) or _die("could not open $file for read: $!");
    }
    else {
        $in = \*STDIN;
    }
    # eval the input file in a separate package
    # for later processing of its variables via
    # direct access to the symbol table of that package
    {
        package Infile;
        no strict;
        no warnings;
        # predefined variables
        my @lt = localtime;
        $YEAR = $lt[5] + 1900;
        my $mon = sprintf("%02d", ( $lt[4] + 1 ) );
        my $day = sprintf("%02d", $lt[3] );
        $SDATE = "$day.$mon.$YEAR";
        $DATE = scalar ( localtime );
        # read whole input in one single step; the record separator is
        # localized, so it gets its previous value back at the end of
        # this block
        local $/;
        while ( <$in> ) {
            eval;
            if ( $@ ) {
                main::_die("Exit due to errors in '$ActFile':\n$@");
            }
        }
    } # end of package Infile
    if ( $from_file ) {
        close( $in ) or _die("could not close $ActFile: $!\n");
    }
    # check for $CONTENT[0-9] and $OUT vars
    #
    # you have to check in a loop not only because you
    # don't know about the names of the $CONTENT[0-9] vars
    # but also because you would get the identifier created by
    # autovivification if you try something like 'if $Infile::OUT'
    my $varcontent = 0;
    my $varout = 0;
    foreach ( keys %Infile:: ) {
        $varcontent++ if ( /^((CONTENT)([0-9]*))$/ );
        $varout++ if ( /^(OUT)$/ );
    }
    _warn('no declaration of $CONTENT in '."'$ActFile'") unless ( $varcontent );
    _warn('useless use of $OUT in '."'$ActFile'") if ( $varout );
    debug("before encoding") if ( $Debug );
}
#---merge------------------------------#
# Post-process the variables collected in package Infile and fill the
# template with them:
#   1. entity-encode all scalars, arrays and hashes (unless switched off),
#   2. turn each $CONTENT[0-9]* scalar into tagged paragraphs,
#   3. fill in the template and write the result to $Outfile or STDOUT.
# Dies if the output file cannot be opened or closed or the template
# object cannot be created; a failing fill-in only produces a warning.
sub merge() {
    # encode html entities
    if ( $UnsafeChars ) {
        foreach my $symname ( sort keys %Infile:: ) {
            # scalar slot
            if ( defined ${$Infile::{$symname}} ) {
                ${$Infile::{$symname}} = encode_entities( ${$Infile::{$symname}}, $UnsafeChars );
            }
            # array slot
            if ( @{$Infile::{$symname}} ) {
                foreach ( @{$Infile::{$symname}} ) {
                    $_ = encode_entities( $_, $UnsafeChars );
                }
            }
            # hash slot
            if ( %{$Infile::{$symname}} ) {
                foreach my $key ( sort keys %{$Infile::{$symname}} ) {
                    ${$Infile::{$symname}}{$key} =
                        encode_entities( ${$Infile::{$symname}}{$key}, $UnsafeChars );
                }
            }
        } # end foreach $symname
        debug("after encoding") if $Debug;
    } # end if $UnsafeChars
    # preprocess CONTENT variables
    foreach my $symname ( keys %Infile:: ) {
        if ( $symname =~ /^((CONTENT)([0-9]*))$/ ) {
            # blank lines separate the paragraphs
            my @Buffer = ();
            @Buffer = split( /^\s*$/gms, ${$Infile::{$symname}} );
            foreach ( @Buffer ) {
                chomp;
                s/^\s*//;
                s/\s*$//;
                # delete comments
                s/^\s*#.*$//mg if $CheckComments;
                # \n -> [spc]
                if ( $ParOneLine ) {
                    use bytes;
                    # try DOS, UNIX, Mac
                    s/(\015\012)+|(\012)+|(\015)+/\040/sg;
                }
                $_ = $ParOpen.$_.$ParClose;
            } # end foreach @Buffer
            ${$Infile::{$symname}} = join( "\n", @Buffer );
        } # end if
    } # end foreach $symname
    debug("after preprocessing") if $Debug;
    # create output filehandle
    my $out = undef;
    if ( $Outfile ) {
        open( $out, ">", $Outfile) or _die("could not open output file '$Outfile':$!");
    }
    else {
        $out = \*STDOUT;
    }
    # create template object
    my %config = (TYPE => 'FILE',
        SOURCE => $TmplFile );
    if ( $DelOpen && $DelClose ) {
        # Text::Template reads the option DELIMITERS (plural) and wants a
        # reference to a two-element array: [ open, close ]
        $config{ DELIMITERS } = [ $DelOpen, $DelClose ];
    }
    my $tmplobj = Text::Template->new( %config );
    # stop with a readable message instead of calling a method on undef
    _die("could not create template object for '$TmplFile': $Text::Template::ERROR")
        unless ( defined $tmplobj );
    # merge text file with template
    my $success = $tmplobj->fill_in (OUTPUT => $out,
        PACKAGE => 'Infile' );
    unless ( $success ) {
        _warn("Compilation of template '$TmplFile' failed\n$Text::Template::ERROR");
    }
    if ( $Outfile ) {
        close( $out ) or _die("could not close output file '$Outfile':$!");
    }
}
#---_warn------------------------------#
# Emit "Warning: <text>" followed by a newline on STDERR via warn(),
# unless the quiet flag is set.
#
#   $text  message body; a newline is always appended
sub _warn($) {
    my ($text) = @_;
    $Quiet and return;
    warn( "Warning: " . $text . "\n" );
}
#---_die-------------------------------#
# Abort with "Error: <text>" followed by a newline. Because the message
# ends in a newline, die() adds no file/line information.
#
#   $text  message body; a newline is always appended
sub _die($) {
    my ($text) = @_;
    die( "Error: " . $text . "\n" );
}
__END__
###########################################################################
=pod
=head1 NAME
buildtext - merge a text file and a template
=head1 SYNOPSIS
buildtext [options] [file ...]
=head1 DESCRIPTION
=head2 Overview
The standalone perl script B<buildtext> merges a text as read from a
given file or from stdin with a template file and sends the output
either to stdout or to a specified file. Though the script is initially
intended to build html output, it may be used to create any kind of
formatted text as given by the template. The not that new but good idea
behind the game is to separate content and formatting.
The script uses the functionality of the famous Text::Template module
by Mark Jason Dominus. Additionally it provides some kind of variable
expansion not only in the template file, but in the text input file
too.
=head2 Input File
Input files are plain text files that define some variables in the
easy way like Perl does itself. All you have to do is to declare some
variables which you want to use in your template file and assign their
values.
First of all you should assign large amounts of text to a scalar
variable called $CONTENT which is treated in a special manner by the
script. The value will be read paragraph by paragraph and some
conversions are applied to these paragraphs, like deleting inserted
comments. Like all other variables, $CONTENT is searched for unsafe
characters that are to be converted to html entities. Besides the scalar
$CONTENT variable you may declare all kinds of variables, even arrays
or hashes in the input file for later use in template processing.
Technically the input file is evaluated in a separate package. So it is
possible to read the names and values of the variables directly out of
the symbol table of that package. Doing this, code or IO symbols are
ignored. So if you use code to evaluate values--it is possible at
all--this code should always result in a scalar or list variable of
global scope. C<my>-variables will have no effect.
=head2 Template
Template files contain all formatting information of the document as
input files contain the content. B<buildtext> uses the template
mechanism of Text::Template, so read what the documentation of that
module says about how to create templates:
"The Text::Template module scans the template source. An open brace {
begins a program fragment, which continues until the matching close
brace }. When the template is filled in, the program fragments are
evaluated, and each one is replaced with the resulting value to yield
the text that is returned.
A backslash \ in front of a brace (or another backslash that is in
front of a brace) escapes its special meaning. The result of filling
out this template:
\{ The sum of 1 and 2 is {1+2} \}
is
{ The sum of 1 and 2 is 3 }
If you have an unmatched brace, Text::Template will return a failure
code and a warning about where the problem is. Backslashes that do
not precede a brace are passed through unchanged. If you have a
template like this:
{ "String that ends in a newline.\n" }
The backslash inside the string is passed through to Perl unchanged,
so the \n really does turn into a newline.
Each program fragment should be a sequence of Perl statements, which
are evaluated the usual way. The result of the last statement
executed will be evaluted in scalar context; the result of this
statement is a string, which is interpolated into the template in
place of the program fragment itself.
The fragments are evaluated in order, and side effects from earlier
fragments will persist into later fragments:
{$x = @things; ''}The Lord High Chamberlain has gotten {$x}
things for me this year.
{ $diff = $x - 17;
$more = 'more'
if ($diff == 0) {
$diff = 'no';
} elsif ($diff < 0) {
$more = 'fewer';
}
'';
}
That is {$diff} {$more} than he gave me last year.
The value of $x set in the first line will persist into the next
fragment that begins on the third line, and the values of $diff and
$more set in the second fragment will persist and be interpolated into
the last line. The output will look something like this:
The Lord High Chamberlain has gotten 42
things for me this year.
That is 25 more than he gave me last year.
"
Note: Backslash processing is not done when you specify alternative
delimiters with the B<--del-open> or B<--del-close> options.
=head1 VARIABLES
=head2 Syntax
The syntax of variable definition in input text files is Perl
syntax. That's all what to say about this topic.
You may use Perl code to produce values while the input file is
read. The script uses an C<eval> to do this. Compiler errors will
cause the script to stop execution. All code should result in a single
variable of global scope, otherwise using code is quite senseless.
Note: Do not use C<my> declarations for variables that are defined for
later use in template processing. These variables may silently
disappear.
=head2 Predefined Variables And Reserved Words
The script defines some variables for your comfort and assigns
corresponding values. You may overwrite them in your input file (but
not the special variable $OUT).
=over
=item B<$CONTENT[0-9]>
This identifier should I<always> be inserted in the template and the
variable should I<always> be defined in the text file. If there is no
declaration of $CONTENT in the text file, a warning will be
issued. The best way of declaration is using a Here document. If you
double quote the Here document, you may use any formerly declared
variables in your text.
Example:
$TITLE = "A short example";
$CONTENT = ( <<"EOF" );
$TITLE
This is a very short example.
EOF
You may declare more than one $CONTENT variable in a single
document. All you have to do is to add a number to the $CONTENT
identifier like '$CONTENT1', '$CONTENT2' or even '$CONTENT123456789'
(Note: $CONTENT23B will not work).
For the $CONTENT class of variables some special treatment comes into
use. They are read not line by line but paragraph by paragraph. Two
paragraphs are separated by one or more empty lines. In the output
file a paragraph will be enclosed by an opening and a closing
tag. These tags default to html: '<p>' and '</p>'. You may
change them using the B<--par-open> and B<--par-close> options.
Inside of paragraphs you have the possibility to insert comments that
will not appear in the output. A comment is a line that starts with a
'#' sign. Additionally all newlines in a paragraph will be deleted, so
one paragraph appears as a single line in output. You may switch this
off by using the command line options B<--nopar-one-line> and
B<--nocheck-comments>.
The $CONTENT variables should be scalars, otherwise the special
treatment will not work as expected.
=item B<$YEAR>
The year when the script is running in four digit notation.
=item B<$DATE>
The recent date when the script is running in format "Wkd Mon DD
HH:MM:SS YYYY".
=item B<$SDATE>
The recent date in short format "DD.MM.YYYY".
=item B<$OUT>
The variable $OUT is reserved by Text::Template. A warning will be
issued if you use this identifier in your input text file. Read what
the documentation for Text::Template says about this variable:
"There is one special trick you can play in a template. Here is the
motivation for it: Suppose you are going to pass an array, @items,
into the template, and you want the template to generate a bulleted
list with a header, like this:
Here is a list of the things I have got for you since 1907:
* Ivory
* Apes
* Peacocks
* ...
One way to do it is with a template like this:
Here is a list of the things I have got for you since 1907:
{ my $blist = '';
foreach $i (@items) {
$blist .= qq{ * $i\n};
}
$blist;
}
Here we construct the list in a variable called $blist, which we
return at the end. This is a little cumbersome. There is a shortcut.
Inside of templates, there is a special variable called $OUT. Anything
you append to this variable will appear in the output of the template.
Also, if you use $OUT in a program fragment, the normal behavior, of
replacing the fragment with its return value, is disabled; instead the
fragment is replaced with the value of $OUT. This means that you can
write the template above like this:
Here is a list of the things I have got for you since 1907:
{ foreach $i (@items) {
$OUT .= " * $i\n";
}
}
$OUT is reinitialized to the empty string at the start of each program
fragment. It is private to Text::Template, so you can't use a
variable named $OUT in your template without invoking the special
behavior."
=back
=head1 OPTIONS
The following commandline options may be used to customize the
behavior of B<buildtext>. Short options for which the long versions
require an argument require that argument too.
=over
=item B<--[no]check-comments>
Inside of $CONTENT variables a check for #-comments will be performed
if this switch is set, which is the default. Comments are single lines
that begin with a '#' and will be deleted.
=item B<--debug>
Setting this switch will show you a dump of the internal symbol table
as it is produced by reading and processing the input text file. The
information is printed to STDERR and may be redirected.
=item B<--del-open>=I<string>
Use this switch with care! It changes the opening delimiter of code
snippets inside of template files. Default is '{'.
=item B<--del-close>=I<string>
Use with care! Change the closing delimiter of code snippets inside of
templates. Defaults to '}'.
=item B<--doc>
With this option you can let the script display itself via C<perldoc> - at least its documentation.
=item B<-h> | B<--help>
Print overview.
=item B<-o>, B<--out>=I<file>
By default the script writes to STDOUT. You may change this behavior
by setting an output file with this switch. The full path should be
given.
=item B<--par-close>=I<string>
Redefine the closing tag which encloses paragraphs in CONTENT
values. The default is '</p>'.
=item B<--[no]par-one-line>
With this option you may switch off the output of $CONTENT paragraphs
as a single line. If set to no, newlines are not deleted inside of
paragraphs.
=item B<--par-open>=I<string>
Redefine the opening tag which encloses paragraphs in CONTENT
values. Defaults to '<p>'.
=item B<-q>, B<--quiet>
Suppress warning messages but not those of fatal errors or debugging.
=item B<-t>, B<--templet>=I<file>
Specify an alternate template file by giving the full path. By default
the script searches for "./templet" in the current working directory.
=item B<--tex>
If working with TeX templates, it is not a good idea to enclose perl
code in curly braces. Setting the B<--tex> switch will cause the
script to scan the templates for perl code not inside of '{' and '}'
but inside of '*~~' and '~~*'. Of course you should take this into
account while you create your templates. This switch will also set
B<--unsafe-chars> to the empty string and turn off conversion to
html entities. A conversion to special TeX entities will I<not> be
performed.
=item B<--unsafe-chars>=I<string>
Specify a string of characters that should be converted to html
entities. You may set this string to the empty string to switch off
conversion. The default characters are 'äÄöÖüÜß"' (without the single
quotes) and affect I<all> variables, not only the $CONTENT
class.
=item B<-v> | B<--version>
Print version information.
=back
=head1 EXAMPLE
The template:
$AUTHOR - $TITLE
$CONTENT
© $AUTHOR $YEAR
The text file 'input':
$AUTHOR = "John Donne";
$TITLE = "A Wondrous Work Of Powerful Progress";
$CONTENT = ( <<"EOC");
$SDATE
$TITLE
As you can see, dear reader, Mr. Dover has no umbrella in his hands
# a fool!
though a terrible rain makes the streets wet and nasty.
What the hell does he say to the white rabbit on his left shoulder?
"Bewäre the ides of march!"
EOC
The command line:
buildtext --par-open='' input
The output to STDOUT:
John Donne - A Wondrous Work Of Powerful Progress
11.03.1906
A Wondrous Work Of Powerful Progress
As you can see, dear reader, Mr. Dover has no umbrella in his hands though a terrible rain makes the streets wet and nasty.
What the hell does he say to the white rabbit on his left shoulder?
"Bewäre the ides of march!"
© John Donne 1906
=head1 DEPENDENCIES
Required are Perl as of version 5.6.1 and the modules Getopt::Long,
HTML::Entities, Text::Template.
=head1 BUGS
The script is not explicitly designed for crossplatform
compatibility. It is written and tested in daily use on a Linux 2.2.16
system.
=head1 LICENSE
Copyright (c) 2002 by Ingo Schramm
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
=head1 AUTHOR
Ingo Schramm
mailto:code@ister.org
http://www.ister.org
=head1 SEE ALSO
L<Text::Template>
=cut