#!/usr/bin/perl
###########################################################################
#
#Copyright (c) 2002 by Ingo Schramm
#
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
#USA
#
###########################################################################

use v5.6.1;
use strict;
use warnings;

use Getopt::Long;
use Text::Template;
use HTML::Entities;

################################################################################
# global variables
#
# These stay package variables ('our') so that code in input files and
# templates can still reach them as $main::Name.

our $Progname = "buildtext";
our $Invoked  = $0;
$Invoked =~ s:^(.*/)(.*)$:$2:g;    # strip the directory part
our $Version  = '1.00';            # a string, so the trailing zeros are printed

our $ParOpen  = '<p>';             # paragraph opening tag
our $ParClose = '</p>';            # paragraph closing tag
our $TmplFile = "./templet";       # template file
our $Outfile  = "";                # actual used output file
our $ActFile  = "";                # actually processed input file

# chars to convert to html entities
# NOTE(review): this literal is taken as raw bytes (no 'use utf8'); whether it
# matches the input depends on both files sharing one encoding -- confirm.
our $UnsafeChars = 'äÄöÖüÜß"';

our $DelOpen  = "";                # alternative delimiters for
our $DelClose = "";                # code fragments in templates

# flags
our $ParOneLine    = 1;
our $CheckComments = 1;
our $TeX           = 0;
our $Quiet         = 0;
our $Debug         = 0;

################################################################################
# MAIN

action();

################################################################################
# declarations

#---help------------------------------#
#
# docu(): replace this process with 'perldoc' showing the script's own POD.
# The list form of exec avoids passing $0 through a shell.
sub docu {
    exec( 'perldoc', $0 ) or exit(1);
}

# version(): print the version banner and exit.
sub version {
    help("v");
}

# help([$flag]): print the version banner; unless $flag is "v" also print the
# option overview. Always exits with status 0. When used as a Getopt::Long
# callback, $flag receives the option name, which selects the full overview.
sub help {
    my $flag = shift;

    print $Progname, " ", $Version, " -- called as $Invoked\n";
    exit(0) if defined $flag && $flag eq "v";

    print <<"EOF";

$Invoked [options] [file ...]

Options: default values are given in braces

  --[no]check-comments  : check for #-comments in paragraphs or not (yes)
  --debug               : output some debugging info to stderr (no)
  --del-open=STRING     : opening delimiter; change with care ('{')
  --del-close=STRING    : closing delimiter; change with care ('}')
  --doc                 : read enhanced documentation
  -h, --help            : this screen
  -o, --out=FILE        : output to this file (stdout)
  --par-close=TAG       : paragraph closing tag ('$ParClose')
  --[no]par-one-line    : delete newlines inside of paragraphs or not (yes)
  --par-open=TAG        : paragraph opening tag ('$ParOpen')
  -q, --quiet           : suppress warning messages
  -t, --templet=FILE    : specify template file ('$TmplFile')
  --tex                 : work with TeX files; this option changes the
                          values of the delimiters for code fragments in
                          templates to '*~~' and '~~*' (no)
  --unsafe-chars=CHARS  : define characters that shall be converted to html
                          entities ( '$UnsafeChars' )
  -v, --version         : version information

EOF
    exit 0;
}

#---parse_command_line----------------#
#
# Parse @ARGV into the global option variables. Remaining arguments (the
# input files) are left in @ARGV. Dies on options Getopt::Long rejects.
#
sub parse_command_line {
    if (@ARGV) {
        Getopt::Long::Configure( "no_ignorecase", "bundling" );
        GetOptions(
            "help|h"          => \&help,
            "version|v"       => \&version,
            "o|out=s"         => \$Outfile,
            "par-open=s"      => \$ParOpen,
            "par-close=s"     => \$ParClose,
            "t|templet=s"     => \$TmplFile,
            "q|quiet"         => \$Quiet,
            "debug"           => \$Debug,
            "unsafe-chars=s"  => \$UnsafeChars,
            "par-one-line!"   => \$ParOneLine,
            "check-comments!" => \$CheckComments,
            "tex"             => \$TeX,
            "del-open=s"      => \$DelOpen,
            "del-close=s"     => \$DelClose,
            "doc"             => \&docu,
        ) or _die("invalid command line, try '$Invoked --help'");
    }

    # --tex overrides the delimiters and switches entity encoding off
    if ($TeX) {
        $DelOpen     = '*~~';
        $DelClose    = '~~*';
        $UnsafeChars = "";
    }
}

#---action-----------------------------#
#
# Program driver: parse options, check the template file, evaluate every
# input file (or STDIN when none is given), then fill in the template.
#
sub action {
    parse_command_line();

    _die("templet $TmplFile not found\n") unless -e $TmplFile;
    _die("can't read templet $TmplFile, check permissions\n")
        unless -r $TmplFile;

    if (@ARGV) {
        # process input files; a named loop variable keeps reads inside
        # process_input from touching the @ARGV elements through $_
        foreach my $file (@ARGV) {
            process_input($file);
        }
    }
    else {
        # process STDIN
        process_input("");
    }

    # merge input and template
    merge();
}

#---_infile_slots----------------------#
#
# Return references to the scalar, array and hash slots of the symbol
# $symname in package Infile. The array and hash entries are undef when the
# slot was never used. Going through the glob by name also copes with stash
# entries that are not full globs.
#
sub _infile_slots {
    my $symname = shift;

    no strict 'refs';
    my $glob = \*{"Infile::$symname"};
    return ( *{$glob}{SCALAR}, *{$glob}{ARRAY}, *{$glob}{HASH} );
}

#---debug-------------------------------#
#
# debug($state): dump the variables of package Infile to STDERR. $state is a
# free-text label for the processing stage. Scalars are cut to 48 chars,
# arrays and hashes to their first 5 entries.
#
sub debug {
    my $state = shift;
    my $max   = 5;    # entries shown per array/hash

    print STDERR "\nDump symbol table of '$ActFile' $state:\n(long values are truncated)\n";

    foreach my $symname ( sort keys %Infile:: ) {
        next if $symname =~ /::\z/;    # nested package, not a variable

        my ( $sref, $aref, $href ) = _infile_slots($symname);

        if ( $sref && defined $$sref ) {
            printf STDERR "\$%-16s = '%.48s'\n", $symname, $$sref;
        }

        if ( $aref && @$aref ) {
            printf STDERR "\@%-16s = ", $symname;
            my $count = 0;
            foreach my $elem (@$aref) {
                $count++;
                printf STDERR "'%-.16s' ", defined $elem ? $elem : '';
                last if $count == $max;
            }
            print STDERR "\n";
        }

        if ( $href && %$href ) {
            printf STDERR "%%%-16s =\n", $symname;
            my $count = 0;
            foreach my $key ( sort keys %$href ) {
                $count++;
                my $value = $href->{$key};
                printf STDERR "%-12s => %.48s\n", $key,
                    defined $value ? $value : '';
                last if $count == $max;
            }
        }
    }
}

#---process_input----------------------#
#
# process_input($file): evaluate one input file as Perl code in package
# Infile. An empty $file means STDIN. Dies if the file cannot be opened or
# closed, or if its code fails to compile or run; warns when no $CONTENT
# variable exists or when the reserved $OUT is used.
#
# SECURITY NOTE: the input is executed with a string eval. Only feed this
# script input files you trust.
#
sub process_input {
    my $file = shift;

    $ActFile = $file ? $file : "STDIN";

    my $in;
    if ($file) {
        open( $in, "<", $file )
            or _die("could not open $file for read: $!");
    }
    else {
        $in = \*STDIN;
    }

    # read whole input in one single step
    my $source = do { local $/; <$in> };

    if ($file) {
        close($in) or _die("could not close $ActFile: $!\n");
    }

    # eval the input file in a separate package
    # for later processing of its variables via
    # direct access to the symbol table of that package
    {
        package Infile;
        no strict;
        no warnings;

        # predefined variables
        my @lt = localtime;
        $YEAR = $lt[5] + 1900;
        my $mon = sprintf( "%02d", $lt[4] + 1 );
        my $day = sprintf( "%02d", $lt[3] );
        $SDATE = "$day.$mon.$YEAR";
        $DATE  = scalar(localtime);

        if ( defined $source ) {
            eval $source;
            if ($@) {
                main::_die("Exit due to errors in '$ActFile':\n$@");
            }
        }
    }    # end of package Infile

    # check for $CONTENT[0-9] and $OUT vars
    #
    # you have to look at the symbol table keys not only because you
    # don't know about the names of the $CONTENT[0-9] vars
    # but also because you would get the identifier created by
    # autovivification if you try something like 'if $Infile::OUT'
    my @symbols    = keys %Infile::;
    my $varcontent = grep { /^CONTENT[0-9]*\z/ } @symbols;
    my $varout     = grep { /^OUT\z/ } @symbols;

    _warn( 'no declaration of $CONTENT in ' . "'$ActFile'" )
        unless $varcontent;
    _warn( 'useless use of $OUT in ' . "'$ActFile'" ) if $varout;

    debug("before encoding") if $Debug;
}

#---_encode_symbols--------------------#
#
# Convert the characters listed in $UnsafeChars to html entities in every
# scalar, array element and hash value of package Infile. Undefined values
# and references are left untouched.
#
sub _encode_symbols {
    foreach my $symname ( sort keys %Infile:: ) {
        next if $symname =~ /::\z/;    # never rewrite a nested symbol table

        my ( $sref, $aref, $href ) = _infile_slots($symname);

        # scalar slot
        if ( $sref && defined $$sref && !ref $$sref ) {
            $$sref = encode_entities( $$sref, $UnsafeChars );
        }

        # array slot
        if ( $aref && @$aref ) {
            foreach my $elem (@$aref) {
                next unless defined $elem && !ref $elem;
                $elem = encode_entities( $elem, $UnsafeChars );
            }
        }

        # hash slot
        if ( $href && %$href ) {
            foreach my $key ( sort keys %$href ) {
                my $value = $href->{$key};
                next unless defined $value && !ref $value;
                $href->{$key} = encode_entities( $value, $UnsafeChars );
            }
        }
    }
}

#---_format_content--------------------#
#
# Rewrite every scalar $CONTENT[0-9]* of package Infile: split it into
# paragraphs at blank lines, trim each paragraph, optionally drop #-comment
# lines and fold line breaks to spaces, wrap it in $ParOpen/$ParClose and
# join the paragraphs with newlines.
#
sub _format_content {
    foreach my $symname ( keys %Infile:: ) {
        next unless $symname =~ /^CONTENT[0-9]*\z/;

        my ($sref) = _infile_slots($symname);
        next unless $sref && defined $$sref;    # e.g. only @CONTENT exists

        my @paragraphs = split( /^\s*$/ms, $$sref );
        foreach my $par (@paragraphs) {
            chomp $par;
            $par =~ s/^\s*//;
            $par =~ s/\s*$//;

            # delete comments
            $par =~ s/^\s*#.*$//mg if $CheckComments;

            # \n -> [spc]
            if ($ParOneLine) {
                # try DOS, UNIX, Mac
                $par =~ s/(?:\015\012)+|\012+|\015+/ /g;
            }

            $par = $ParOpen . $par . $ParClose;
        }
        $$sref = join( "\n", @paragraphs );
    }
}

#---merge------------------------------#
#
# Encode entities, preprocess the $CONTENT variables and fill in the
# template with the variables of package Infile. Output goes to $Outfile,
# or to STDOUT when no output file was given. Dies if the output file or
# the template cannot be opened; warns if filling in the template fails.
#
sub merge {
    # encode html entities
    if ($UnsafeChars) {
        _encode_symbols();
        debug("after encoding") if $Debug;
    }

    # preprocess CONTENT variables
    _format_content();
    debug("after preprocessing") if $Debug;

    # create output filehandle
    my $out;
    if ($Outfile) {
        open( $out, ">", $Outfile )
            or _die("could not open output file '$Outfile':$!");
    }
    else {
        $out = \*STDOUT;
    }

    # create template object
    my %config = ( TYPE => 'FILE', SOURCE => $TmplFile );
    if ( $DelOpen && $DelClose ) {
        # Text::Template expects the key DELIMITERS holding a reference to
        # an array of the two delimiter strings
        $config{DELIMITERS} = [ $DelOpen, $DelClose ];
    }
    elsif ( $DelOpen || $DelClose ) {
        _warn("--del-open and --del-close must be given together; using default delimiters");
    }

    my $tmplobj = Text::Template->new(%config)
        or _die("could not load template '$TmplFile': $Text::Template::ERROR");

    # merge text file with template
    my $success = $tmplobj->fill_in( OUTPUT => $out, PACKAGE => 'Infile' );
    unless ($success) {
        _warn("Compilation of template '$TmplFile' failed\n$Text::Template::ERROR");
    }

    if ($Outfile) {
        close($out)
            or _die("could not close output file '$Outfile':$!");
    }
}

#---_warn------------------------------#
#
# _warn($msg): print a warning to STDERR unless --quiet is in effect.
#
sub _warn {
    my $msg = shift;
    return if $Quiet;
    warn("Warning: $msg\n");
}

#---_die-------------------------------#
#
# _die($msg): abort with an error message; the trailing newline suppresses
# Perl's "at FILE line N" suffix.
#
sub _die {
    my $msg = shift;
    die("Error: $msg\n");
}

__END__

###########################################################################

=pod

=head1 NAME

buildtext - merge a text file and a template

=head1 SYNOPSIS

    buildtext [options] [file ...]

=head1 DESCRIPTION

=head2 Overview

The standalone perl script B<buildtext> merges a text as read from a given file or
from stdin with a template file and sends the output either to stdout or
to a specified file. Though the script is initially intended to build html
output, it may be used to create any kind of formatted text as given by the
template. The not that new but good idea behind the game is to separate
content and formatting. The script uses the functionality of the famous
Text::Template module by Mark Jason Dominus. Additionally it provides some
kind of variable expansion not only in the template file, but in the text
input file too.

=head2 Input File

Input files are plain text files that define some variables in the easy way
like Perl does itself. All you have to do is to declare some variables which
you want to use in your template file and assign their values. First of all
you should assign large amounts of text to a scalar variable called $CONTENT
which is treated in a special manner by the script. The value will be read
paragraph by paragraph and some conversions are applied to these paragraphs,
like deleting inserted comments. Like all other variables, $CONTENT is
searched for unsafe characters that are to be converted to html entities.
Besides the scalar $CONTENT variable you may declare all kinds of variables,
even arrays or hashes, in the input file for later use in template
processing. Technically the input file is evaluated in a separate package. So
it is possible to read the names and values of the variables directly out of
the symbol table of that package. Doing this, code or IO symbols are ignored.
So if you use code to evaluate values--it is possible at all--this code
should always result in a scalar or list variable of global scope.
C<my>-variables will have no effect.

=head2 Template

Template files contain all formatting information of the document as input
files contain the content. B<buildtext> uses the template mechanism of
Text::Template, so read what the documentation of that module says about how
to create templates:
"The Text::Template module scans the template source.
An open brace { begins a program fragment, which continues until the matching close brace }. When the template is filled in, the program fragments are evaluated, and each one is replaced with the resulting value to yield the text that is returned. A backslash \ in front of a brace (or another backslash that is in front of a brace) escapes its special meaning. The result of filling out this template: \{ The sum of 1 and 2 is {1+2} \} is { The sum of 1 and 2 is 3 } If you have an unmatched brace, Text::Template will return a failure code and a warning about where the problem is. Backslashes that do not precede a brace are passed through unchanged. If you have a template like this: { "String that ends in a newline.\n" } The backslash inside the string is passed through to Perl unchanged, so the \n really does turn into a newline. Each program fragment should be a sequence of Perl statements, which are evaluated the usual way. The result of the last statement executed will be evaluted in scalar context; the result of this statement is a string, which is interpolated into the template in place of the program fragment itself. The fragments are evaluated in order, and side effects from earlier fragments will persist into later fragments: {$x = @things; ''}The Lord High Chamberlain has gotten {$x} things for me this year. { $diff = $x - 17; $more = 'more' if ($diff == 0) { $diff = 'no'; } elsif ($diff < 0) { $more = 'fewer'; } ''; } That is {$diff} {$more} than he gave me last year. The value of $x set in the first line will persist into the next fragment that begins on the third line, and the values of $diff and $more set in the second fragment will persist and be interpolated into the last line. The output will look something like this: The Lord High Chamberlain has gotten 42 things for me this year. That is 25 more than he gave me last year. " Note: Backslash processing is not done when you specify alternative delimiters with the B<--del-open> or B<--del-close> options. 

=head1 VARIABLES

=head2 Syntax

The syntax of variable definition in input text files is Perl syntax. That
is all there is to say about this topic. You may use Perl code to produce
values while the input file is read. The script uses an C<eval> to do this.
Compiler errors will cause the script to stop execution. All code should
result in a single variable of global scope, otherwise it is quite senseless
to B<buildtext>.

Note: Do not use C<my> declarations for variables that are defined for later
use in template processing. These variables may silently disappear.

=head2 Predefined Variables And Reserved Words

The script defines some variables for your comfort and assigns corresponding
values. You may overwrite them in your input file (but not the special
variable $OUT).

=over

=item B<$CONTENT[0-9]>

This identifier should be inserted in the template and the variable should be
defined in the text file. If there is no declaration of $CONTENT in the text
file, a warning will be issued. The best way of declaration is using a Here
document. If you double quote the Here document, you may use any formerly
declared variables in your text. Example:

    $TITLE = "A short example";
    $CONTENT = ( <<"EOF" );
    $TITLE

    This is a very short example.
    EOF

You may declare more than one $CONTENT variable in a single document. All you
have to do is to add a number to the $CONTENT identifier like '$CONTENT1',
'$CONTENT2' or even '$CONTENT123456789' (Note: $CONTENT23B will not work).

For the $CONTENT class of variables some special treatment comes into use.
They are read not line by line but paragraph by paragraph. Two paragraphs are
separated by one or more empty lines. In the output file a paragraph will be
enclosed by an opening and a closing tag. These tags default to html: '<p>'
and '</p>'. You may change them using the B<--par-open> and B<--par-close>
options. Inside of paragraphs you have the possibility to insert comments
that will not appear in the output. A comment is a line that starts with a
'#' sign. Additionally all newlines in a paragraph will be replaced by
spaces, so one paragraph appears as a single line in output. You may switch
this off by using the command line options B<--nopar-one-line> and
B<--nocheck-comments>.

The $CONTENT variables should be scalars, otherwise the special treatment
will not work as expected.

=item B<$YEAR>

The year when the script is running in four digit notation.

=item B<$DATE>

The recent date when the script is running in format
"Wkd Mon DD HH:MM:SS YYYY".

=item B<$SDATE>

The recent date in short format "DD.MM.YYYY".

=item B<$OUT>

The variable $OUT is reserved by Text::Template. A warning will be issued if
you use this identifier in your input text file. Read what the documentation
for Text::Template says about this variable:

"There is one special trick you can play in a template. Here is the
motivation for it: Suppose you are going to pass an array, @items, into the
template, and you want the template to generate a bulleted list with a
header, like this:

    Here is a list of the things I have got for you since 1907:
     * Ivory
     * Apes
     * Peacocks
     * ...

One way to do it is with a template like this:

    Here is a list of the things I have got for you since 1907:
    { my $blist = '';
      foreach $i (@items) {
        $blist .= qq{ * $i\n};
      }
      $blist;
    }

Here we construct the list in a variable called $blist, which we return at
the end. This is a little cumbersome. There is a shortcut. Inside of
templates, there is a special variable called $OUT. Anything you append to
this variable will appear in the output of the template. Also, if you use
$OUT in a program fragment, the normal behavior, of replacing the fragment
with its return value, is disabled; instead the fragment is replaced with the
value of $OUT.
This means that you can write the template above like this: Here is a list of the things I have got for you since 1907: { foreach $i (@items) { $OUT .= " * $i\n"; } } $OUT is reinitialized to the empty string at the start of each program fragment. It is private to Text::Template, so you can't use a variable named $OUT in your template without invoking the special behavior." =back =head1 OPTIONS The following commandline options may be used to customize the behavior of B. Short options for which the long versions require an argument require that argument too. =over =item B<--[no]check-comments> Inside of $CONTENT variables a check for #-comments will be performed if this switch is set, what is the default. Comments are single lines that begin with a '#' and will be deleted. =item B<--debug> Setting this switch will show you a dump of the internal symbol table as it is produced by reading and processing the input text file. The information is printed to STDERR and may be redirected. =item B<--del-open>=I Use this switch with care! It changes the opening delimiter of code snippets inside of template files. Default is '{'. =item B<--del-close>=I Use with care! Change the closing delimiter of code snippets inside of templates. Defaults to '}'. =item B<--doc> With this option you can let the script display itself via C - at least its documentation. =item B<-h> | B<--help> Print overview. =item B<-o>, B<--out>=I By default the script writes to STDOUT. You may change this behavior by setting an output file with this switch. The full path should be given. =item B<--par-close>=I Redefine the starting tag which encloses paragraphs in CONTENT values. The default is '

</p>'.

=item B<--[no]par-one-line>

With this option you may switch off the output of $CONTENT paragraphs as a
single line. If set to no, newlines are not deleted inside of paragraphs.

=item B<--par-open>=I<tag>

Redefine the opening tag which encloses paragraphs in CONTENT values.
Defaults to '<p>'.

=item B<-q>, B<--quiet>

Suppress warning messages but not those of fatal errors or debugging.

=item B<-t>, B<--templet>=I<file>

Specify an alternate template file by giving the full path. By default the
script searches for "./templet" in the current working directory.

=item B<--tex>

If working with TeX templates, it is not a good idea to enclose perl code in
curly braces. Setting the B<--tex> switch will cause the script to scan the
templates for perl code not inside of '{' and '}' but inside of '*~~' and
'~~*'. Of course you should take this into account while you create your
templates. This switch will also set B<--unsafe-chars> to the empty string
and turn off conversion to html entities. A conversion to special TeX
entities will I<not> be performed.

=item B<--unsafe-chars>=I<chars>

Specify a string of characters that should be converted to html entities. You
may set this string to the empty string to switch off conversion. The default
characters are 'äÄöÖüÜß"' (without the single quotes) and have effect on
I<all> variables, not only the $CONTENT class.

=item B<-v> | B<--version>

Print version information.

=back

=head1 EXAMPLE

The template:

$AUTHOR - $TITLE $CONTENT

© $AUTHOR $YEAR

The text file 'input': $AUTHOR = "John Donne"; $TITLE = "A Wondrous Work Of Powerful Progress"; $CONTENT = ( <<"EOC"); $SDATE $TITLE As you can see, dear reader, Mr. Dover has no umbrella in his hands # a fool! though a terrible rain makes the streets wet and nasty. What the hell does he say to the white rabbit on his left shoulder? "Bewäre the ides of march!" EOC The command line: buildtext --par-open='

' input The output to STDOUT: John Donne - A Wondrous Work Of Powerful Progress

11.03.1906

A Wondrous Work Of Powerful Progress

As you can see, dear reader, Mr. Dover has no umbrella in his hands though a terrible rain makes the streets wet and nasty.

What the hell does he say to the white rabbit on his left shoulder?

"Bewäre the ides of march!"

© John Donne 1906

=head1 DEPENDENCIES

Required are Perl as of version 5.6.1 and the modules Getopt::Long,
HTML::Entities, Text::Template.

=head1 BUGS

The script is not explicitly designed for cross-platform compatibility. It is
written and tested in daily use on a Linux 2.2.16 system.

=head1 LICENSE

Copyright (c) 2002 by Ingo Schramm

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

=head1 AUTHOR

Ingo Schramm
mailto:code@ister.org
http://www.ister.org

=head1 SEE ALSO

L<Text::Template>

=cut