package Template::Parser::Greedy; require 5.006; # for warnings use strict; use warnings; use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS); use vars qw($CHOMP_MODIFIER $CHOMP_REPLACE $LEADING_WHITESPACE_REGEX $TRAILING_WHITESPACE_REGEX); use base qw(Template::Parser Exporter); use constant CHOMP_GREEDY => 3; use constant CHOMP_COALESCE => 4; use Template::Constants qw(:chomp); $VERSION = '1.00'; @EXPORT_OK = qw(CHOMP_GREEDY CHOMP_COALESCE); %EXPORT_TAGS = (all => [ @EXPORT_OK ]); $CHOMP_MODIFIER = { '+' => CHOMP_NONE, '-' => CHOMP_ALL, '~' => CHOMP_GREEDY, '=' => CHOMP_COLLAPSE, '^' => CHOMP_COALESCE }; $CHOMP_REPLACE = { CHOMP_NONE, '', CHOMP_ALL, '', CHOMP_COLLAPSE, ' ', CHOMP_GREEDY, '', CHOMP_COALESCE, ' ' }; $LEADING_WHITESPACE_REGEX = { CHOMP_NONE, '', CHOMP_ALL, qr{((?:\n?[ \t]+)|\n)$}, CHOMP_GREEDY, qr{(\s+)$}, CHOMP_COLLAPSE, qr{((?:\n?[ \t]+)|\n)$}, CHOMP_COALESCE, qr{(\s+)$} }; $TRAILING_WHITESPACE_REGEX = { CHOMP_NONE, '', CHOMP_ALL, qr{^((?:[ \t]+\n?)|\n)}, CHOMP_GREEDY, qr{^(\s+)}, CHOMP_COLLAPSE, qr{^((?:[ \t]+\n?)|\n)}, CHOMP_COALESCE, qr{^(\s+)} }; sub new { my $class = shift; my $config = ($_[0] && UNIVERSAL::isa($_[0], 'HASH')) ? shift : { @_ }; for my $chomp (qw(PRE_CHOMP POST_CHOMP)) { my $arg = $config->{$chomp}; if (defined $arg) { if (exists $CHOMP_MODIFIER->{$arg}) { $config->{$chomp} = $CHOMP_MODIFIER->{$arg}; } else { # make sure it's a valid option # keyed on the valid CHOMP constants die "invalid $chomp option: '$arg'" unless (exists $CHOMP_REPLACE->{$arg}); } } else { $config->{$chomp} = CHOMP_GREEDY; } } return $class->SUPER::new($config); } #------------------------------------------------------------------------ # split_text($text) # # Split input template text into directives and raw text chunks. #------------------------------------------------------------------------ sub split_text { my ($self, $text) = @_; my ($pre, $dir, $prelines, $dirlines, $postlines, $tags, @tags); my $style = $self->{STYLE}->[-1]; my ($start, $end, $prechomp, $postchomp, $interp) = @$style{qw(START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE)}; my @tokens = (); my $line = 1; return \@tokens unless ((defined $text) && (length $text)); # extract all directives from the text my $directive_regex = qr{ ^(.*?) # $1 - start of line up to directive (?: $start # start of tag (.*?) # $2 - tag contents $end # end of tag ) }sx; while ($text =~ s/$directive_regex//s) { ($pre, $dir) = map { defined $_ ? $_ : '' } ($1, $2); $postlines = 0; # denotes lines chomped $prelines = ($pre =~ tr/\n//); # NULL - count only $dirlines = ($dir =~ tr/\n//); # ditto # the CHOMP modifiers may modify the preceding text $dir =~ s/^([-+~=^\#])?\s*//s; # remove leading whitespace and check for a '-' chomp flag if ($1 && ($1 eq '#')) { $dir = ($dir =~ /([-+~=^])$/) ? $1 : ''; # comment out entire directive except for any chomp flag } else { my $chomp = $1 ? $CHOMP_MODIFIER->{$1} : $prechomp; if ($chomp) { my $space = $CHOMP_REPLACE->{$chomp}; my $leading_whitespace_regex = $LEADING_WHITESPACE_REGEX->{$chomp}; # remove (or collapse) the selected whitespace before the directive $pre =~ s/$leading_whitespace_regex/$space/; } } $dir =~ s/\s*([-+~=^])?\s*$//s; # remove trailing whitespace and check for a '-' chomp flag my $chomp = $1 ? $CHOMP_MODIFIER->{$1} : $postchomp; if ($chomp) { my $space = $CHOMP_REPLACE->{$chomp}; my $trailing_whitespace_regex = $TRAILING_WHITESPACE_REGEX->{$chomp}; if ($text =~ /$trailing_whitespace_regex/) { my $trailing_whitespace = $1; # increment the line counter if necessary $postlines += ($trailing_whitespace =~ tr/\n/\n/); # now remove (or collapse) the selected whitespace after the directive $text =~ s/$trailing_whitespace_regex/$space/; } } # any text preceding the directive can now be added if (length $pre) { push (@tokens, $interp ? [ $pre, $line, 'ITEXT' ] : ('TEXT', $pre)); } # moved out of the preceding conditional: we might have outstanding newlines # to account for even if $pre is now zero length $line += $prelines; # and now the directive, along with line number information if (length $dir) { # the TAGS directive is a compile-time switch if ($dir =~ /^TAGS\s+(.*)/i) { my @tags = split(/\s+/, $1); if (scalar @tags > 1) { ($start, $end) = map { quotemeta($_) } @tags; } elsif ($tags = $self->SUPER::TAG_STYLE->{$tags[0]}) { ($start, $end) = @$tags; } else { warn "invalid TAGS style: $tags[0]\n"; } } else { # DIRECTIVE is pushed as: [ $dirtext, $line_no(s), \@tokens ] push @tokens, [ $dir, ($dirlines ? sprintf("%d-%d", $line, $line + $dirlines) : $line), $self->tokenise_directive($dir) ]; } } # update line counter to include directive lines and any extra # newline chomped off the start of the following text $line += $dirlines + $postlines; } # anything remaining in the string is plain text push (@tokens, $interp ? [ $text, $line, 'ITEXT' ] : ('TEXT', $text)) if (length $text); return \@tokens; } 1; __END__ =head1 NAME Template::Parser::Greedy - reader/writer friendly chomping for TT2 templates =head1 SYNOPSIS use Template; use Template::Parser::Greedy; my $parser = Template::Parser::Greedy->new(); my $config = { PARSER => $parser, INCLUDE_PATH => ... }; my $template = Template->new($config); $template->process(...) || die $template->error(); =head1 DESCRIPTION It's easy to write readable templates in L, and it's easy to exercise fine-grained control over the output of Template Toolkit templates. Achieving both at the same time, however, can be tricky given the default parser's whitespace chomping rules, which consume no more than one newline character on either side of a directive. This means that template authors optimizing for readability (and writability) may be obliged to compromise the indentation and spacing of the output and I. This module allows templates to be laid out in a readable way, while at the same time enhancing control over the spacing of the generated output. It does this by providing two new options, C and C, and their corresponding directive modifiers, C<~> and C<^>. In addition, a new modifier, C<=>, for the old CHOMP_COLLAPSE option has been added. =head2 Options This module is a drop-in replacement for L, and is fully backwards compatible if the original set of chomp options or modifiers are used. The only difference is in the default values assigned for PRE_CHOMP and POST_CHOMP. Template::Parser defaults to not chomping, while Template::Parser::Greedy defaults to chomping all contiguous whitespace characters. This behaviour can be specified explicitly by passing a value of 3, or by importing the symbolic constant C e.g, use Template::Parser::Greedy qw(CHOMP_GREEDY); my $parser = Template::Parser::Greedy->new( PRE_CHOMP => CHOMP_GREEDY, POST_CHOMP => 0 ); In addition, Template::Parser::Greedy allows all old and new chomp options to be specified by means of the corresponding directive modifier. Thus: my $parser = Template::Parser::Greedy->new( PRE_CHOMP => '+', POST_CHOMP => '=' ); Corresponds to my $parser = Template::Parser::Greedy->new( PRE_CHOMP => CHOMP_NONE, POST_CHOMP => 2 ); And: my $parser = Template::Parser::Greedy->new( PRE_CHOMP => '~', POST_CHOMP => '^' ); is equivalent to: my $parser = Template::Parser::Greedy->new( PRE_CHOMP => CHOMP_GREEDY, POST_CHOMP => 4 ); =head2 Modifiers Greedy chomping can be relaxed or revoked on a per-directive basis in templates that are greedy by default. Likewise, greedy chomping can be selectively enabled in non-greedy templates by using the C<~> and C<^> modifiers. e.g. my $parser = Template::Parser::Greedy->new( PRE_CHOMP => 2, POST_CHOMP => 0 ); my $template = Template->new({ PARSER => $parser }); And, in the template: [BLOCK foo %] [%- IF 1 ~%] bar [%~ END +%] [% END %] In this example, the C<~> modifier consumes all of the whitespace around the embedded text, and is thus equivalent to: [%- IF 1 %]bar[% END +%] The C<+> modifier at the end of the C block turns on C for the suffixed whitespace, which is therefore not chomped; and the C<-> modifier at the beginning of the C performs a C chomp, which collapses the indentation and one newline to a single space, but leaves the whitespace before that intact. Template::Parser does not provide a directive modifier for CHOMP_COLLAPSE, but it can be enabled in Template::Parser::Greedy templates by using C<=>. e.g. [% IF 1 =%] ... [%= END %] The greedy version of this modifier is C<^>. This can also be set globally by supplying a PRE/POST_CHOMP value of 4, which is also available as the symbolic constant C. use Template::Constants (:chomp); use Template::Parser::Greedy qw(CHOMP_COALESCE); my $parser = Template::Parser::Greedy->new( PRE_CHOMP => CHOMP_COLLAPSE, POST_CHOMP => CHOMP_COALESCE ); If both CHOMP_GREEDY and CHOMP_COALESCE are needed, they can be imported by using the C<:all> tag: use Template::Parser::Greedy qw(:all); =head1 USAGE As with the default parser, any whitespace inside the preceding or following text is preserved, so boilerplate only needs to concern itself with its surrounding whitespace. This leaves indentation and newlines under the explicit control of the template author, by any of the mechanisms available in the Template technician's toolkit e.g. by using explicit newline and indentation directives: [% nl = "\n" %] [% BLOCK foo %] [%# params: bar, indent %] [% outer = tab(indent) %] [% inner = tab(add(indent, 1)) %] [% outer %] [% FOR baz IN bar %] [% nl %] [% inner %] [% END %] [% nl %] [% outer %] [% END %] Or by selectively turning off left and/or right chomping: [% IF 1 +%] ------------------------------------ | alpha | beta | gamma | vlissides | ------------------------------------ | foo | bar | baz | quux | ------------------------------------ [%+ END %] Unlike their non-greedy counterparts, the CHOMP_GREEDY and CHOMP_COALESCE options and directives happily digest carriage returns (along with [\n\f\t ]), so they are less likely to do the wrong thing on non-Unix platforms. =head1 SEE ALSO =over =item * L =item * L =item * L =item * http://www.mail-archive.com/templates@template-toolkit.org/msg07575.html =item * http://www.mail-archive.com/templates@template-toolkit.org/msg07659.html =back =head1 VERSION 1.00 =head1 AUTHOR chocolateboy Echocolate.boy@email.comE =head1 COPYRIGHT AND LICENSE Copyright (C) 2006 by chocolateboy This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut