package PDF::FromHTML::Template;
use strict;
use warnings;
use base 'PDF::FromHTML::Template::Base';
our $VERSION = '0.30';
use PDF::Writer;
use File::Basename qw( fileparse );
use XML::Parser ();
# PDF_set_info - find out more about this
# Providers - I need to create some provider classes that abstract
# the process of PDF creation. This will enable P::T to work with
# different PDF providers. A provider could be passed in to the
# constructor. If none is passed, P::T should try to instantiate a
# sensible provider depending on what is installed.
# Constructor.
#
# Object creation is delegated to the parent class; the result is then
# normalised:
#   * TEMPLATES is forced to an array ref and PARAM_MAP to a hash ref,
#   * PDF_VERSION is reset to 0,
#   * FILENAME is used as a fallback when FILE is not defined.
# If a FILE is known afterwards, it is parsed immediately via parse_xml().
#
# Returns the new object.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    $self->{TEMPLATES} = [] unless UNIVERSAL::isa($self->{TEMPLATES}, 'ARRAY');
    $self->{PARAM_MAP} = {} unless UNIVERSAL::isa($self->{PARAM_MAP}, 'HASH');

    $self->{PDF_VERSION} = 0;

    # FILENAME is only a synonym for FILE. This block must end here so that
    # the parse below also runs when FILE itself was supplied.
    if ( !defined $self->{FILE} && defined $self->{FILENAME} ) {
        $self->{FILE} = $self->{FILENAME};
    }

    $self->parse_xml($self->{FILE}) if defined $self->{FILE};

    return $self;
}
# Merge template parameters into $self->{PARAM_MAP}.
#
# Arguments: any number of leading hash refs, followed by a flat list of
# key/value pairs. Keys are upper-cased before being stored.
#
# Dies if, after the hash refs have been flattened, the argument list has an
# odd number of elements.
#
# Returns 1.
sub param {
    my $self = shift;

    # Allow an arbitrary number of hashrefs, so long as they're the first things
    # into param(). Put each one onto the end, de-referenced.
    push @_, %{ shift @_ } while UNIVERSAL::isa($_[0], 'HASH');

    die __PACKAGE__, "->param() : Odd number of parameters to param()\n"
        if @_ % 2;

    my %params = @_;

    # Re-key everything in upper case. keys() returns a snapshot, so changing
    # the hash inside the loop is safe.
    $params{uc $_} = delete $params{$_} for keys %params;

    @{ $self->{PARAM_MAP} }{ keys %params } = @params{ keys %params };

    return 1;
}
# Render the parsed templates into a PDF file.
#
# $fname - path handed to PDF::Writer's open().
#
# Dies if the writer cannot open $fname. Returns 1.
sub write_file {
    my $self = shift;
    my ($fname) = @_;

    my $p = PDF::Writer->new;
    $p->open($fname) or die "Could not open file '$fname'.", $/;

    # Without this call nothing is ever drawn: _prepare_output() is what sets
    # the document properties and renders every template into $p.
    $self->_prepare_output($p);

    # NOTE(review): save() is assumed to be the PDF::Writer call that flushes
    # the document to the file opened above - confirm against PDF::Writer.
    $p->save();

    return 1;
}
# Render the parsed templates and return the result of the writer's
# stringify() call instead of writing to a file.
#
# Dies if the writer cannot open an in-memory document.
sub get_buffer {
    my $self = shift;

    my $p = PDF::Writer->new;
    $p->open() or die "Could not open buffer.", $/;

    # Render before stringifying; otherwise the returned document is empty.
    $self->_prepare_output($p);

    return $p->stringify();
}

# output() is an alias for get_buffer().
*output = \&get_buffer;
# Parse an XML template and build the node tree.
#
# $file - either a file name or an already opened filehandle (any reference
#         is treated as a handle).
#
# Every XML element is turned into a node by
# PDF::FromHTML::Template::Factory->create_node(). PDFTEMPLATE nodes are
# appended to $self->{TEMPLATES}; VAR nodes go onto the enclosing node's
# TXTOBJ stack when it has one; every other node is appended to the
# enclosing node's ELEMENTS.
#
# Dies if the file cannot be opened or an element does not produce a node.
# Returns 1.
sub parse {
    my $self = shift;
    my ($file) = @_;

    # Nodes whose end tag has not been seen yet, innermost last.
    my @stack;

    my @params = (
        Handlers => {
            Start => sub {
                # XML::Parser passes the Expat object first; it is not needed.
                shift;
                my $name = uc shift;

                # Pass the PDF encoding in.
                # NOTE(review): the attribute name PDF_ENCODING is assumed to
                # match the key it is stored under on $self - confirm against
                # the PDFTEMPLATE node class.
                if ($name eq 'PDFTEMPLATE' && exists $self->{PDF_ENCODING}) {
                    push @_, ( PDF_ENCODING => $self->{PDF_ENCODING} );
                }

                my $node = PDF::FromHTML::Template::Factory->create_node($name, @_);
                die "'$name' (@_) didn't make a node!\n" unless defined $node;

                if ($name eq 'VAR') {
                    return unless @stack;

                    if (exists $stack[-1]{TXTOBJ} && $stack[-1]{TXTOBJ}->isa('TEXTOBJECT')) {
                        push @{ $stack[-1]{TXTOBJ}{STACK} }, $node;
                    }
                }
                elsif ($name eq 'PDFTEMPLATE') {
                    push @{ $self->{TEMPLATES} }, $node;
                }
                else {
                    push @{ $stack[-1]{ELEMENTS} }, $node
                        if @stack;
                }

                push @stack, $node;
            },
            Char => sub {
                shift;    # Expat object
                return unless @stack;

                # Character data is only kept by nodes that own a text object.
                my $parent = $stack[-1];
                if (exists $parent->{TXTOBJ} && $parent->{TXTOBJ}->isa('TEXTOBJECT')) {
                    push @{ $parent->{TXTOBJ}{STACK} }, @_;
                }
            },
            End => sub {
                shift;    # Expat object
                return unless @stack;

                pop @stack if $stack[-1]->isa(uc $_[0]);
            },
        },
    );

    if ( exists $self->{PDF_ENCODING} ) {
        push @params, ProtocolEncoding => $self->{PDF_ENCODING};
    }

    # A lexical handle is used so that no package-global glob gets clobbered.
    my $fh;
    if ( ref $file ) {
        $fh = $file;
    }
    else {
        my (undef, $dirname) = fileparse($file);
        push @params, Base => $dirname;

        open( $fh, '<', $file )
            || die "Cannot open '$file' for reading: $!\n";
    }

    my $parser = XML::Parser->new( @params );

    # Slurp the whole template and hand it to the parser in one go.
    my $xml = do { local $/ = undef; <$fh> };
    $parser->parse($xml);

    # A handle supplied by the caller is left open for the caller to close.
    close $fh
        unless ref $file;

    return 1;
}

# parse_xml() is an alias for parse().
*parse_xml = \&parse;
# Document info keys that _prepare_output() refuses to set.
my %NoSetProperty = map { $_ => 1 } qw(
    CreationDate Producer ModDate Trapped
);

# Configure the PDF writer and render every parsed template into it.
#
# $p - the writer object; parameter(), info() and (indirectly, through the
#      rendering context) the drawing calls are invoked on it.
#
# Side effects: resets __PAGE__ to 1 and __PAGEDEF__ to 0 in
# $self->{PARAM_MAP} before rendering.
#
# Returns 1.
sub _prepare_output {
    my $self = shift;
    my ($p) = @_;

    $p->parameter('openaction' => $self->{OPENACTION});
    $p->parameter('openmode'   => $self->{OPENMODE});

    if (UNIVERSAL::isa($self->{INFO}, 'HASH')) {
        foreach my $key ( keys %{ $self->{INFO} } ) {
            if ($NoSetProperty{$key}) {
                warn "Document property '$key' cannot be set.", $/;

                # Warn and skip: the property must not be set after all.
                next;
            }

            $p->info($key, $self->{INFO}{$key});
        }
    }
    else {
        $p->info($_, __PACKAGE__) for qw/Creator Author/;
    }

    # __PAGE__ is incremented after the page is done.
    $self->{PARAM_MAP}{__PAGE__} = 1;

    # __PAGEDEF__ is incremented when the pagedef begins.
    $self->{PARAM_MAP}{__PAGEDEF__} = 0;

    my $context = PDF::FromHTML::Template::Factory->create(
        # NOTE(review): the node type is assumed to be 'CONTEXT', by analogy
        # with create_node($name, ...) - confirm against the Factory.
        'CONTEXT',

        # Un-scoped variables
        X => 0,
        Y => 0,

        # Other variables
        PDF       => $p,
        PARAM_MAP => [ $self->{PARAM_MAP} ],
    );

    # Do a first pass through, noting important values
    # $_->preprocess($context) for @{$self->{TEMPLATES}};

    # Do a second pass through, for actual rendering
    $_->render($context) for @{ $self->{TEMPLATES} };

    return 1;
}
# Forward a registration request to the Factory. The invocant (class or
# object) is dropped; all remaining arguments are passed through untouched.
sub register {
    shift;    # invocant is not used
    return PDF::FromHTML::Template::Factory::register(@_);
}
=head1 NAME
PDF::FromHTML::Template - PDF::FromHTML::Template
=head1 SYNOPSIS

  use PDF::FromHTML::Template;

  my $pdf = PDF::FromHTML::Template->new({
      file => 'some_template.xml',
  });

  print "Content-type: application/pdf\n\n", $pdf->get_buffer;

B<NOTE>: This is a fork of L<PDF::Template> 0.30, originally released by Rob Kinyon,
but (as of September 11, 2006) currently not available on CPAN. Use of this module
outside L<PDF::FromHTML> is not advised.
PDF::FromHTML::Template is a PDF layout system that uses the same data structures as
L<HTML::Template>. Unlike L<HTML::Template>, this is a full layout system. This means
you will have to describe where each item will be on the page. (This is in
contrast to L<HTML::Template>, which adds on to HTML: there, the layout is determined by
the HTML, not L<HTML::Template>.)
PDF::FromHTML::Template uses an XML document as the template. However, the XML is not
completely compliant. The only difference (that I'm aware of) is that any node
can have any parameter. (This prevents the creation of a DTD.) The reason for
this is to allow scoping by parents for parameters used by children. (More on
this later.)
Each node in the document corresponds to an object, with each parameter
mapping (mostly) 1 to 1 to an object attribute. Parent-child relationships are
strictly preserved. Each parent provides a scope (similar to variable scope) to
its children. (This is why any node can have any parameter.) If a child needs
the value of a parameter and it doesn't have that value as an attribute, it will
ask its parent for the value. If the parent doesn't have it, it will ask its
parent, and so on.
=head1 METHODS
=over 4
=item * C<new( [ $opts ] )>
This will create a new instance of PDF::FromHTML::Template. $opts is an optional hashref
that can contain the following parameters:
=over 4
=item * file
This is either the name of the file or the filehandle of the open file. If it
is present, C<parse()> will be called upon that filename/filehandle. Otherwise,
after new() is called, you will have to call C<parse()> yourself.
filename is a synonym for file.
=item * openaction
This is the action that the PDF reader will take when it opens this file. The
valid values are:
=over 4
=item * fitbox
=item * fitheight
=item * fitpage (default)
=item * fitwidth
=item * retain

=back

=item * openmode
This is the mode that the PDF reader will use when it opens this file. The
valid values are:
=over 4
=item * bookmarks
=item * fullscreen
=item * none (default)
=item * thumbnails

=back

=item * info
This is a hashref of information that you wish to have the PDF retain as
metadata. If this is not present, both Author and Creator will be set to
PDF::FromHTML::Template.

The following keys are not supported:
=over 4
=item * CreationDate
=item * Producer
=item * ModDate
=item * Trapped

=back

=item * pdf_encoding

This is the encoding that the template is in. It defaults to the host
encoding. This is different from the encoding parameter for the pdftemplate
tag.

=back

=item * C<parse( $file )>
This will parse the XML template into the appropriate datastructure(s) needed
for PDF::FromHTML::Template to function.
=item * C<parse_xml( $file )>

This is a deprecated synonym for C<parse()>.
=item * C<< param( key => value, [ key => value, ... ] ) >>
This will set the parameters that PDF::FromHTML::Template will use to merge the template
with. This method is identical to the HTML::Template or Template Toolkit
method of the same name.
=item * C<write_file( $filename )>
This will write the rendered PDF to the file specified in $filename.
=item * C<get_buffer()>
This will return the rendered PDF stringified in a form appropriate for
returning over an HTTP connection.
=item * C