for
details. If passed an empty string, no entities are encoded.
If $indent_char is specified and defined, the HTML to be output is
intented, using the string you specify (which you probably should
set to "\t", or some number of spaces, if you specify it).
If C<\%optional_end_tags> is specified and defined, it should be
a reference to a hash that holds a true value for every tag name
whose end tag is optional. Defaults to
C<\%HTML::Element::optionalEndTag>, which is an alias to
C<%HTML::Tagset::optionalEndTag>, which, at time of writing, contains
true values for C. A useful value to pass is an empty
hashref, C<{}>, which means that no end-tags are optional for this dump.
Otherwise, possibly consider copying C<%HTML::Tagset::optionalEndTag> to a
hash of your own, adding or deleting values as you like, and passing
a reference to that hash.
=cut
sub as_HTML {
my($self, $entities, $indent, $omissible_map) = @_;
#my $indent_on = defined($indent) && length($indent);
my @html = ();
$omissible_map ||= \%HTML::Element::optionalEndTag;
my $empty_element_map = $self->_empty_element_map;
my $last_tag_tightenable = 0;
my $this_tag_tightenable = 0;
my $nonindentable_ancestors = 0; # count of nonindentible tags over us.
my($tag, $node, $start, $depth); # per-iteration scratch
if(defined($indent) && length($indent)) {
$self->traverse(
sub {
($node, $start, $depth) = @_;
if(ref $node) { # it's an element
$tag = $node->{'_tag'};
if($start) { # on the way in
if(
($this_tag_tightenable = $HTML::Element::canTighten{$tag})
and !$nonindentable_ancestors
and $last_tag_tightenable
) {
push
@html,
"\n",
$indent x $depth,
$node->starttag($entities),
;
} else {
push(@html, $node->starttag($entities));
}
$last_tag_tightenable = $this_tag_tightenable;
++$nonindentable_ancestors
if $tag eq 'pre' or $HTML::Tagset::isCDATA_Parent{$tag}; ;
} elsif (not($empty_element_map->{$tag} or $omissible_map->{$tag})) {
# on the way out
if($tag eq 'pre' or $HTML::Tagset::isCDATA_Parent{$tag}) {
--$nonindentable_ancestors;
$last_tag_tightenable = $HTML::Element::canTighten{$tag};
push @html, $node->endtag;
} else { # general case
if(
($this_tag_tightenable = $HTML::Element::canTighten{$tag})
and !$nonindentable_ancestors
and $last_tag_tightenable
) {
push
@html,
"\n",
$indent x $depth,
$node->endtag,
;
} else {
push @html, $node->endtag;
}
$last_tag_tightenable = $this_tag_tightenable;
#print "$tag tightenable: $this_tag_tightenable\n";
}
}
} else { # it's a text segment
$last_tag_tightenable = 0; # I guess this is right
HTML::Entities::encode_entities($node, $entities)
# That does magic things if $entities is undef.
unless (
(defined($entities) && !length($entities))
# If there's no entity to encode, don't call it
|| $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
# To keep from amp-escaping children of script et al.
# That doesn't deal with descendants; but then, CDATA
# parents shouldn't /have/ descendants other than a
# text children (or comments?)
);
if($nonindentable_ancestors) {
push @html, $node; # say no go
} else {
if($last_tag_tightenable) {
$node =~ s<[\n\r\f\t ]+>< >s;
#$node =~ s< $><>s;
$node =~ s<^ ><>s;
push
@html,
"\n",
$indent x $depth,
$node,
#Text::Wrap::wrap($indent x $depth, $indent x $depth, "\n" . $node)
;
} else {
push
@html,
$node,
#Text::Wrap::wrap('', $indent x $depth, $node)
;
}
}
}
1; # keep traversing
}
); # End of parms to traverse()
} else { # no indenting -- much simpler code
$self->traverse(
sub {
($node, $start) = @_;
if(ref $node) {
$tag = $node->{'_tag'};
if($start) { # on the way in
push(@html, $node->starttag($entities));
} elsif (not($empty_element_map->{$tag} or $omissible_map->{$tag})) {
# on the way out
push(@html, $node->endtag);
}
} else {
# simple text content
HTML::Entities::encode_entities($node, $entities)
# That does magic things if $entities is undef.
unless (
(defined($entities) && !length($entities))
# If there's no entity to encode, don't call it
|| $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
# To keep from amp-escaping children of script et al.
# That doesn't deal with descendants; but then, CDATA
# parents shouldn't /have/ descendants other than a
# text children (or comments?)
);
push(@html, $node);
}
1; # keep traversing
}
); # End of parms to traverse()
}
if ( $self->{_store_declarations} && defined $self->{_decl} ) {
unshift @html, sprintf "\n", $self->{_decl}->{text} ;
}
return join('', @html, "\n");
}
=head2 $h->as_text()
=head2 $h->as_text(skip_dels => 1)
Returns a string consisting of only the text parts of the element's
descendants.
Text under 'script' or 'style' elements is never included in what's
returned. If C is true, then text content under "del"
nodes is not included in what's returned.
=head2 $h->as_trimmed_text(...)
This is just like as_text(...) except that leading and trailing
whitespace is deleted, and any internal whitespace is collapsed.
=cut
sub as_text {
# Yet another iteratively implemented traverser
my($this,%options) = @_;
my $skip_dels = $options{'skip_dels'} || 0;
my(@pile) = ($this);
my $tag;
my $text = '';
while(@pile) {
if(!defined($pile[0])) { # undef!
# no-op
} elsif(!ref($pile[0])) { # text bit! save it!
$text .= shift @pile;
} else { # it's a ref -- traverse under it
unshift @pile, @{$this->{'_content'} || $nillio}
unless
($tag = ($this = shift @pile)->{'_tag'}) eq 'style'
or $tag eq 'script'
or ($skip_dels and $tag eq 'del');
}
}
return $text;
}
sub as_trimmed_text {
my $text = shift->as_text(@_);
$text =~ s/[\n\r\f\t ]+$//s;
$text =~ s/^[\n\r\f\t ]+//s;
$text =~ s/[\n\r\f\t ]+/ /g;
return $text;
}
sub as_text_trimmed { shift->as_trimmed_text(@_) } # alias, because I forget
=head2 $h->as_XML()
Returns a string representing in XML the element and its descendants.
The XML is not indented.
=cut
# TODO: make it wrap, if not indent?
sub as_XML {
# based an as_HTML
my($self) = @_;
#my $indent_on = defined($indent) && length($indent);
my @xml = ();
my $empty_element_map = $self->_empty_element_map;
my($tag, $node, $start); # per-iteration scratch
$self->traverse(
sub {
($node, $start) = @_;
if(ref $node) { # it's an element
$tag = $node->{'_tag'};
if($start) { # on the way in
if($empty_element_map->{$tag}
and !@{$node->{'_content'} || $nillio}
) {
push(@xml, $node->starttag_XML(undef,1));
} else {
push(@xml, $node->starttag_XML(undef));
}
} else { # on the way out
unless($empty_element_map->{$tag}
and !@{$node->{'_content'} || $nillio}
) {
push(@xml, $node->endtag_XML());
} # otherwise it will have been an <... /> tag.
}
} else { # it's just text
_xml_escape($node);
push(@xml, $node);
}
1; # keep traversing
}
);
join('', @xml, "\n");
}
sub _xml_escape { # DESTRUCTIVE (a.k.a. "in-place")
# Five required escapes: http://www.w3.org/TR/2006/REC-xml11-20060816/#syntax
# We allow & if it's part of a valid escape already: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references
foreach my $x (@_) {
$x =~ s/( # Escape...
< | # Less than, or
> | # Greater than, or
' | # Single quote, or
" | # Double quote, or
&(?! # An ampersand that isn't followed by...
(\#\d+; | # A hash mark, digits and semicolon, or
\#x[\da-f]+; | # A hash mark, "x", hex digits and semicolon, or
[A-Za-z0-9]+; )) # alphanums (not underscore, hence not \w) and a semicolon
)/''.ord($1).";"/sgex; # And replace them with their XML digit counterpart
}
return;
}
=head2 $h->as_Lisp_form()
Returns a string representing the element and its descendants as a
Lisp form. Unsafe characters are encoded as octal escapes.
The Lisp form is indented, and contains external ("href", etc.) as
well as internal attributes ("_tag", "_content", "_implicit", etc.),
except for "_parent", which is omitted.
Current example output for a given element:
("_tag" "img" "border" "0" "src" "pie.png" "usemap" "#main.map")
=cut
# NOTES:
#
# It's been suggested that attribute names be made :-keywords:
# (:_tag "img" :border 0 :src "pie.png" :usemap "#main.map")
# However, it seems that Scheme has no such data type as :-keywords.
# So, for the moment at least, I tend toward simplicity, uniformity,
# and universality, where everything a string or a list.
sub as_Lisp_form {
my @out;
my $sub;
my $depth = 0;
my(@list, $val);
$sub = sub { # Recursor
my $self = $_[0];
@list = ('_tag', $self->{'_tag'});
@list = () unless defined $list[-1]; # unlikely
for (sort keys %$self) { # predictable ordering
next if $_ eq '_content' or $_ eq '_tag' or $_ eq '_parent' or $_ eq '/';
# Leave the other private attributes, I guess.
push @list, $_, $val if defined($val = $self->{$_}); # and !ref $val;
}
for (@list) {
# octal-escape it
s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
eg;
$_ = qq{"$_"};
}
push @out, (' ' x $depth) . '(' . join ' ', splice @list;
if(@{$self->{'_content'} || $nillio}) {
$out[-1] .= " \"_content\" (\n";
++$depth;
foreach my $c (@{$self->{'_content'}}) {
if(ref($c)) {
# an element -- recurse
$sub->($c);
} else {
# a text segment -- stick it in and octal-escape it
push @out, $c;
$out[-1] =~
s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
eg;
# And quote and indent it.
$out[-1] .= "\"\n";
$out[-1] = (' ' x $depth) . '"' . $out[-1];
}
}
--$depth;
substr($out[-1],-1) = "))\n"; # end of _content and of the element
} else {
$out[-1] .= ")\n";
}
return;
};
$sub->($_[0]);
undef $sub;
return join '', @out;
}
sub format {
my($self, $formatter) = @_;
unless (defined $formatter) {
require HTML::FormatText;
$formatter = HTML::FormatText->new();
}
$formatter->format($self);
}
=head2 $h->starttag() or $h->starttag($entities)
Returns a string representing the complete start tag for the element.
I.e., leading "<", tag name, attributes, and trailing ">".
All values are surrounded with
double-quotes, and appropriate characters are encoded. If C<$entities>
is omitted or undef, I unsafe characters are encoded as HTML
entities. See L for details. If you specify some
value for C<$entities>, remember to include the double-quote character in
it. (Previous versions of this module would basically behave as if
C<'&"E'> were specified for C<$entities>.) If C<$entities> is
an empty string, no entity is escaped.
=cut
sub starttag {
my($self, $entities) = @_;
my $name = $self->{'_tag'};
return $self->{'text'} if $name eq '~literal';
return "{'text'} . ">" if $name eq '~declaration';
return "" . $self->{'text'} . ">" if $name eq '~pi';
if($name eq '~comment') {
if(ref($self->{'text'} || '') eq 'ARRAY') {
# Does this ever get used? And is this right?
return
"{'text'}}))
. ">"
;
} else {
return ""
}
}
my $tag = $html_uc ? "<\U$name" : "<\L$name";
my $val;
for (sort keys %$self) { # predictable ordering
next if !length $_ or m/^_/s or $_ eq '/';
$val = $self->{$_};
next if !defined $val; # or ref $val;
if ($_ eq $val && # if attribute is boolean, for this element
exists($HTML::Element::boolean_attr{$name}) &&
(ref($HTML::Element::boolean_attr{$name})
? $HTML::Element::boolean_attr{$name}{$_}
: $HTML::Element::boolean_attr{$name} eq $_)
) {
$tag .= $html_uc ? " \U$_" : " \L$_";
}
else { # non-boolean attribute
if (ref $val eq 'HTML::Element' and
$val->{_tag} eq '~literal') {
$val = $val->{text};
}
else {
HTML::Entities::encode_entities($val, $entities) unless (defined($entities) && !length($entities));
}
$val = qq{"$val"};
$tag .= $html_uc ? qq{ \U$_\E=$val} : qq{ \L$_\E=$val};
}
} # for keys
if ( scalar $self->content_list == 0 && $self->_empty_element_map->{ $self->tag } ) {
return $tag . " />";
}
else {
return $tag . ">";
}
}
sub starttag_XML {
my($self) = @_;
# and a third parameter to signal emptiness?
my $name = $self->{'_tag'};
return $self->{'text'} if $name eq '~literal';
return '{'text'}. '>' if $name eq '~declaration';
return "" . $self->{'text'} . "?>" if $name eq '~pi';
if($name eq '~comment') {
if(ref($self->{'text'} || '') eq 'ARRAY') {
# Does this ever get used? And is this right?
$name = join(' ', @{$self->{'text'}});
} else {
$name = $self->{'text'};
}
$name =~ s/--/--/g; # can't have double --'s in XML comments
return "";
}
my $tag = "<$name";
my $val;
for (sort keys %$self) { # predictable ordering
next if !length $_ or m/^_/s or $_ eq '/';
# Hm -- what to do if val is undef?
# I suppose that shouldn't ever happen.
next if !defined($val = $self->{$_}); # or ref $val;
_xml_escape($val);
$tag .= qq{ $_="$val"};
}
@_ == 3 ? "$tag />" : "$tag>";
}
=head2 $h->endtag()
Returns a string representing the complete end tag for this element.
I.e., "", tag name, and ">".
=cut
sub endtag {
$html_uc ? "\U$_[0]->{'_tag'}>" : "\L$_[0]->{'_tag'}>";
}
# TODO: document?
sub endtag_XML {
"$_[0]->{'_tag'}>";
}
#==========================================================================
# This, ladies and germs, is an iterative implementation of a
# recursive algorithm. DON'T TRY THIS AT HOME.
# Basically, the algorithm says:
#
# To traverse:
# 1: pre-order visit this node
# 2: traverse any children of this node
# 3: post-order visit this node, unless it's a text segment,
# or a prototypically empty node (like "br", etc.)
# Add to that the consideration of the callbacks' return values,
# so you can block visitation of the children, or siblings, or
# abort the whole excursion, etc.
#
# So, why all this hassle with making the code iterative?
# It makes for real speed, because it eliminates the whole
# hassle of Perl having to allocate scratch space for each
# instance of the recursive sub. Since the algorithm
# is basically simple (and not all recursive ones are!) and
# has few necessary lexicals (basically just the current node's
# content list, and the current position in it), it was relatively
# straightforward to store that information not as the frame
# of a sub, but as a stack, i.e., a simple Perl array (well, two
# of them, actually: one for content-listrefs, one for indexes of
# current position in each of those).
my $NIL = [];
sub traverse {
my($start, $callback, $ignore_text) = @_;
Carp::croak "traverse can be called only as an object method"
unless ref $start;
Carp::croak('must provide a callback for traverse()!')
unless defined $callback and ref $callback;
# Elementary type-checking:
my($c_pre, $c_post);
if(UNIVERSAL::isa($callback, 'CODE')) {
$c_pre = $c_post = $callback;
} elsif(UNIVERSAL::isa($callback,'ARRAY')) {
($c_pre, $c_post) = @$callback;
Carp::croak("pre-order callback \"$c_pre\" is true but not a coderef!")
if $c_pre and not UNIVERSAL::isa($c_pre, 'CODE');
Carp::croak("pre-order callback \"$c_post\" is true but not a coderef!")
if $c_post and not UNIVERSAL::isa($c_post, 'CODE');
return $start unless $c_pre or $c_post;
# otherwise there'd be nothing to actually do!
} else {
Carp::croak("$callback is not a known kind of reference")
unless ref($callback);
}
my $empty_element_map = $start->_empty_element_map;
my(@C) = [$start]; # a stack containing lists of children
my(@I) = (-1); # initial value must be -1 for each list
# a stack of indexes to current position in corresponding lists in @C
# In each of these, 0 is the active point
# scratch:
my(
$rv, # return value of callback
$this, # current node
$content_r, # child list of $this
);
# THE BIG LOOP
while(@C) {
# Move to next item in this frame
if(!defined($I[0]) or ++$I[0] >= @{$C[0]}) {
# We either went off the end of this list, or aborted the list
# So call the post-order callback:
if($c_post
and defined $I[0]
and @C > 1
# to keep the next line from autovivifying
and defined($this = $C[1][ $I[1] ]) # sanity, and
# suppress callbacks on exiting the fictional top frame
and ref($this) # sanity
and not(
$this->{'_empty_element'}
|| $empty_element_map->{$this->{'_tag'} || ''}
) # things that don't get post-order callbacks
) {
shift @I;
shift @C;
#print "Post! at depth", scalar(@I), "\n";
$rv = $c_post->(
#map $_, # copy to avoid any messiness
$this, # 0: this
0, # 1: startflag (0 for post-order call)
@I - 1, # 2: depth
);
if(defined($rv) and ref($rv) eq $travsignal_package) {
$rv = $$rv; #deref
if($rv eq 'ABORT') {
last; # end of this excursion!
} elsif($rv eq 'PRUNE') {
# NOOP on post!!
} elsif($rv eq 'PRUNE_SOFTLY') {
# NOOP on post!!
} elsif($rv eq 'OK') {
# noop
} elsif($rv eq 'PRUNE_UP') {
$I[0] = undef;
} else {
die "Unknown travsignal $rv\n";
# should never happen
}
}
}
else {
shift @I;
shift @C;
}
next;
}
$this = $C[0][ $I[0] ];
if($c_pre) {
if(defined $this and ref $this) { # element
$rv = $c_pre->(
#map $_, # copy to avoid any messiness
$this, # 0: this
1, # 1: startflag (1 for pre-order call)
@I - 1, # 2: depth
);
} else { # text segment
next if $ignore_text;
$rv = $c_pre->(
#map $_, # copy to avoid any messiness
$this, # 0: this
1, # 1: startflag (1 for pre-order call)
@I - 1, # 2: depth
$C[1][ $I[1] ], # 3: parent
# And there will always be a $C[1], since
# we can't start traversing at a text node
$I[0] # 4: index of self in parent's content list
);
}
if(not $rv) { # returned false. Same as PRUNE.
next; # prune
} elsif(ref($rv) eq $travsignal_package) {
$rv = $$rv; # deref
if($rv eq 'ABORT') {
last; # end of this excursion!
} elsif($rv eq 'PRUNE') {
next;
} elsif($rv eq 'PRUNE_SOFTLY') {
if(ref($this)
and
not($this->{'_empty_element'}
|| $empty_element_map->{$this->{'_tag'} || ''})
) {
# push a dummy empty content list just to trigger a post callback
unshift @I, -1;
unshift @C, $NIL;
}
next;
} elsif($rv eq 'OK') {
# noop
} elsif($rv eq 'PRUNE_UP') {
$I[0] = undef;
next;
# equivalent of last'ing out of the current child list.
# Used to have PRUNE_UP_SOFTLY and ABORT_SOFTLY here, but the code
# for these was seriously upsetting, served no particularly clear
# purpose, and could not, I think, be easily implemented with a
# recursive routine. All bad things!
} else {
die "Unknown travsignal $rv\n";
# should never happen
}
}
# else fall thru to meaning same as \'OK'.
}
# end of pre-order calling
# Now queue up content list for the current element...
if(ref $this
and
not( # ...except for those which...
not($content_r = $this->{'_content'} and @$content_r)
# ...have empty content lists...
and $this->{'_empty_element'} || $empty_element_map->{$this->{'_tag'} || ''}
# ...and that don't get post-order callbacks
)
) {
unshift @I, -1;
unshift @C, $content_r || $NIL;
#print $this->{'_tag'}, " ($this) adds content_r ", $C[0], "\n";
}
}
return $start;
}
=head1 SECONDARY STRUCTURAL METHODS
These methods all involve some structural aspect of the tree;
either they report some aspect of the tree's structure, or they involve
traversal down the tree, or walking up the tree.
=head2 $h->is_inside('tag', ...) or $h->is_inside($element, ...)
Returns true if the $h element is, or is contained anywhere inside an
element that is any of the ones listed, or whose tag name is any of
the tag names listed.
=cut
sub is_inside {
my $self = shift;
return undef unless @_; # if no items specified, I guess this is right.
my $current = $self;
# the loop starts by looking at the given element
while (defined $current and ref $current) {
for (@_) {
if(ref) { # element
return 1 if $_ eq $current;
} else { # tag name
return 1 if $_ eq $current->{'_tag'};
}
}
$current = $current->{'_parent'};
}
0;
}
=head2 $h->is_empty()
Returns true if $h has no content, i.e., has no elements or text
segments under it. In other words, this returns true if $h is a leaf
node, AKA a terminal node. Do not confuse this sense of "empty" with
another sense that it can have in SGML/HTML/XML terminology, which
means that the element in question is of the type (like HTML's "hr",
"br", "img", etc.) that I have any content.
That is, a particular "p" element may happen to have no content, so
$that_p_element->is_empty will be true -- even though the prototypical
"p" element isn't "empty" (not in the way that the prototypical "hr"
element is).
If you think this might make for potentially confusing code, consider
simply using the clearer exact equivalent: not($h->content_list)
=cut
sub is_empty {
my $self = shift;
!$self->{'_content'} || !@{$self->{'_content'}};
}
=head2 $h->pindex()
Return the index of the element in its parent's contents array, such
that $h would equal
$h->parent->content->[$h->pindex]
or
($h->parent->content_list)[$h->pindex]
assuming $h isn't root. If the element $h is root, then
$h->pindex returns undef.
=cut
sub pindex {
my $self = shift;
my $parent = $self->{'_parent'} || return undef;
my $pc = $parent->{'_content'} || return undef;
for(my $i = 0; $i < @$pc; ++$i) {
return $i if ref $pc->[$i] and $pc->[$i] eq $self;
}
return undef; # we shouldn't ever get here
}
#--------------------------------------------------------------------------
=head2 $h->left()
In scalar context: returns the node that's the immediate left sibling
of $h. If $h is the leftmost (or only) child of its parent (or has no
parent), then this returns undef.
In list context: returns all the nodes that're the left siblings of $h
(starting with the leftmost). If $h is the leftmost (or only) child
of its parent (or has no parent), then this returns empty-list.
(See also $h->preinsert(LIST).)
=cut
sub left {
Carp::croak "left() is supposed to be an object method"
unless ref $_[0];
my $pc =
(
$_[0]->{'_parent'} || return
)->{'_content'} || die "parent is childless?";
die "parent is childless" unless @$pc;
return if @$pc == 1; # I'm an only child
if(wantarray) {
my @out;
foreach my $j (@$pc) {
return @out if ref $j and $j eq $_[0];
push @out, $j;
}
} else {
for(my $i = 0; $i < @$pc; ++$i) {
return $i ? $pc->[$i - 1] : undef
if ref $pc->[$i] and $pc->[$i] eq $_[0];
}
}
die "I'm not in my parent's content list?";
return;
}
=head2 $h->right()
In scalar context: returns the node that's the immediate right sibling
of $h. If $h is the rightmost (or only) child of its parent (or has
no parent), then this returns undef.
In list context: returns all the nodes that're the right siblings of
$h, starting with the leftmost. If $h is the rightmost (or only) child
of its parent (or has no parent), then this returns empty-list.
(See also $h->postinsert(LIST).)
=cut
sub right {
Carp::croak "right() is supposed to be an object method"
unless ref $_[0];
my $pc =
(
$_[0]->{'_parent'} || return
)->{'_content'} || die "parent is childless?";
die "parent is childless" unless @$pc;
return if @$pc == 1; # I'm an only child
if(wantarray) {
my(@out, $seen);
foreach my $j (@$pc) {
if($seen) {
push @out, $j;
} else {
$seen = 1 if ref $j and $j eq $_[0];
}
}
die "I'm not in my parent's content list?" unless $seen;
return @out;
} else {
for(my $i = 0; $i < @$pc; ++$i) {
return +($i == $#$pc) ? undef : $pc->[$i+1]
if ref $pc->[$i] and $pc->[$i] eq $_[0];
}
die "I'm not in my parent's content list?";
return;
}
}
#--------------------------------------------------------------------------
=head2 $h->address()
Returns a string representing the location of this node in the tree.
The address consists of numbers joined by a '.', starting with '0',
and followed by the pindexes of the nodes in the tree that are
ancestors of $h, starting from the top.
So if the way to get to a node starting at the root is to go to child
2 of the root, then child 10 of that, and then child 0 of that, and
then you're there -- then that node's address is "0.2.10.0".
As a bit of a special case, the address of the root is simply "0".
I forsee this being used mainly for debugging, but you may
find your own uses for it.
=head2 $h->address($address)
This returns the node (whether element or text-segment) at
the given address in the tree that $h is a part of. (That is,
the address is resolved starting from $h->root.)
If there is no node at the given address, this returns undef.
You can specify "relative addressing" (i.e., that indexing is supposed
to start from $h and not from $h->root) by having the address start
with a period -- e.g., $h->address(".3.2") will look at child 3 of $h,
and child 2 of that.
=cut
sub address {
if(@_ == 1) { # report-address form
return
join('.',
reverse( # so it starts at the top
map($_->pindex() || '0', # so that root's undef -> '0'
$_[0], # self and...
$_[0]->lineage
)
)
)
;
} else { # get-node-at-address
my @stack = split(/\./, $_[1]);
my $here;
if(@stack and !length $stack[0]) { # relative addressing
$here = $_[0];
shift @stack;
} else { # absolute addressing
return undef unless 0 == shift @stack; # to pop the initial 0-for-root
$here = $_[0]->root;
}
while(@stack) {
return undef
unless
$here->{'_content'}
and @{$here->{'_content'}} > $stack[0];
# make sure the index isn't too high
$here = $here->{'_content'}[ shift @stack ];
return undef if @stack and not ref $here;
# we hit a text node when we expected a non-terminal element node
}
return $here;
}
}
=head2 $h->depth()
Returns a number expressing C<$h>'s depth within its tree, i.e., how many
steps away it is from the root. If C<$h> has no parent (i.e., is root),
its depth is 0.
=cut
sub depth {
my $here = $_[0];
my $depth = 0;
while(defined($here = $here->{'_parent'}) and ref($here)) {
++$depth;
}
return $depth;
}
=head2 $h->root()
Returns the element that's the top of C<$h>'s tree. If C<$h> is
root, this just returns C<$h>. (If you want to test whether C<$h>
I the root, instead of asking what its root is, just test
C<< not($h->parent) >>.)
=cut
sub root {
my $here = my $root = shift;
while(defined($here = $here->{'_parent'}) and ref($here)) {
$root = $here;
}
return $root;
}
=head2 $h->lineage()
Returns the list of C<$h>'s ancestors, starting with its parent,
and then that parent's parent, and so on, up to the root. If C<$h>
is root, this returns an empty list.
If you simply want a count of the number of elements in C<$h>'s lineage,
use $h->depth.
=cut
sub lineage {
my $here = shift;
my @lineage;
while(defined($here = $here->{'_parent'}) and ref($here)) {
push @lineage, $here;
}
return @lineage;
}
=head2 $h->lineage_tag_names()
Returns the list of the tag names of $h's ancestors, starting
with its parent, and that parent's parent, and so on, up to the
root. If $h is root, this returns an empty list.
Example output: C<('em', 'td', 'tr', 'table', 'body', 'html')>
=cut
sub lineage_tag_names {
my $here = my $start = shift;
my @lineage_names;
while(defined($here = $here->{'_parent'}) and ref($here)) {
push @lineage_names, $here->{'_tag'};
}
return @lineage_names;
}
=head2 $h->descendants()
In list context, returns the list of all $h's descendant elements,
listed in pre-order (i.e., an element appears before its
content-elements). Text segments DO NOT appear in the list.
In scalar context, returns a count of all such elements.
=head2 $h->descendents()
This is just an alias to the C method.
=cut
sub descendents { shift->descendants(@_) }
sub descendants {
my $start = shift;
if(wantarray) {
my @descendants;
$start->traverse(
[ # pre-order sub only
sub {
push(@descendants, $_[0]);
return 1;
},
undef # no post
],
1, # ignore text
);
shift @descendants; # so $self doesn't appear in the list
return @descendants;
} else { # just returns a scalar
my $descendants = -1; # to offset $self being counted
$start->traverse(
[ # pre-order sub only
sub {
++$descendants;
return 1;
},
undef # no post
],
1, # ignore text
);
return $descendants;
}
}
=head2 $h->find_by_tag_name('tag', ...)
In list context, returns a list of elements at or under $h that have
any of the specified tag names. In scalar context, returns the first
(in pre-order traversal of the tree) such element found, or undef if
none.
=head2 $h->find('tag', ...)
This is just an alias to C. (There was once
going to be a whole find_* family of methods, but then look_down
filled that niche, so there turned out not to be much reason for the
verboseness of the name "find_by_tag_name".)
=cut
sub find { shift->find_by_tag_name( @_ ) }
# yup, a handy alias
sub find_by_tag_name {
my(@pile) = shift(@_); # start out the to-do stack for the traverser
Carp::croak "find_by_tag_name can be called only as an object method"
unless ref $pile[0];
return() unless @_;
my(@tags) = $pile[0]->_fold_case(@_);
my(@matching, $this, $this_tag);
while(@pile) {
$this_tag = ($this = shift @pile)->{'_tag'};
foreach my $t (@tags) {
if($t eq $this_tag) {
if(wantarray) {
push @matching, $this;
last;
} else {
return $this;
}
}
}
unshift @pile, grep ref($_), @{$this->{'_content'} || next};
}
return @matching if wantarray;
return;
}
=head2 $h->find_by_attribute('attribute', 'value')
In a list context, returns a list of elements at or under $h that have
the specified attribute, and have the given value for that attribute.
In a scalar context, returns the first (in pre-order traversal of the
tree) such element found, or undef if none.
This method is B in favor of the more expressive
C method, which new code should use instead.
=cut
sub find_by_attribute {
# We could limit this to non-internal attributes, but hey.
my($self, $attribute, $value) = @_;
Carp::croak "Attribute must be a defined value!" unless defined $attribute;
$attribute = $self->_fold_case($attribute);
my @matching;
my $wantarray = wantarray;
my $quit;
$self->traverse(
[ # pre-order only
sub {
if( exists $_[0]{$attribute}
and $_[0]{$attribute} eq $value
) {
push @matching, $_[0];
return HTML::Element::ABORT unless $wantarray; # only take the first
}
1; # keep traversing
},
undef # no post
],
1, # yes, ignore text nodes.
);
if($wantarray) {
return @matching;
} else {
return undef unless @matching;
return $matching[0];
}
}
#--------------------------------------------------------------------------
=head2 $h->look_down( ...criteria... )
This starts at $h and looks thru its element descendants (in
pre-order), looking for elements matching the criteria you specify.
In list context, returns all elements that match all the given
criteria; in scalar context, returns the first such element (or undef,
if nothing matched).
There are three kinds of criteria you can specify:
=over
=item (attr_name, attr_value)
This means you're looking for an element with that value for that
attribute. Example: C<"alt", "pix!">. Consider that you can search
on internal attribute values too: C<"_tag", "p">.
=item (attr_name, qr/.../)
This means you're looking for an element whose value for that
attribute matches the specified Regexp object.
=item a coderef
This means you're looking for elements where coderef->(each_element)
returns true. Example:
my @wide_pix_images
= $h->look_down(
"_tag", "img",
"alt", "pix!",
sub { $_[0]->attr('width') > 350 }
);
=back
Note that C<(attr_name, attr_value)> and C<(attr_name, qr/.../)>
criteria are almost always faster than coderef
criteria, so should presumably be put before them in your list of
criteria. That is, in the example above, the sub ref is called only
for elements that have already passed the criteria of having a "_tag"
attribute with value "img", and an "alt" attribute with value "pix!".
If the coderef were first, it would be called on every element, and
I what elements pass that criterion (i.e., elements for which
the coderef returned true) would be checked for their "_tag" and "alt"
attributes.
Note that comparison of string attribute-values against the string
value in C<(attr_name, attr_value)> is case-INsensitive! A criterion
of C<('align', 'right')> I match an element whose "align" value
is "RIGHT", or "right" or "rIGhT", etc.
Note also that C considers "" (empty-string) and undef to
be different things, in attribute values. So this:
$h->look_down("alt", "")
will find elements I an "alt" attribute, but where the value for
the "alt" attribute is "". But this:
$h->look_down("alt", undef)
is the same as:
$h->look_down(sub { !defined($_[0]->attr('alt')) } )
That is, it finds elements that do not have an "alt" attribute at all
(or that do have an "alt" attribute, but with a value of undef --
which is not normally possible).
Note that when you give several criteria, this is taken to mean you're
looking for elements that match I your criterion, not just I
of them. In other words, there is an implicit "and", not an "or". So
if you wanted to express that you wanted to find elements with a
"name" attribute with the value "foo" I with an "id" attribute
with the value "baz", you'd have to do it like:
@them = $h->look_down(
sub {
# the lcs are to fold case
lc($_[0]->attr('name')) eq 'foo'
or lc($_[0]->attr('id')) eq 'baz'
}
);
Coderef criteria are more expressive than C<(attr_name, attr_value)>
and C<(attr_name, qr/.../)>
criteria, and all C<(attr_name, attr_value)>
and C<(attr_name, qr/.../)>
criteria could be
expressed in terms of coderefs. However, C<(attr_name, attr_value)>
and C<(attr_name, qr/.../)>
criteria are a convenient shorthand. (In fact, C itself is
basically "shorthand" too, since anything you can do with C
you could do by traversing the tree, either with the C
method or with a routine of your own. However, C often
makes for very concise and clear code.)
=cut
sub look_down {
ref($_[0]) or Carp::croak "look_down works only as an object method";
my @criteria;
for(my $i = 1; $i < @_;) {
Carp::croak "Can't use undef as an attribute name" unless defined $_[$i];
if(ref $_[$i]) {
Carp::croak "A " . ref($_[$i]) . " value is not a criterion"
unless ref $_[$i] eq 'CODE';
push @criteria, $_[ $i++ ];
} else {
Carp::croak "param list to look_down ends in a key!" if $i == $#_;
push @criteria, [ scalar($_[0]->_fold_case($_[$i])),
defined($_[$i+1])
? ( ( ref $_[$i+1] ? $_[$i+1] : lc( $_[$i+1] )), ref( $_[$i+1] ) )
# yes, leave that LC!
: undef
];
$i += 2;
}
}
Carp::croak "No criteria?" unless @criteria;
my(@pile) = ($_[0]);
my(@matching, $val, $this);
Node:
while(defined($this = shift @pile)) {
# Yet another traverser implemented with merely iterative code.
foreach my $c (@criteria) {
if(ref($c) eq 'CODE') {
next Node unless $c->($this); # jump to the continue block
} else { # it's an attr-value pair
next Node # jump to the continue block
if # two values are unequal if:
(defined($val = $this->{ $c->[0] }))
? (
!defined $c->[1] # actual is def, critval is undef => fail
# allow regex matching
# allow regex matching
or (
$c->[2] eq 'Regexp'
? $val !~ $c->[1]
: ( ref $val ne $c->[2]
# have unequal ref values => fail
or lc($val) ne lc($c->[1])
# have unequal lc string values => fail
))
)
: (defined $c->[1]) # actual is undef, critval is def => fail
}
}
# We make it this far only if all the criteria passed.
return $this unless wantarray;
push @matching, $this;
} continue {
unshift @pile, grep ref($_), @{$this->{'_content'} || $nillio};
}
return @matching if wantarray;
return;
}
=head2 $h->look_up( ...criteria... )
This is identical to $h->look_down, except that whereas $h->look_down
basically scans over the list:
($h, $h->descendants)
$h->look_up instead scans over the list
($h, $h->lineage)
So, for example, this returns all ancestors of $h (possibly including
$h itself) that are "td" elements with an "align" attribute with a
value of "right" (or "RIGHT", etc.):
$h->look_up("_tag", "td", "align", "right");
=cut
sub look_up {
ref($_[0]) or Carp::croak "look_up works only as an object method";
my @criteria;
for(my $i = 1; $i < @_;) {
Carp::croak "Can't use undef as an attribute name" unless defined $_[$i];
if(ref $_[$i]) {
Carp::croak "A " . ref($_[$i]) . " value is not a criterion"
unless ref $_[$i] eq 'CODE';
push @criteria, $_[ $i++ ];
} else {
Carp::croak "param list to look_up ends in a key!" if $i == $#_;
push @criteria, [ scalar($_[0]->_fold_case($_[$i])),
defined($_[$i+1])
? ( ( ref $_[$i+1] ? $_[$i+1] : lc( $_[$i+1] )), ref( $_[$i+1] ) )
: undef # Yes, leave that LC!
];
$i += 2;
}
}
Carp::croak "No criteria?" unless @criteria;
my(@matching, $val);
my $this = $_[0];
Node:
while(1) {
# You'll notice that the code here is almost the same as for look_down.
foreach my $c (@criteria) {
if(ref($c) eq 'CODE') {
next Node unless $c->($this); # jump to the continue block
} else { # it's an attr-value pair
next Node # jump to the continue block
if # two values are unequal if:
(defined($val = $this->{ $c->[0] }))
? (
!defined $c->[1] # actual is def, critval is undef => fail
or (
$c->[2] eq 'Regexp'
? $val !~ $c->[1]
: ( ref $val ne $c->[2]
# have unequal ref values => fail
or lc($val) ne $c->[1]
# have unequal lc string values => fail
))
)
: (defined $c->[1]) # actual is undef, critval is def => fail
}
}
# We make it this far only if all the criteria passed.
return $this unless wantarray;
push @matching, $this;
} continue {
last unless defined($this = $this->{'_parent'}) and ref $this;
}
return @matching if wantarray;
return;
}
#--------------------------------------------------------------------------
=head2 $h->traverse(...options...)
Lengthy discussion of HTML::Element's unnecessary and confusing
C method has been moved to a separate file:
L
=head2 $h->attr_get_i('attribute')
In list context, returns a list consisting of the values of the given
attribute for $self and for all its ancestors starting from $self and
working its way up. Nodes with no such attribute are skipped.
("attr_get_i" stands for "attribute get, with inheritance".)
In scalar context, returns the first such value, or undef if none.
Consider a document consisting of:
Pati Pata
Stuff
Foo bar baz Quux.
Hooboy.
If $h is the "cite" element, $h->attr_get_i("lang") in list context
will return the list ('es-MX', 'i-klingon'). In scalar context, it
will return the value 'es-MX'.
If you call with multiple attribute names...
=head2 $h->attr_get_i('a1', 'a2', 'a3')
...in list context, this will return a list consisting of
the values of these attributes which exist in $self and its ancestors.
In scalar context, this returns the first value (i.e., the value of
the first existing attribute from the first element that has
any of the attributes listed). So, in the above example,
$h->attr_get_i('lang', 'align');
will return:
('es-MX', 'center', 'i-klingon') # in list context
or
'es-MX' # in scalar context.
But note that this:
$h->attr_get_i('align', 'lang');
will return:
('center', 'es-MX', 'i-klingon') # in list context
or
'center' # in scalar context.
=cut
sub attr_get_i {
if(@_ > 2) {
my $self = shift;
Carp::croak "No attribute names can be undef!"
if grep !defined($_), @_;
my @attributes = $self->_fold_case(@_);
if(wantarray) {
my @out;
foreach my $x ($self, $self->lineage) {
push @out, map { exists($x->{$_}) ? $x->{$_} : () } @attributes;
}
return @out;
} else {
foreach my $x ($self, $self->lineage) {
foreach my $attribute (@attributes) {
return $x->{$attribute} if exists $x->{$attribute}; # found
}
}
return undef; # never found
}
} else {
# Single-attribute search. Simpler, most common, so optimize
# for the most common case
Carp::croak "Attribute name must be a defined value!" unless defined $_[1];
my $self = $_[0];
my $attribute = $self->_fold_case($_[1]);
if(wantarray) { # list context
return
map {
exists($_->{$attribute}) ? $_->{$attribute} : ()
} $self, $self->lineage;
;
} else { # scalar context
foreach my $x ($self, $self->lineage) {
return $x->{$attribute} if exists $x->{$attribute}; # found
}
return undef; # never found
}
}
}
=head2 $h->tagname_map()
Scans across C<$h> and all its descendants, and makes a hash (a
reference to which is returned) where each entry consists of a key
that's a tag name, and a value that's a reference to a list to all
elements that have that tag name. I.e., this method returns:
{
# Across $h and all descendants...
'a' => [ ...list of all 'a' elements... ],
'em' => [ ...list of all 'em' elements... ],
'img' => [ ...list of all 'img' elements... ],
}
(There are entries in the hash for only those tagnames that occur
at/under C<$h> -- so if there's no "img" elements, there'll be no
"img" entry in the hashr(ref) returned.)
Example usage:
my $map_r = $h->tagname_map();
my @heading_tags = sort grep m/^h\d$/s, keys %$map_r;
if(@heading_tags) {
print "Heading levels used: @heading_tags\n";
} else {
print "No headings.\n"
}
=cut
sub tagname_map {
my(@pile) = $_[0]; # start out the to-do stack for the traverser
Carp::croak "find_by_tag_name can be called only as an object method"
unless ref $pile[0];
my(%map, $this_tag, $this);
while(@pile) {
$this_tag = ''
unless defined(
$this_tag = (
$this = shift @pile
)->{'_tag'}
)
; # dance around the strange case of having an undef tagname.
push @{ $map{$this_tag} ||= [] }, $this; # add to map
unshift @pile, grep ref($_), @{$this->{'_content'} || next}; # traverse
}
return \%map;
}
=head2 $h->extract_links() or $h->extract_links(@wantedTypes)
Returns links found by traversing the element and all of its children
and looking for attributes (like "href" in an "a" element, or "src" in
an "img" element) whose values represent links. The return value is a
I to an array. Each element of the array is reference to
an array with I items: the link-value, the element that has the
attribute with that link-value, and the name of that attribute, and
the tagname of that element.
(Example: C<['http://www.suck.com/',> I<$elem_obj> C<, 'href', 'a']>.)
You may or may not end up using the
element itself -- for some purposes, you may use only the link value.
You might specify that you want to extract links from just some kinds
of elements (instead of the default, which is to extract links from
I the kinds of elements known to have attributes whose values
represent links). For instance, if you want to extract links from
only "a" and "img" elements, you could code it like this:
for (@{ $e->extract_links('a', 'img') }) {
my($link, $element, $attr, $tag) = @$_;
print
"Hey, there's a $tag that links to "
$link, ", in its $attr attribute, at ",
$element->address(), ".\n";
}
=cut
sub extract_links {
my $start = shift;
my %wantType;
@wantType{$start->_fold_case(@_)} = (1) x @_; # if there were any
my $wantType = scalar(@_);
my @links;
# TODO: add xml:link?
my($link_attrs, $tag, $self, $val); # scratch for each iteration
$start->traverse(
[
sub { # pre-order call only
$self = $_[0];
$tag = $self->{'_tag'};
return 1 if $wantType && !$wantType{$tag}; # if we're selective
if(defined( $link_attrs = $HTML::Element::linkElements{$tag} )) {
# If this is a tag that has any link attributes,
# look over possibly present link attributes,
# saving the value, if found.
for (ref($link_attrs) ? @$link_attrs : $link_attrs) {
if(defined( $val = $self->attr($_) )) {
push(@links, [$val, $self, $_, $tag])
}
}
}
1; # return true, so we keep recursing
},
undef
],
1, # ignore text nodes
);
\@links;
}
=head2 $h->simplify_pres
In text bits under PRE elements that are at/under $h, this routine
nativizes all newlines, and expands all tabs.
That is, if you read a file with lines delimited by C<\cm\cj>'s, the
text under PRE areas will have C<\cm\cj>'s instead of C<\n>'s. Calling
$h->nativize_pre_newlines on such a tree will turn C<\cm\cj>'s into
C<\n>'s.
Tabs are expanded to however many spaces it takes to get
to the next 8th column -- the usual way of expanding them.
=cut
sub simplify_pres {
my $pre = 0;
my $sub;
my $line;
$sub = sub {
++$pre if $_[0]->{'_tag'} eq 'pre';
foreach my $it (@{ $_[0]->{'_content'} || return }) {
if(ref $it) {
$sub->( $it ); # recurse!
} elsif($pre) {
#$it =~ s/(?:(?:\cm\cj*)|(?:\cj))/\n/g;
$it =
join "\n",
map {;
$line = $_;
while($line =~
s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
# Sort of adapted from Text::Tabs -- yes, it's hardwired-in that
# tabs are at every EIGHTH column.
){}
$line;
}
split /(?:(?:\cm\cj*)|(?:\cj))/, $it, -1
;
}
}
--$pre if $_[0]->{'_tag'} eq 'pre';
return;
};
$sub->( $_[0] );
undef $sub;
return;
}
=head2 $h->same_as($i)
Returns true if $h and $i are both elements representing the same tree
of elements, each with the same tag name, with the same explicit
attributes (i.e., not counting attributes whose names start with "_"),
and with the same content (textual, comments, etc.).
Sameness of descendant elements is tested, recursively, with
C<$child1-Esame_as($child_2)>, and sameness of text segments is tested
with C<$segment1 eq $segment2>.
=cut
sub same_as {
die 'same_as() takes only one argument: $h->same_as($i)' unless @_ == 2;
my($h,$i) = @_[0,1];
die "same_as() can be called only as an object method" unless ref $h;
return 0 unless defined $i and ref $i;
# An element can't be same_as anything but another element!
# They needn't be of the same class, tho.
return 1 if $h eq $i;
# special (if rare) case: anything is the same as... itself!
# assumes that no content lists in/under $h or $i contain subsequent
# text segments, like: ['foo', ' bar']
# compare attributes now.
#print "Comparing tags of $h and $i...\n";
return 0 unless $h->{'_tag'} eq $i->{'_tag'};
# only significant attribute whose name starts with "_"
#print "Comparing attributes of $h and $i...\n";
# Compare attributes, but only the real ones.
{
# Bear in mind that the average element has very few attributes,
# and that element names are rather short.
# (Values are a different story.)
# XXX I would think that /^[^_]/ would be faster, at least easier to read.
my @keys_h = sort grep {length $_ and substr($_,0,1) ne '_'} keys %$h;
my @keys_i = sort grep {length $_ and substr($_,0,1) ne '_'} keys %$i;
return 0 unless @keys_h == @keys_i;
# different number of real attributes? they're different.
for(my $x = 0; $x < @keys_h; ++$x) {
return 0 unless
$keys_h[$x] eq $keys_i[$x] and # same key name
$h->{$keys_h[$x]} eq $i->{$keys_h[$x]}; # same value
# Should this test for definedness on values?
# People shouldn't be putting undef in attribute values, I think.
}
}
#print "Comparing children of $h and $i...\n";
my $hcl = $h->{'_content'} || [];
my $icl = $i->{'_content'} || [];
return 0 unless @$hcl == @$icl;
# different numbers of children? they're different.
if(@$hcl) {
# compare each of the children:
for(my $x = 0; $x < @$hcl; ++$x) {
if(ref $hcl->[$x]) {
return 0 unless ref($icl->[$x]);
# an element can't be the same as a text segment
# Both elements:
return 0 unless $hcl->[$x]->same_as($icl->[$x]); # RECURSE!
} else {
return 0 if ref($icl->[$x]);
# a text segment can't be the same as an element
# Both text segments:
return 0 unless $hcl->[$x] eq $icl->[$x];
}
}
}
return 1; # passed all the tests!
}
=head2 $h = HTML::Element->new_from_lol(ARRAYREF)
Resursively constructs a tree of nodes, based on the (non-cyclic)
data structure represented by ARRAYREF, where that is a reference
to an array of arrays (of arrays (of arrays (etc.))).
In each arrayref in that structure, different kinds of values are
treated as follows:
=over
=item * Arrayrefs
Arrayrefs are considered to
designate a sub-tree representing children for the node constructed
from the current arrayref.
=item * Hashrefs
Hashrefs are considered to contain
attribute-value pairs to add to the element to be constructed from
the current arrayref
=item * Text segments
Text segments at the start of any arrayref
will be considered to specify the name of the element to be
constructed from the current araryref; all other text segments will
be considered to specify text segments as children for the current
arrayref.
=item * Elements
Existing element objects are either inserted into the treelet
constructed, or clones of them are. That is, when the lol-tree is
being traversed and elements constructed based what's in it, if
an existing element object is found, if it has no parent, then it is
added directly to the treelet constructed; but if it has a parent,
then C<$that_node-Eclone> is added to the treelet at the
appropriate place.
=back
An example will hopefully make this more obvious:
my $h = HTML::Element->new_from_lol(
['html',
['head',
[ 'title', 'I like stuff!' ],
],
['body',
{'lang', 'en-JP', _implicit => 1},
'stuff',
['p', 'um, p < 4!', {'class' => 'par123'}],
['div', {foo => 'bar'}, '123'],
]
]
);
$h->dump;
Will print this:
@0
@0.0
@0.0.0
"I like stuff!"
@0.1 (IMPLICIT)
"stuff"
@0.1.1
"um, p < 4!"
@0.1.2
"123"
And printing $h->as_HTML will give something like:
I like stuff!
stuff
um, p < 4!
123
You can even do fancy things with C