package HTML::MobileConverter;
use strict;
use HTML::Parser;
use URI;
our $VERSION = '0.011';
sub new {
my $class = shift;
my $self = {@_};
bless $self, $class;
$self->init;
return $self;
}
sub init {
my $self = shift;
$self->{maxlength} ||= 50000;
$self->{maxpctagcount} ||= 10;
$self->{maxpctagrate} ||= 0.1;
$self->{baseuri} or warn 'baseuri is not specified.';
$self->{hrefhandler} ||= sub {
my $href = shift;
return URI->new_abs($href, $self->{baseuri});
};
$self->{mobiletag} ||= {
a => 'name|accesskey|href|cti|ijam|utn|subject|body|telbook|kana|email|ista|ilet|iswf|irst',
base => 'href',
blink => '',
blockquote => '',
body => 'bgcolor|text|link|alink|vlink',
br => 'clear',
center => '',
dd => '',
dir => 'type',
div => 'align',
dl => '',
dt => '',
font => 'color|size',
form => 'action|method|utn',
head => '',
h1 => 'align',
h2 => 'align',
h3 => 'align',
h4 => 'align',
h5 => 'align',
h6 => 'align',
hr => 'align|size|width|noshade|color',
html => '',
img => 'src|align|width|height|hspace|vspace|alt',
input => 'type|name|size|maxlength|accesskey|value|istyle|checked',
li => 'type|value',
marquee => 'behavior|direction|loop|height|width|scrollmount|scrolldelay|bgcolor',
menu => 'type',
meta => 'http\-equiv|content',
object => 'declare|id|data|type|width|height',
ol => 'type|start',
option => 'selected|value',
p => 'align',
param => 'name|value|valuetype',
plaintext => '',
pre => '',
select => 'name|size|multiple',
textarea => 'name|accesskey|rows|cols|istyle',
title => '',
ul => 'type',
};
$self->{codetag} = 'script|style';
$self->{ignoretag} = 'form|input|select|option|textarea';
$self->{html} = '';
}
sub _initparser {
my $self = shift;
$self->{html2} = '';
$self->{mhtml} = '';
$self->{tagcount} = 0;
$self->{mtagcount} = 0;
$self->{ismobilecontent} = '';
$self->{iscode} = 0;
$self->{parser} = HTML::Parser->new(
api_version => 3,
handlers => {
start => [$self->starthandler, 'text, tagname, attr'],
end => [$self->endhandler, 'text, tagname'],
text => [$self->texthandler, 'text'],
default => [$self->defaulthandler, 'event, text'],
},
);
}
sub starthandler {
my $self = shift;
return sub {
my ($text, $tag, $attr) = @_;
$self->{tagcount}++;
if ($tag =~ /^($self->{codetag})$/i) {
$self->{iscode}++;
}
if (defined $self->{mobiletag}->{$tag}) {
$self->{mtagcount}++;
if ($tag =~ /^($self->{ignoretag})$/i) {
return;
} else {
$self->{mhtml} .= $self->_makestartm($tag,$attr);
$self->{html2} .= $self->_makestart2($tag,$attr);
}
}
};
}
sub _makestartm {
my $self = shift;
my $tag = shift or return;
my $attr = shift or return;
if ($tag eq 'img') {
my ($w,$h) = @$attr{'width', 'height'};
unless ($w && $h && $w < 100 && $h < 100) {
my $text = 'img:';
$text .= $attr->{alt} || $attr->{title} || '';
return $text;
}
}
my $attrpat = $self->{mobiletag}->{$tag};
my $text = qq|<$tag|;
for my $key (keys %$attr) {
if ($key =~ /^(href|action)$/) {
$text .= qq| $key="| .
$self->{hrefhandler}($attr->{$key}) . '"';
} elsif ($key eq 'src') {
$text .= qq| $key="|. URI->new_abs($attr->{$key}, $self->{baseuri}) . '"';
} elsif ($key =~ /^($attrpat)$/i) {
$text .= qq| $key="$attr->{$key}"|;
}
}
$text .= '>';
return $text;
}
sub _makestart2 {
my $self = shift;
my $tag = shift or return;
my $attr = shift or return;
my $text = qq|<$tag|;
for my $key (keys %$attr) {
if ($key =~ /^(href|action)$/) {
$text .= qq| $key="| .
$self->{hrefhandler}($attr->{$key}) . '"';
} elsif ($key eq 'src') {
$text .= qq| $key="|. URI->new_abs($attr->{$key}, $self->{baseuri}) . '"';
} else {
$text .= qq| $key="$attr->{$key}"|;
}
}
$text .= '>';
return $text;
}
sub endhandler {
my $self = shift;
return sub {
my ($text, $tag) = @_;
if ($tag =~ /^($self->{codetag})$/i) {
$self->{iscode}--;
}
if (defined $self->{mobiletag}->{$tag}) {
if ($tag =~ /^($self->{ignoretag})$/i) {
$text = '';
} else {
$self->{mhtml} .= $text;
}
$self->{html2} .= $text;
}
};
}
sub texthandler {
my $self = shift;
return sub {
my ($text) = @_;
if (!$self->{iscode}) {
$self->{mhtml} .= $text;
}
$self->{html2} .= $text;
};
}
sub defaulthandler {
my $self = shift;
return sub {};
}
sub convert {
my $self = shift;
$self->{html} = shift or return;
$self->_initparser;
$self->{parser}->parse($self->{html});
if ($self->_checkmobile) {
return $self->{html2};
} else {
return $self->{mhtml};
}
}
sub _checkmobile {
my $self = shift;
if ($self->{maxlength} && (length($self->{html}) > $self->{maxlength})) {
return; # over max size
} elsif (($self->{tagcount} - $self->{mtagcount}) > $self->{maxpctagcount}) {
return; # includes many pc tags
} elsif (!$self->{tagcount}) {
# do nothing
} elsif (($self->{mtagcount} / $self->{tagcount}) < (1 - $self->{maxpctagrate})) {
return; # includes many pc tags
}
$self->{ismobilecontent} = 1;
return $self->ismobilecontent;
}
sub ismobilecontent {
my $self = shift;
return $self->{ismobilecontent};
}
sub param { $_[0]->{$_[1]}; }
1;
__END__
=head1 NAME
HTML::MobileConverter - HTML Converter for mobile agent
=head1 SYNOPSIS
use HTML::MobileConverter;
my $baseuri = 'http://example.com/';
my $c = HTML::MobileConverter->new(baseuri => $baseuri);
my $html =<title
my link