and
# Emit the whitespace that a tag implies in the rendered text.
#
# Arguments:
#   $self - object providing display_text()
#   $tag  - tag name, compared as-is (the tests are case-sensitive)
#
# Calls $self->display_text($text, whitespace => 1) exactly once when the
# tag belongs to one of the groups below; makes no call for any other tag.
# Nothing in view uses the return value.
sub html_whitespace {
  my ($self, $tag) = @_;

  # Tag groups are tested in order of how frequently they occur; note that
  # the whitespace produced here is always "visible".
  my $text =
      ($tag eq "br" || $tag eq "div")                                 ? "\n"
    : $tag =~ /^(?:li|t[hd]|d[td]|embed|h\d)$/                        ? " "
    : $tag =~ /^(?:p|hr|blockquote|pre|listing|plaintext|xmp|title)$/ ? "\n\n"
    :                                                                   undef;

  # Unlisted tags contribute no whitespace at all.
  $self->display_text($text, whitespace => 1) if defined $text;
}