#!/usr/bin/perl
##################################################
# rel2abs -- Mike Schilli, 2002
# (m@perlmeister.com)
##################################################
use warnings;
use strict;
use HTML::Parser 3.0;
use URI::URL;
use File::Basename;
use Pod::Usage;
use Getopt::Std;
our $VERSION = "1.01";
our $CVSVERSION = '$Revision: 1.4 $';
my $BASE = shift;
warn "No base url specified\n", pod2usage() unless $BASE;
getopts("h", \my %opts);
pod2usage(-verbose => 2) if $opts{h};
die basename($0) . " $VERSION\n" if $opts{v};
pod2usage() unless @ARGV;
my $data = join '', <>;
# Parser aufsetzen
my $parser = HTML::Parser->new(
default_h => [ \&print_out, 'text' ],
start_h => [ \&warp_link,
'tagname,attrseq,attr,text']);
$parser->parse($data) || die $!;
##################################################
sub print_out {
##################################################
my ($text) = shift;
print $text;
}
##################################################
sub warp_link {
##################################################
my($tagname, $attrseq, $attr, $text) = @_;
my($path, $key);
if($tagname eq "img") {
#
Tag gefunden
$key = "src";
} elsif($tagname eq "a") {
# Tag gefunden
$key = "href";
} else {
# Anderes Tag => unverändert ausgeben
print_out $text;
return;
}
my $uri = URI::URL->new($attr->{$key});
$attr->{$key} = $uri->abs($BASE);
print "<" . uc($tagname) . " " .
join(" ", map { uc($_) . '="' .
$attr->{$_} . '"'
} @$attrseq ) .
">";
}
__END__
=head1 NAME
rel2abs - Make relative links in an HTML document absolute
=head1 DOWNLOAD
_SRC_HERE_
=head1 SYNOPSIS
rel2abs [-h] base_url [html_file]
base_url: The base URL used to make links absolute, e.g. http://perlmeister.com
html_file: The input html file. Output goes to STDOUT
Options:
-h get help
=head1 OPTIONS
=over 8
=item B<-h>
Prints this manual page in text format.
=back
=head1 DESCRIPTION
B takes a html document, either specified as a file on the command
line or streaming in via STDIN, and transforms all relative links in there
to absolute ones, using a specified base URL. It streams its output to STDOUT.
It is currently
looking for CIMG SRC=...E> and CA HREF=...E> tags only.
=head1 EXAMPLES
$ rel2abs http://perlmeister.com index.html >myindex.html
=head1 LEGALESE
Copyright 2002 by Mike Schilli, all rights reserved.
This program is free software, you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 AUTHOR
2002, Mike Schilli