#!/usr/bin/perl ################################################## # rel2abs -- Mike Schilli, 2002 # (m@perlmeister.com) ################################################## use warnings; use strict; use HTML::Parser 3.0; use URI::URL; use File::Basename; use Pod::Usage; use Getopt::Std; our $VERSION = "1.01"; our $CVSVERSION = '$Revision: 1.4 $'; my $BASE = shift; warn "No base url specified\n", pod2usage() unless $BASE; getopts("h", \my %opts); pod2usage(-verbose => 2) if $opts{h}; die basename($0) . " $VERSION\n" if $opts{v}; pod2usage() unless @ARGV; my $data = join '', <>; # Parser aufsetzen my $parser = HTML::Parser->new( default_h => [ \&print_out, 'text' ], start_h => [ \&warp_link, 'tagname,attrseq,attr,text']); $parser->parse($data) || die $!; ################################################## sub print_out { ################################################## my ($text) = shift; print $text; } ################################################## sub warp_link { ################################################## my($tagname, $attrseq, $attr, $text) = @_; my($path, $key); if($tagname eq "img") { # Tag gefunden $key = "src"; } elsif($tagname eq "a") { # Tag gefunden $key = "href"; } else { # Anderes Tag => unverändert ausgeben print_out $text; return; } my $uri = URI::URL->new($attr->{$key}); $attr->{$key} = $uri->abs($BASE); print "<" . uc($tagname) . " " . join(" ", map { uc($_) . '="' . $attr->{$_} . '"' } @$attrseq ) . ">"; } __END__ =head1 NAME rel2abs - Make relative links in an HTML document absolute =head1 DOWNLOAD _SRC_HERE_ =head1 SYNOPSIS rel2abs [-h] base_url [html_file] base_url: The base URL used to make links absolute, e.g. http://perlmeister.com html_file: The input html file. Output goes to STDOUT Options: -h get help =head1 OPTIONS =over 8 =item B<-h> Prints this manual page in text format. =back =head1 DESCRIPTION B takes a html document, either specified as a file on the command line or streaming in via STDIN, and transforms all relative links in there to absolute ones, using a specified base URL. It streams its output to STDOUT. It is currently looking for CIMG SRC=...E> and CA HREF=...E> tags only. =head1 EXAMPLES $ rel2abs http://perlmeister.com index.html >myindex.html =head1 LEGALESE Copyright 2002 by Mike Schilli, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR 2002, Mike Schilli