#!/usr/bin/perl
#geturls
#Pulls the URLs from an html document, displaying them in sorted order
#Author: Zach Tomaszewski, Sep 18, 2002 1:03:01 PM
use strict;
use warnings;

# Main flow: read an HTML document from the file named on the command line,
# or from standard input when no argument is given, then print every distinct
# anchor URL on its own line in sorted order.
if (@ARGV > 1) {
    usage_error();
    exit 1;    #too many arguments: stop rather than carry on with no input
}

my $filelines;    #all the lines of the given document, joined together
if (@ARGV == 1) {
    my $filename = $ARGV[0];    #the url-containing file
    # Three-argument open with a lexical handle: a filename beginning with a
    # mode character such as '>' or '|' is always treated as a plain name.
    open(my $in, '<', $filename) or die "Could not open $filename: $!";
    $filelines = join('', <$in>);
    close $in;
}
else {
    $filelines = join('', <STDIN>);
}

print "$_\n" foreach extract_urls($filelines);

# extract_urls($html)
# Returns the distinct href values of the <a> tags found in $html, sorted in
# the default string order of sort.
#   $html - the text of the document to scan; undef is treated as empty.
# Tag and attribute names are matched case-insensitively. The href may be
# preceded or followed by other attributes, and its value may be wrapped in
# double quotes, wrapped in single quotes, or left unquoted. Whitespace just
# inside the quotes is not part of the returned value, and empty values are
# skipped.
# This is a pattern match, not an HTML parser: text that merely looks like
# an anchor tag (inside a comment or another attribute's value, for example)
# is picked up as well.
sub extract_urls {
    my ($html) = @_;
    return () unless defined $html;

    my $anchor_href = qr{
        < \s* a \s+                            # opening anchor tag
        (?: [^>]*? \s )?                       # attributes ahead of href
        href \s* = \s*
        (?: " \s* ( [^\s">] [^">]*? ) \s* "    # double-quoted value
          | ' \s* ( [^\s'>] [^'>]*? ) \s* '    # single-quoted value
          |       ( [^\s"'>]+ )                # unquoted value
        )
    }xi;

    # In list context a /g match returns all three capture groups for every
    # anchor; the two alternatives that did not take part come back as undef.
    my @found = grep { defined } $html =~ /$anchor_href/g;

    my %seen;    #hash keys collapse duplicate URLs
    @seen{@found} = ();

    my @urls = sort keys %seen;
    return @urls;
}
##END##
# usage_error
# Writes a three-line usage summary to standard output, describing the two
# ways of supplying the HTML document: as a filename argument, or on standard
# input. Takes no arguments. It only prints; it does not exit.
sub usage_error {
    my @usage_text = (
        "Usage:\n",
        "Either pass in a file to pass as a parameter (geturls sample.html)\n",
        " or pass in a file in through standard in (geturls < sample.html)\n",
    );
    print join('', @usage_text);
}
# ~ztomasze Index : TA : Assignment 1 : Solution
# http://www2.hawaii.edu/~ztomasze
# Last Edited: 01 Oct 2002 ©2002 by Z. Tomaszewski.