#!/usr/bin/perl
#
# Purpose: For every search phrase listed in a file, run a Google search
# (via Net::Google) and report at which result positions a given domain
# shows up.
#
# Usage:   script.pl <domain> <keyword-file>
#
#   <domain>        text looked for inside each result URL (literal match)
#   <keyword-file>  text file with one search phrase per line
#
# Output:  each search phrase on its own line, followed by one line per
#          matching result holding its 1-based position in the result list.

use strict;
use warnings;

use LWP::Simple;
use HTML::Parser ();
use Net::Google;

use constant GOOGLE_LICENSE_KEY => 'xxxxxxxxxxxxxxx';
use constant MAX_RESULTS        => 50;

# Both arguments are required; fail early with a usage hint instead of
# running searches against an undefined domain or file name.
die "Usage: $0 <domain> <keyword-file>\n" if @ARGV < 2;

my ($domain, $keyword_file) = @ARGV;

my $google = Net::Google->new( key => GOOGLE_LICENSE_KEY );

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle is scoped to this script.
open( my $keywords_fh, '<', $keyword_file )
    or die "Could not open file! ($keyword_file: $!)\n";

LINE:
while ( my $line = <$keywords_fh> ) {

    # Strip the line terminator so it is not sent as part of the query.
    chomp $line;

    # Nothing to search for on blank lines.
    next LINE if $line !~ /\S/;

    my $search = $google->search();
    $search->max_results(MAX_RESULTS);
    $search->query($line);

    # Plain print: the phrase is data, not a printf format string, so a
    # literal '%' in it must not be interpreted.
    print "$line\n";

    my $position = 1;
    for my $result ( @{ $search->results() } ) {
        my $url = $result->URL();

        # \Q...\E makes the domain match literally ('.' is not a wildcard).
        if ( defined $url && $url =~ /\Q$domain\E/ ) {
            printf "%3d \n", $position;
        }
        $position++;
    }
}

close($keywords_fh);