X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2Fcallbot.pl;fp=perl%2Fcallbot.pl;h=6d845a215a91263667ef5b05bca1df629bce94ac;hb=9b16ab623efe48723ba472624cf4020b155f683c;hp=0000000000000000000000000000000000000000;hpb=a453254d687774f352822492d500330a09fd7ebe;p=spider.git diff --git a/perl/callbot.pl b/perl/callbot.pl new file mode 100755 index 00000000..6d845a21 --- /dev/null +++ b/perl/callbot.pl @@ -0,0 +1,295 @@ +#!/usr/bin/perl -w +# +# an attempt at producing a general purpose 'bot' for going and getting +# things orf the web and presenting them to user in a form they want +# +# This program uses LWP::Parallel::UserAgent to do its business +# +# each sub bot has the same structure and calling interface, but the actual +# input and output data formats are completely arbitrary +# +# Copyright (c) 1999 - Dirk Koopman, Tobit Computer Co Ltd +# +# $Id$ +# + +package main; + +BEGIN { + unshift @INC, '.'; +} + +use strict; +use ForkingServer; +require LWP::Parallel::UserAgent; +use HTTP::Request; +use URI::Escape; +use IO::File; +use Carp; +use Text::ParseWords; +use QRZ; +use Buck; +use K4UTE; + +use vars qw($version); + +$version = "1.1"; + +sub cease +{ + $SIG{INT} = $SIG{TERM} = 'IGNORE'; + exit(0); +} + +sub trancode +{ + $_ = shift; + + return 'Continue' if /100/; + return 'Switching protocols' if /101/; + + return 'Ok' if /200/; + return 'Created' if /201/; + return 'Accepted' if /202/; + return 'Non Authoritive' if /203/; + return 'No Content' if /204/; + return 'Reset Content' if /205/; + return 'Partial Content' if /206/; + + return 'Multiple Choices' if /300/; + return 'Moved Permanently' if /301/; + return 'Found, redirect' if /302/; + return 'See Other' if /303/; + return 'Not modified' if /304/; + return 'Use proxy' if /305/; + + return 'Bad request' if /400/; + return 'Unauthorized' if /401/; + return 'Payment required' if /402/; + return 'Forbidden' if /403/; + return 'Not Found' if /404/; + return 'Method not allowed' if /405/; + return 'Not acceptable' if /406/; + return 'Proxy authentication required' if /407/; + return 'Request timeout' if /408/; + return 'Conflict' if /409/; + return 'Gone' if /410/; + return 'Length required' if /411/; + return 'Precondition failed' if /412/; + return 'Request entity too large' if /413/; + return 'Request-URI too long' if /414/; + return 'Unsupported media type' if /415/; + return 'Requested range not satifiable' if /416/; + return 'Expectation failed' if /417/; + + return 'Internal server error' if /500/; + return 'Not implemented' if /501/; + return 'Bad gateway' if /502/; + return 'Service unavailable' if /503/; + return 'Gateway timeout' if /504/; + return 'HTTP version not supported' if /505/; + + return 'Unknown'; +} + +sub genpat +{ + my $s = shift; + $s =~ s/\*/\\S+/g; + $s =~ s/\b(?:THE|\&|A|AND|OR|NOT)\b//gi; + $s =~ s/(?:\(|\))//g; + return join('|', split(/\s+/, $s)); +} + +# qrz specific routines +sub req_qrz +{ + my ($ua, $call, $title) = @_; + my $sreq = "http://www.qrz.com/callsign.html?callsign=$call"; +# print "$sreq\n"; + my $req = HTTP::Request->new('GET', $sreq); + return $ua->register($req); +} + +sub parse_qrz +{ + my ($fh, $call, $title, $code, $content) = @_; + if ($code != 200) { + print $fh "QRZ|$code|", trancode($code), "\n"; + return; + } + + # parse the HTML + my $r = new QRZ $call; + $r->debug(0); + my $i; + my $chunk; + my $l = length $content; + for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) { + $r->parse($chunk); + } + $r->eof; + + my @lines = $r->answer; + for (@lines) { + print $fh "QRZ|$code|$_\n" if $_; + } + print "lines: ", scalar @lines, "\n"; +} + +# k4ute specific routines +sub req_ute +{ + my ($ua, $call, $title) = @_; + my $sreq = "http://no4j.com/nfdxa/qsl/index.asp?dx=$call"; +# print "$sreq\n"; + my $req = HTTP::Request->new('GET', $sreq); + return $ua->register($req); +} + +sub parse_ute +{ + my ($fh, $call, $title, $code, $content) = @_; + if ($code != 200) { + print $fh "UTE|$code|", trancode($code), "\n"; + return; + } + + # parse the HTML + my $r = new K4UTE $call; + $r->debug(0); + my $i; + my $chunk; + my $l = length $content; + for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) { + $r->parse($chunk); + } + $r->eof; + + my @lines = $r->answer; + for (@lines) { + print $fh "UTE|$code|$_\n" if $_; + } + print "lines: ", scalar @lines, "\n"; +} + +# buckmaster specific routines +sub req_buck +{ + my ($ua, $call, $title) = @_; + my $sreq = "http://www.buck.com/cgi-bin/do_hamcallexe"; +# print "$sreq\n"; + my $req = HTTP::Request->new('POST', $sreq); + $req->add_content("entry=$call"); + return $ua->register($req); +} + +sub parse_buck +{ + my ($fh, $call, $title, $code, $content) = @_; + if ($code != 200) { + print $fh "BCK|$code|", trancode($code), "\n"; + return; + } + + # parse the HTML + my $r = new Buck $call; + $r->debug(0); + my $i; + my $chunk; + my $l = length $content; + for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) { + $r->parse($chunk); + } + $r->eof; + + my @lines = $r->answer; + for (@lines) { + print $fh "BCK|$code|$_\n" if $_; + } + print "lines: ", scalar @lines, "\n"; +} + + +# this is what is called when an incoming request is taken +sub child +{ + my $fh = shift; + + my $line; + + if (defined ($line = <$fh>)) { + $line =~ s/[\r\n]+$//g; + print "{$line}\n"; + } else { + return; + } + + $line =~ s/^[^[A-Za-z0-9\|]]+//g; + + my ($call, $title) = split /\|/, $line; + return if $call eq 'quit' || $call eq 'QUIT'; + + print "{A = '$call'"; + print $title ? ", T = '$title'}\n" : "}\n"; + + my $ua = LWP::Parallel::UserAgent->new; + + # set up various UA things + $ua->duplicates(0); # ignore duplicates + $ua->timeout(30); + $ua->redirect(1); # follow 302 redirects + $ua->agent("DXSpider callbot $version"); + + my $res; + my $art = uri_escape($call); + my $tit = uri_escape($title); + + # qrz + if ($res = req_qrz($ua, $art, $tit)) { + print $fh "QRZ|500\n"; + } + # buckmaster + if ($res = req_buck($ua, $art, $tit)) { + print $fh "BCK|500\n"; + } + # ute + if ($res = req_ute($ua, $art, $tit)) { + print $fh "UTE|500\n"; + } + + # wait for all the results to come back + my $entries = $ua->wait(); + + for (keys %$entries) { + $res = $entries->{$_}->response; + my $uri = $res->request->url; + my $code = $res->code; + print "url: ", $uri, " code: ", $code, "\n"; + + # now parse each result + for ($uri) { + parse_qrz($fh, $call, $title, $code, $res->content), last if /www.qrz.com/i; + parse_buck($fh, $call, $title, $code, $res->content), last if /www.buck.com/i; + parse_ute($fh, $call, $title, $code, $res->content), last if /no4j.com/i; + } + } + cease(0); +} + +$SIG{INT} = \&cease; +$SIG{QUIT} = \&cease; +$SIG{HUP} = 'IGNORE'; +STDOUT->autoflush(1); + +my $server = new ForkingServer \&child; + +$server->allow('.*'); +$server->run; + +cease(0); + + + + +