]> dxcluster.org Git - spider.git/commitdiff
added a query routine
authorminima <minima>
Sat, 14 May 2005 21:48:19 +0000 (21:48 +0000)
committerminima <minima>
Sat, 14 May 2005 21:48:19 +0000 (21:48 +0000)
perl/ARRL/DX.pm
perl/dbquery.pl [new file with mode: 0755]
perl/dbtest.pl

index 924ed6723b29fcffc1edb6195e4c828bceb5c928..b2e52a86b42fcf89bbf2131c021f0ec7d0845dda 100644 (file)
@@ -10,7 +10,7 @@ use strict;
 
 package ARRL::DX;
 
-use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error);
+use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error %stop $limit);
 
 main::mkver($VERSION = q$Revision$) if main->can('mkver');
 
@@ -30,14 +30,65 @@ $dbname = "$main::root/data/arrldx.db";
                          refer_t_idx => 'CREATE INDEX refer_t_idx ON refer(t DESC)',
                         );
 
-sub new
+%stop = (
+                A => 1,
+                ACTIVITY => 1,
+                AND => 1,
+                ARE => 1,
+                AS => 1,
+                AT => 1,
+                BE => 1,
+                BUT => 1,
+                FOR => 1,
+                FROM => 1,
+                HAS => 1,
+                HAVE => 1,
+                HE => 1,
+                I => 1,
+                IF => 1,
+                IN => 1,
+                IS => 1,
+                IT => 1,
+                LOOK => 1,
+                LOOKS => 1,
+                NOT => 1,
+                OF => 1,
+                ON => 1,
+                OR => 1,
+                OUT => 1,
+                SHE => 1,
+                SO => 1,
+                THAT => 1,
+                THE => 1,
+                THEM => 1,
+                THEY => 1,
+                THIS => 1,
+                THIS => 1,
+                TO => 1,
+                WAS => 1,
+                WHERE => 1,
+                WILL => 1,
+                WITH => 1,
+                YOU => 1,
+
+                JANUARY => 1,
+                FEBRUARY => 1,
+                MARCH => 1,
+                APRIL => 1,
+                MAY => 1,
+                JUNE => 1,
+                JULY => 1,
+                AUGUST => 1,
+                SEPTEMBER => 1,
+                OCTOBER => 1,
+                NOVEMBER => 1,
+                DECEMBER => 1,
+               );
+
+$limit = 10;
+
+sub do_connect
 {
-       my $pkg = shift;
-       my $class = ref $pkg || $pkg;
-       my %args = @_;
-       
-       $error = undef;
-       
        unless ($dbh) {
                $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "");
                unless ($dbh) {
@@ -46,18 +97,49 @@ sub new
                        $error = $DBI::errstr;
                        return;
                }
-               
-               # check that all the tables are present and correct
-               my $sth = $dbh->prepare("select name,type from sqlite_master") or $error = $DBI::errstr, return;
-               $sth->execute or $error = $DBI::errstr, return;
-               my %f;
-               while (my @row = $sth->fetchrow_array) {
-                       $f{$row[0]} = $row[1];
-               }
-               foreach my $t (sort keys %tabledefs) {
-                       $dbh->do($tabledefs{$t}) unless exists $f{$t};
-               }
-               $sth->finish;
+       }
+       return $dbh;
+}
+
+sub drop
+{
+       return unless do_connect();
+       my $sth = $dbh->prepare("select name,type from sqlite_master where type = 'table'") or $error = $DBI::errstr, return;
+       $sth->execute or $error = $DBI::errstr, return;
+       while (my @row = $sth->fetchrow_array) {
+               $dbh->do("drop table $row[0]");
+       }
+       $sth->finish;
+}
+
+sub create
+{
+       return unless $dbh;
+       
+       # check that all the tables are present and correct
+       my $sth = $dbh->prepare("select name,type from sqlite_master") or $error = $DBI::errstr, return;
+       $sth->execute or $error = $DBI::errstr, return;
+       my %f;
+       while (my @row = $sth->fetchrow_array) {
+               $f{$row[0]} = $row[1];
+       }
+       foreach my $t (sort keys %tabledefs) {
+               $dbh->do($tabledefs{$t}) unless exists $f{$t};
+       }
+       $sth->finish;
+}
+
+sub new
+{
+       my $pkg = shift;
+       my $class = ref $pkg || $pkg;
+       my %args = @_;
+       
+       $error = undef;
+       
+       unless ($dbh) {
+               return unless do_connect();
+               create();
        }
 
        my $self = {};
@@ -96,6 +178,7 @@ sub process
                } elsif ($state eq 'id') {
                        if (/^Newington\s+CT\s+(\w+)\s+(\d+),\s+(\d+)/i) {
                                $state = 'date' ;
+                               $self->{year} = $3;
                                $self->{date} = str2time("$1 $2 $3") if $state eq 'date';
                        }
                } elsif ($state eq 'date') {
@@ -139,8 +222,66 @@ sub insert
 {
        my $self = shift;
        my $sth = $dbh->prepare("insert into paragraph values(?,?,?)");
-       $sth->execute($self->{para}, $self->{date}, $self->{id});
+       $sth->execute($self->{para}, $self->{date}, "$self->{year}-$self->{id}");
        my $lastrow = $dbh->func('last_insert_rowid');
+       $sth->finish;
+       
+
+#      my @w = split /[.,;:\s"'\$\%!£^&\*\(\)\[\]\{\}\#\<\>+=]+/, $self->{para};
+       my @w = split m|[\b\s]+|, $self->{para};
+#      print join(' ', @w), "\n";
+       $sth = $dbh->prepare("insert into refer values(?,?,?,?)");
+       
+       my $i = 0;
+       for (@w) {
+               
+               # starts with a capital letter that isn't Q
+               if (/^[A-PR-Z]/ || m|\d+[A-Z][-/A-Z0-9]*$|) {
+                       # not all digits
+                       next if /^\d+$/;
+                       
+                       # isn't a stop word
+                       my $w = uc;
+                       $w =~ s/\W+$//;
+                       unless ($stop{$w}) {
+                               # add it into the word list
+                               $sth->execute($w, $lastrow, $self->{date}, $i);
+#                              print " $w";
+                       }
+               }
+               $i++;
+       }
+       $sth->finish;
+}
+
+sub query
+{
+       my $self = shift;
+       my %args = @_;
+       my @out;
+       
+       if ($args{'q'}) {
+        my @w = map { s|[^-/\w]||g; uc $_ } split /\s+/, $args{'q'};
+               if (@w) {
+                       my $s = qq{select distinct p, t, bullid from (select distinct rowid from refer where };
+                       while (@w) {
+                               my $w = shift @w;
+                               $s .= qq{r like '$w\%'};
+                               $s .= ' or ' if @w;
+                       }
+                       my $l = $args{l}; 
+                       $l =~ s/[^\d]//g;
+                       $l ||= $limit;
+                       $s .= qq{ order by t desc limit $l), paragraph where paragraph.ROWID=rowid};
+                       my $sth = $dbh->prepare($s) or $error = $DBI::errstr, return @out;
+                       $sth->execute;
+                       while (my @row = $sth->fetchrow_array) {
+                               push @out, \@row;
+                       }
+                       $sth->finish;
+               }
+       }
+       return @out;
 }
 
 sub close
diff --git a/perl/dbquery.pl b/perl/dbquery.pl
new file mode 100755 (executable)
index 0000000..2ac16d1
--- /dev/null
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+# test for independent sql servers
+# search local then perl directories
+
+use vars qw($root);
+
+BEGIN {
+       # root of directory tree for this system
+       $root = "/spider"; 
+       $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'};
+       
+       unshift @INC, "$root/perl";     # this IS the right way round!
+       unshift @INC, "$root/local";
+}
+
+use DXUtil;
+use DXDebug;
+use ARRL::DX;
+
+print "usage:\tdbquery.pl <words>\n\teg: dbquery.pl rtty lebanon\n\n" unless @ARGV;
+
+my $width = $ENV{'COLUMNS'} || $ENV{'COLS'} || 80;
+my $dx = ARRL::DX->new();
+my @out = $dx->query(q=>join(' ', @ARGV));
+
+foreach my $ref (@out) {
+       my $s = cldate($ref->[1]);
+       for (split /\s+/, "$ref->[0] [$ref->[2]]") {
+               if (length($s) + length($_) + 1 < $width ) {
+                       $s .= ' ' if length $s;
+                       $s .= $_;
+               } else {
+                       print "$s\n";
+                       $s = $_;
+               }
+       }
+       print "$s\n" if length $s;
+       print "\n";
+}
+
+exit 0;
index c4f5ca8e9d259a08cd016ce1530dc3b246654306..6951c76dbf041276d93227e3d8b54c390e6bf302 100755 (executable)
@@ -20,6 +20,10 @@ use ARRL::DX;
 
 while (@ARGV) {
        my $fn = shift;
+       if ($fn eq '-d') {
+               ARRL::DX::drop();
+               next;
+       }
        print "Processing $fn ";
        my $dx = ARRL::DX->new(file=>$fn);
        my $c = $dx->process;