add LRU caching
[spider.git] / perl / Prefix.pm
index 7e0eeb7a7c38538128c9f78e2367d293b37f09a2..a07ebdbdde8ff5dc8fd727bd5784829b1729001c 100644 (file)
@@ -14,7 +14,7 @@ use DB_File;
 use Data::Dumper;
 use DXDebug;
 use DXUtil;
-
+use LRU;
 
 use strict;
 
@@ -24,26 +24,33 @@ $BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/  || (0,0))
 $main::build += $VERSION;
 $main::branch += $BRANCH;
 
-use vars qw($db  %prefix_loc %pre);
+use vars qw($db %prefix_loc %pre $lru $lrusize $misses $hits $matchtotal);
 
 $db = undef;                                   # the DB_File handle
 %prefix_loc = ();                              # the meat of the info
 %pre = ();                                             # the prefix list
+$hits = $misses = $matchtotal = 1;             # cache stats
+$lrusize = 1000;                               # size of prefix LRU cache
+
+$lru = LRU->newbase('Prefix', $lrusize);
 
 sub load
 {
+       # untie everything
        if ($db) {
                undef $db;
                untie %pre;
                %pre = ();
                %prefix_loc = ();
        }
-       $db = tie(%pre, "DB_File", undef, O_RDWR|O_CREAT, 0666, $DB_BTREE) or confess "can't tie \%pre ($!)";  
+
+       # tie the main prefix database
+       $db = tie(%pre, "DB_File", undef, O_RDWR|O_CREAT, 0664, $DB_BTREE) or confess "can't tie \%pre ($!)";  
        my $out = $@ if $@;
        do "$main::data/prefix_data.pl" if !$out;
        $out = $@ if $@;
-       #  print Data::Dumper->Dump([\%pre, \%prefix_loc], [qw(pre prefix_loc)]);
-       return $out;
+
+       return $out;
 }
 
 sub store
@@ -97,20 +104,16 @@ sub store
 # This routine will only do what you ask for, if you wish to be intelligent
 # then that is YOUR problem!
 #
+
 sub get
 {
        my $key = shift;
-       my @out;
-       my @outref;
        my $ref;
-       my $gotkey;
-  
-       $gotkey = $key;
+       my $gotkey = $key;
        return () if $db->seq($gotkey, $ref, R_CURSOR);
        return () if $key ne substr $gotkey, 0, length $key;
 
-       @outref = map { $prefix_loc{$_} } split ',', $ref;
-       return ($gotkey, @outref);
+       return ($gotkey,  map { $prefix_loc{$_} } split ',', $ref);
 }
 
 #
@@ -120,16 +123,13 @@ sub get
 sub next
 {
        my $key = shift;
-       my @out;
-       my @outref;
        my $ref;
        my $gotkey;
   
        return () if $db->seq($gotkey, $ref, R_NEXT);
        return () if $key ne substr $gotkey, 0, length $key;
   
-       @outref = map { $prefix_loc{$_} } split ',', $ref;
-       return ($gotkey, @outref);
+       return ($gotkey, map { $prefix_loc{$_} } split ',', $ref);
 }
 
 # 
@@ -140,16 +140,34 @@ sub next
 sub matchprefix
 {
        my $pref = shift;
+       my @partials;
 
        for (my $i = length $pref; $i; $i--) {
+               $matchtotal++;
                my $s = substr($pref, 0, $i);
-               my @out = get($s);
-               if (isdbg('prefix')) {
-                       my $part = $out[0] || "*";
-                       $part .= '*' unless $part eq '*' || $part eq $s;
-                       dbg("Partial prefix: $pref $s $part" );
-               } 
-               return @out if @out && $out[0] eq $s;
+               push @partials, $s;
+               my $p = $lru->get($s);
+               if ($p) {
+                       $hits++;
+                       if (isdbg('prefix')) {
+                               my $percent = sprintf "%.1f", $hits * 100 / $misses;
+                               dbg("Partial Prefix Cache Hit: $s Hits: $hits/$misses of $matchtotal = $percent\%");
+                       }
+                       $lru->put($_, $p) for @partials;
+                       return @$p;
+               } else {
+                       $misses++;
+                       my @out = get($s);
+                       if (isdbg('prefix')) {
+                               my $part = $out[0] || "*";
+                               $part .= '*' unless $part eq '*' || $part eq $s;
+                               dbg("Partial prefix: $pref $s $part" );
+                       } 
+                       if (@out && $out[0] eq $s) {
+                               $lru->put($_, \@out) for @partials;
+                               return @out;
+                       } 
+               }
        }
        return ();
 }
@@ -169,14 +187,32 @@ sub extract
        my $p;
        my @parts;
        my ($call, $sp, $i);
-  
+
 LM:    foreach $call (split /,/, $calls) {
-               # first check if the whole thing succeeds
-               my @nout = get($call);
-               if (@nout && $nout[0] eq $call) {
-                       dbg("got exact prefix: $nout[0]") if isdbg('prefix');
-                       push @out, @nout;
+
+               # first check whether the whole call succeeds, either because it is
+               # already cached or because it is stored as-is as a callsign (or a prefix)
+               $matchtotal++;
+               $call =~ s/-\d+$//;             # ignore SSIDs
+               my $p = $lru->get($call);
+               my @nout;
+               if ($p) {
+                       $hits++;
+                       if (isdbg('prefix')) {
+                               my $percent = sprintf "%.1f", $hits * 100 / $misses;
+                               dbg("Prefix Cache Hit: $call Hits: $hits/$misses of $matchtotal = $percent\%");
+                       }
+                       push @out, @$p;
                        next;
+               } else {
+                       @nout =  get($call);
+                       if (@nout && $nout[0] eq $call) {
+                               $misses++;
+                               $lru->put($call, \@nout);
+                               dbg("got exact prefix: $nout[0]") if isdbg('prefix');
+                               push @out, @nout;
+                               next;
+                       }
                }
 
                # now split the call into parts if required
@@ -192,6 +228,8 @@ LM: foreach $call (split /,/, $calls) {
                        @nout = get($s);
                        if (@nout && $nout[0] eq $s) {
                                dbg("got exact multipart prefix: $call $s") if isdbg('prefix');
+                               $misses++;
+                               $lru->put($call, \@nout);
                                push @out, @nout;
                                next;
                        }
@@ -210,6 +248,8 @@ LM: foreach $call (split /,/, $calls) {
                                my @try = get($s);
                                if (@try && $try[0] eq $s) {
                                        dbg("got 3 part prefix: $call $s") if isdbg('prefix');
+                                       $misses++;
+                                       $lru->put($call, \@try);
                                        push @out, @try;
                                        next;
                                }
@@ -231,6 +271,8 @@ LM: foreach $call (split /,/, $calls) {
                                my @try = get($s);
                                if (@try && $try[0] eq $s) {
                                        dbg("got 2 part prefix: $call $s") if isdbg('prefix');
+                                       $misses++;
+                                       $lru->put($call, \@try);
                                        push @out, @try;
                                        next;
                                }
@@ -244,7 +286,9 @@ LM: foreach $call (split /,/, $calls) {
                if (@parts == 1) {
                        @nout = matchprefix($parts[0]);
                        if (@nout) {
-                               dbg("got prefix: $call ]") if isdbg('prefix');
+                               dbg("got prefix: $call = $nout[0]") if isdbg('prefix');
+                               $misses++;
+                               $lru->put($call, \@nout);
                                push @out, @nout;
                                next;
                        }
@@ -288,11 +332,17 @@ L1:               for ($n = 0; $n < @parts; $n++) {
                                                dbg("Compound prefix: $try $part" );
                                        }
                                        if (@try && $try eq $try[0]) {
+                                               $misses++;
+                                               $lru->put($call, \@try);
                                                push @out, @try;
                                        } else {
+                                               $misses++;
+                                               $lru->put($call, \@nout);
                                                push @out, @nout;
                                        }
                                } else {
+                                       $misses++;
+                                       $lru->put($call, \@nout);
                                        push @out, @nout;
                                }
                                next LM;
@@ -300,10 +350,13 @@ L1:               for ($n = 0; $n < @parts; $n++) {
                }
 
                # we are a pirate!
-               push @out, matchprefix('Q');
+               @nout = matchprefix('Q');
+               $misses++;
+               $lru->put($call, \@nout);
+               push @out, @nout;
        }
        
-       if (isdbg('prefix')) {
+       if (isdbg('prefixdata')) {
                my $dd = new Data::Dumper([ \@out ], [qw(@out)]);
                dbg($dd->Dumpxs);
        }