From: minima Date: Wed, 23 Aug 2000 13:59:16 +0000 (+0000) Subject: added DXDupe for persistant dupes (and to allow dup checking for other X-Git-Tag: R_1_44~21 X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=44bab9382ffb0bf12639af84729d1c42ac4d9ae2;p=spider.git added DXDupe for persistant dupes (and to allow dup checking for other things as well - as required) --- diff --git a/Changes b/Changes index 80f46a0e..c3797703 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,6 @@ +23Aug00======================================================================= +1. Added persistant dupe file so that all dupes are stored here (including +announces) - announces are now kept for 5 days (as default). 20Aug00======================================================================= 1. Added system Alias for set/nodxgrid => unset/dxgrid 2. Add full individual checking for all PC protocol fields in all messages diff --git a/perl/AnnTalk.pm b/perl/AnnTalk.pm index 911c2724..987b885d 100644 --- a/perl/AnnTalk.pm +++ b/perl/AnnTalk.pm @@ -12,48 +12,28 @@ use strict; use DXUtil; use DXDebug; +use DXDupe; use vars qw(%dup $duplth $dupage); -%dup = (); # the duplicates hash $duplth = 60; # the length of text to use in the deduping -$dupage = 24*3600; # the length of time to hold spot dups +$dupage = 5*24*3600; # the length of time to hold spot dups # enter the spot for dup checking and return true if it is already a dup sub dup { my ($call, $to, $text) = @_; - my $d = $main::systime; chomp $text; unpad($text); $text = substr($text, 0, $duplth) if length $text > $duplth; - my $dupkey = "$to|$text"; - return 1 if exists $dup{$dupkey}; - $dup{$dupkey} = $d; # in seconds (to the nearest minute) - return 0; -} - -# called every hour and cleans out the dup cache -sub process -{ - my $cutoff = $main::systime - $dupage; - while (my ($key, $val) = each %dup) { - delete $dup{$key} if $val < $cutoff; - } + my $dupkey = "A$to|$text"; + return DXDupe::check($dupkey, $main::systime + $dupage); } sub listdups { - my $regex = shift; - $regex = '.*' unless $regex; - $regex =~ s/[\$\@\%]//g; - my @out; - for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) { - my $val = $dup{$_}; - push @out, "$_ = " . cldatetime($val); - } - return @out; + return DXDupe::listdups('A', $dupage, @_); } diff --git a/perl/DXDupe.pm b/perl/DXDupe.pm new file mode 100644 index 00000000..2ab0ca8c --- /dev/null +++ b/perl/DXDupe.pm @@ -0,0 +1,84 @@ +# +# class to handle all dupes in the system +# +# each dupe entry goes into a tied hash file +# +# the only thing this class really does is provide a +# mechanism for storing and checking dups +# + +package DXDupe; + +use DXDebug; +use DXUtil; +use DXVars; + +use vars qw{$lasttime $dbm %d $default $fn}; + +$default = 48*24*60*60; +$lasttime = 0; +$fn = "$main::data/dupefile"; + +sub init +{ + $dbm = tie (%d, 'DB_File', $fn) or confess "can't open dupe file: $fn ($!)"; +} + +sub finish +{ + undef $dbm; + untie %d; +} + +sub check +{ + my ($s, $t) = @_; + return 1 if exists $d{$s}; + $t = $main::systime + $default unless $t; + $d{$s} = $t; + return 0; +} + +sub del +{ + my $s = shift; + delete $d{$s}; +} + +sub process +{ + # once an hour + if ($main::systime - $lasttime >= 3600) { + while (($k, $v) = each %d) { + delete $d{$k} if $main::systime >= $v; + } + $lasttime = $main::systime; + } +} + +sub get +{ + my $start = shift; + my @out; + while (($k, $v) = each %d) { + push @out, $k, $v if !$start || $k =~ /^$start/; + } + return @out; +} + +sub listdups +{ + my $let = shift; + my $dupage = shift; + my $regex = shift; + + $regex =~ s/[\^\$\@\%]//g; + $regex = "^$let" . $regex; + my @out; + for (sort { $d{$a} <=> $d{$b} } grep { m{$regex}i } keys %d) { + my ($dum, $key) = unpack "a1a*", $_; + push @out, "$key = " . cldatetime($d{$_} - $dupage); + } + return @out; +} +1; diff --git a/perl/DXProt.pm b/perl/DXProt.pm index 09708332..e7f3c7c0 100644 --- a/perl/DXProt.pm +++ b/perl/DXProt.pm @@ -175,16 +175,16 @@ sub init $me->{state} = "indifferent"; do "$main::data/hop_table.pl" if -e "$main::data/hop_table.pl"; confess $@ if $@; - # $me->{sort} = 'M'; # M for me + $me->{sort} = 'S'; # S for spider # now prime the spot and wwv duplicates file with data - my @today = Julian::unixtoj(time); - for (Spot::readfile(@today), Spot::readfile(Julian::sub(@today, 1))) { - Spot::dup(@{$_}[0..3]); - } - for (Geomag::readfile(time)) { - Geomag::dup(@{$_}[1..5]); - } +# my @today = Julian::unixtoj(time); +# for (Spot::readfile(@today), Spot::readfile(Julian::sub(@today, 1))) { +# Spot::dup(@{$_}[0..3]); +# } +# for (Geomag::readfile(time)) { +# Geomag::dup(@{$_}[1..5]); +# } # load the baddx file do "$baddxfn" if -e "$baddxfn"; @@ -1027,9 +1027,9 @@ sub process my $val; my $cutoff; if ($main::systime - 3600 > $last_hour) { - Spot::process; - Geomag::process; - AnnTalk::process; +# Spot::process; +# Geomag::process; +# AnnTalk::process; $last_hour = $main::systime; } } diff --git a/perl/Geomag.pm b/perl/Geomag.pm index 05aefeff..037dcc50 100644 --- a/perl/Geomag.pm +++ b/perl/Geomag.pm @@ -16,11 +16,12 @@ use DXLog; use Julian; use IO::File; use DXDebug; +use DXDupe; use strict; use vars qw($date $sfi $k $a $r $forecast @allowed @denied $fp $node $from $dirprefix $param - %dup $duplth $dupage); + $duplth $dupage); $fp = 0; # the DXLog fcb $date = 0; # the unix time of the WWV (notional) @@ -33,7 +34,6 @@ $node = ""; # originating node $from = ""; # who this came from @allowed = (); # if present only these callsigns are regarded as valid WWV updators @denied = (); # if present ignore any wwv from these callsigns -%dup = (); # the spot duplicates hash $duplth = 20; # the length of text to use in the deduping $dupage = 12*3600; # the length of time to hold spot dups @@ -252,34 +252,13 @@ sub dup # dump if too old return 2 if $d < $main::systime - $dupage; -# chomp $text; -# $text = substr($text, 0, $duplth) if length $text > $duplth; - my $dupkey = "$d|$sfi|$k|$a"; - return 1 if exists $dup{$dupkey}; - $dup{$dupkey} = $d; # in seconds (to the nearest minute) - return 0; -} - -# called every hour and cleans out the dup cache -sub process -{ - my $cutoff = $main::systime - $dupage; - while (my ($key, $val) = each %dup) { - delete $dup{$key} if $val < $cutoff; - } + my $dupkey = "W$d|$sfi|$k|$a"; + return DXDupe::check($dupkey, $main::systime+$dupage); } sub listdups { - my $regex = shift; - $regex = '.*' unless $regex; - $regex =~ s/[\$\@\%]//g; - my @out; - for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) { - my $val = $dup{$_}; - push @out, "$_ = " . cldatetime($val); - } - return @out; + return DXDupe::listdups('W', $dupage, @_); } 1; __END__; diff --git a/perl/Spot.pm b/perl/Spot.pm index 1e7de69a..e7a619e0 100644 --- a/perl/Spot.pm +++ b/perl/Spot.pm @@ -15,16 +15,16 @@ use DXUtil; use DXLog; use Julian; use Prefix; +use DXDupe; use strict; -use vars qw($fp $maxspots $defaultspots $maxdays $dirprefix %dup $duplth $dupage); +use vars qw($fp $maxspots $defaultspots $maxdays $dirprefix $duplth $dupage); $fp = undef; $maxspots = 50; # maximum spots to return $defaultspots = 10; # normal number of spots to return $maxdays = 35; # normal maximum no of days to go back $dirprefix = "spots"; -%dup = (); # the spot duplicates hash $duplth = 20; # the length of text to use in the deduping $dupage = 3*3600; # the length of time to hold spot dups @@ -215,32 +215,13 @@ sub dup chomp $text; $text = substr($text, 0, $duplth) if length $text > $duplth; unpad($text); - my $dupkey = "$freq|$call|$d|$text"; - return 1 if exists $dup{$dupkey}; - $dup{$dupkey} = $d; # in seconds (to the nearest minute) - return 0; -} - -# called every hour and cleans out the dup cache -sub process -{ - my $cutoff = $main::systime - $dupage; - while (my ($key, $val) = each %dup) { - delete $dup{$key} if $val < $cutoff; - } + my $dupkey = "X$freq|$call|$d|$text"; + return DXDupe::check($dupkey, $main::systime+$dupage); } sub listdups { - my $regex = shift; - $regex = '.*' unless $regex; - $regex =~ s/[\$\@\%]//g; - my @out; - for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) { - my $val = $dup{$_}; - push @out, "$_ = " . cldatetime($val); - } - return @out; + return DXDupe::listdups('X', $dupage, @_); } 1; diff --git a/perl/WCY.pm b/perl/WCY.pm index 20b6a184..f3202dd4 100644 --- a/perl/WCY.pm +++ b/perl/WCY.pm @@ -20,7 +20,7 @@ use Data::Dumper; use strict; use vars qw($date $sfi $k $expk $a $r $sa $gmf $au @allowed @denied $fp $node $from $dirprefix $param - %dup $duplth $dupage); + $duplth $dupage); $fp = 0; # the DXLog fcb $date = 0; # the unix time of the WWV (notional) @@ -35,7 +35,6 @@ $node = ""; # originating node $from = ""; # who this came from @allowed = (); # if present only these callsigns are regarded as valid WWV updators @denied = (); # if present ignore any wwv from these callsigns -%dup = (); # the spot duplicates hash $duplth = 20; # the length of text to use in the deduping $dupage = 12*3600; # the length of time to hold spot dups @@ -227,34 +226,13 @@ sub dup # dump if too old return 2 if $d < $main::systime - $dupage; -# chomp $text; -# $text = substr($text, 0, $duplth) if length $text > $duplth; - my $dupkey = "$d|$sfi|$k|$a|$r"; - return 1 if exists $dup{$dupkey}; - $dup{$dupkey} = $d; # in seconds (to the nearest minute) - return 0; -} - -# called every hour and cleans out the dup cache -sub process -{ - my $cutoff = $main::systime - $dupage; - while (my ($key, $val) = each %dup) { - delete $dup{$key} if $val < $cutoff; - } + my $dupkey = "C$d|$sfi|$k|$a|$r"; + return DXDupe::check($dupkey, $main::systime+$dupage); } sub listdups { - my $regex = shift; - $regex = '.*' unless $regex; - $regex =~ s/[\$\@\%]//g; - my @out; - for (sort { $dup{$a} <=> $dup{$b} } grep { m{$regex}i } keys %dup) { - my $val = $dup{$_}; - push @out, "$_ = " . cldatetime($val); - } - return @out; + return DXDupe::listdups('C', $dupage, @_); } 1; __END__; diff --git a/perl/cluster.pl b/perl/cluster.pl index ec1030e8..dfae3278 100755 --- a/perl/cluster.pl +++ b/perl/cluster.pl @@ -61,6 +61,7 @@ use Filter; use DXDb; use AnnTalk; use WCY; +use DXDupe; use Data::Dumper; use Fcntl ':flock'; @@ -219,6 +220,7 @@ sub cease Msg->event_loop(1, 0.05); Msg->event_loop(1, 0.05); DXUser::finish(); + DXDupe::finish(); # close all databases DXDb::closeall; @@ -340,6 +342,9 @@ for (keys %SIG) { } } +# start dupe system +DXDupe::init(); + # read in system messages DXM->init(); @@ -410,6 +415,8 @@ for (;;) { DXMsg::process(); DXDb::process(); DXUser::process(); + DXDupe::process(); + eval { Local::process(); # do any localised processing };