X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=perl%2FDXDupe.pm;h=1dd7c700ff9fdbc0fc40202e6c6a7df328a79d0a;hb=033dfa11732db0ca660bf171bc21650c24fa6040;hp=bcc0f7b0ea843a0aeaf080781a4a1355b7c8db2b;hpb=3634fba90a64fe488d237f438d9945d81158da52;p=spider.git diff --git a/perl/DXDupe.pm b/perl/DXDupe.pm index bcc0f7b0..1dd7c700 100644 --- a/perl/DXDupe.pm +++ b/perl/DXDupe.pm @@ -21,13 +21,20 @@ $fn = "$main::data/dupefile"; use vars qw($VERSION $BRANCH); $VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ ); -$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0; +$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ || (0,0)); $main::build += $VERSION; $main::branch += $BRANCH; sub init { - $dbm = tie (%d, 'DB_File', $fn) or confess "can't open dupe file: $fn ($!)"; + $dbm = tie (%d, 'DB_File', $fn); + unless ($dbm) { + eval { untie %d }; + dbg("Dupefile $fn corrupted, removing..."); + unlink $fn; + $dbm = tie (%d, 'DB_File', $fn) or confess "can't open dupe file: $fn ($!)"; + confess "cannot open $fn $!" unless $dbm; + } } sub finish @@ -39,21 +46,21 @@ sub finish sub check { - my ($s, $t) = @_; + my $s = shift; return 1 if find($s); - add($s, $t); + add($s, shift); return 0; } sub find { - return 1 if exists $d{$_[0]}; + return $d{$_[0]}; } sub add { - my ($s, $t) = @_; - $t = $main::systime + $default unless $t; + my $s = shift; + my $t = shift || $main::systime + $default; $d{$s} = $t; } @@ -67,9 +74,11 @@ sub process { # once an hour if ($main::systime - $lasttime >= 3600) { + my @del; while (($k, $v) = each %d) { - delete $d{$k} if $main::systime >= $v; + push @del, $k if $main::systime >= $v; } + delete $d{$_} for @del; $lasttime = $main::systime; } } @@ -96,7 +105,7 @@ sub listdups my @out; for (sort { $d{$a} <=> $d{$b} } grep { m{$regex}i } keys %d) { my ($dum, $key) = unpack "a1a*", $_; - push @out, "$key = " . cldatetime($d{$_} - $dupage); + push @out, "$key = " . cldatetime($d{$_} - $dupage) . " expires " . cldatetime($d{$_}); } return @out; }