X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2FAnnTalk.pm;h=341857450be90e8e45dd5003fed742875d645682;hb=f21f292746ef4a2edc703e48542d1ed2d85d14cd;hp=987b885d05bb06403559192714c947cc8ef4ac27;hpb=44bab9382ffb0bf12639af84729d1c42ac4d9ae2;p=spider.git diff --git a/perl/AnnTalk.pm b/perl/AnnTalk.pm index 987b885d..34185745 100644 --- a/perl/AnnTalk.pm +++ b/perl/AnnTalk.pm @@ -13,12 +13,14 @@ use strict; use DXUtil; use DXDebug; use DXDupe; +use DXVars; use vars qw(%dup $duplth $dupage); $duplth = 60; # the length of text to use in the deduping $dupage = 5*24*3600; # the length of time to hold spot dups + # enter the spot for dup checking and return true if it is already a dup sub dup { @@ -26,8 +28,9 @@ sub dup chomp $text; unpad($text); + $text =~ s/[^a-zA-Z0-9]//g; $text = substr($text, 0, $duplth) if length $text > $duplth; - my $dupkey = "A$to|$text"; + my $dupkey = "A$to|\L$text"; return DXDupe::check($dupkey, $main::systime + $dupage); }