projects
/
spider.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
50c5cbb
)
add a literal dupe check for the comments in spots
author
minima
<minima>
Mon, 29 Aug 2005 17:31:02 +0000
(17:31 +0000)
committer
minima
<minima>
Mon, 29 Aug 2005 17:31:02 +0000
(17:31 +0000)
Changes
patch
|
blob
|
history
perl/Spot.pm
patch
|
blob
|
history
diff --git a/Changes b/Changes
index 73e31c126d66317d00219ab05ec55bacd973729b..770879b7ea60bcc2a067eab516e72eb507c4f105 100644
(file)
--- a/Changes
+++ b/Changes
@@ -1,3 +1,7 @@
+29Aug05=======================================================================
+1. try something else to resolve/prevent more of these spot dupes. It appears
+that there is software out there chopping off variable amounts from the ends
+of comment fields. This makes the task of detecting dupes that much harder.
10Aug05=======================================================================
1. trim dx spot duplicate length after normalisation
2. Do the same for announces
10Aug05=======================================================================
1. trim dx spot duplicate length after normalisation
2. Do the same for announces
diff --git a/perl/Spot.pm b/perl/Spot.pm
index d52575afbcc8ce25a1870aff607e8da1356c1443..f331d105b540ae1cd2e4b414c631ffcfbf97eca4 100644
(file)
--- a/perl/Spot.pm
+++ b/perl/Spot.pm
@@ -343,6 +343,7 @@ sub dup
$text = "" if $cty == $try[0];
}
}
$text = "" if $cty == $try[0];
}
}
+ my $otext = $text;
$text = pack("C*", map {$_ & 127} unpack("C*", $text));
$text =~ s/[^\w]//g;
$text = substr($text, 0, $duplth) if length $text > $duplth;
$text = pack("C*", map {$_ & 127} unpack("C*", $text));
$text =~ s/[^\w]//g;
$text = substr($text, 0, $duplth) if length $text > $duplth;
@@ -350,6 +351,11 @@ sub dup
my $t = DXDupe::find($ldupkey);
return 1 if $t && $t - $main::systime > 0;
DXDupe::add($ldupkey, $main::systime+$dupage);
my $t = DXDupe::find($ldupkey);
return 1 if $t && $t - $main::systime > 0;
DXDupe::add($ldupkey, $main::systime+$dupage);
+ $otext = substr($otext, 0, $duplth) if length $otext > $duplth;
+ $ldupkey = "X$freq|$call|$by|$otext";
+ $t = DXDupe::find($ldupkey);
+ return 1 if $t && $t - $main::systime > 0;
+ DXDupe::add($ldupkey, $main::systime+$dupage);
# my $sdupkey = "X$freq|$call|$by";
# $t = DXDupe::find($sdupkey);
# return 1 if $t && $t - $main::systime > 0;
# my $sdupkey = "X$freq|$call|$by";
# $t = DXDupe::find($sdupkey);
# return 1 if $t && $t - $main::systime > 0;