From: Dirk Koopman Date: Mon, 18 Jun 2007 14:18:25 +0000 (+0100) Subject: use Encode to see whether this reduces dupes X-Git-Tag: 1.55~160^2~4 X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=495e620d66fbc1b8c64dd65ff8d89fc43a2e6f08;p=spider.git use Encode to see whether this reduces dupes --- diff --git a/Changes b/Changes index edf2bfe3..372380a2 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,6 @@ +18Jun06======================================================================= +1. add an optional dependency on Encode (included in 5.8.x) to encode strings +to latin1 for deduping purposes, hopefully getting rid of some dupes. 13Jun06======================================================================= 1. start using git. 2. change all the version / build numbering. @@ -351,7 +354,7 @@ line. Also added sh/fdx as an alias. 01Jan04======================================================================= 1. move position of USDB init to get rid of an obscure bug pointed out by Charlie K1XX. -2. Remove redundant documentation +2. Remove redundant documentation. 31Dec03======================================================================= 1. alter remote database handling to 'new standard'. 
30Dec03======================================================================= diff --git a/perl/AnnTalk.pm b/perl/AnnTalk.pm index 443e7442..0090eb04 100644 --- a/perl/AnnTalk.pm +++ b/perl/AnnTalk.pm @@ -47,6 +47,7 @@ sub dup chomp $text; unpad($text); $text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg; + $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1); $text = pack("C*", map {$_ & 127} unpack("C*", $text)); $text =~ s/[^\#a-zA-Z0-9]//g; $text = substr($text, 0, $duplth) if length $text > $duplth; diff --git a/perl/Spot.pm b/perl/Spot.pm index ed78b40c..a87b9c85 100644 --- a/perl/Spot.pm +++ b/perl/Spot.pm @@ -21,7 +21,7 @@ use QSL; use strict; -use vars qw($fp $statp $maxspots $defaultspots $maxdays $dirprefix $duplth $dupage $filterdef $totalspots $hfspots $vhfspots $maxcalllth); +use vars qw($fp $statp $maxspots $defaultspots $maxdays $dirprefix $duplth $dupage $filterdef $totalspots $hfspots $vhfspots $maxcalllth $can_encode); $fp = undef; $statp = undef; @@ -402,6 +402,7 @@ sub dup } } my $otext = $text; + $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1); $text =~ s/^\+\w+\s*//; # remove leading LoTW callsign $text = pack("C*", map {$_ & 127} unpack("C*", $text)); $text =~ s/\s{2,}[\dA-Z]?[A-Z]\d?$// if length $text > 24; diff --git a/perl/cluster.pl b/perl/cluster.pl index 3c4548b6..d7dd9a0c 100755 --- a/perl/cluster.pl +++ b/perl/cluster.pl @@ -118,6 +118,7 @@ use vars qw(@inqueue $systime $starttime $lockfn @outstanding_connects $zombies $root @listeners $lang $myalias @debug $userfn $clusteraddr $clusterport $mycall $decease $is_win $routeroot $me $reqreg $bumpexisting $allowdxby $dbh $dsn $dbuser $dbpass $do_xml $systime_days $systime_daystart + $can_encode ); @inqueue = (); # the main input queue, an array of hashes @@ -344,6 +345,19 @@ if (DXSql::init($dsn)) { $dbh = $dbh->connect($dsn, $dbuser, $dbpass) if $dbh; } +# try to load Encode +{ + no warnings; + my $w = 
$SIG{__DIE__}; + $SIG{__DIE__} = 'IGNORE'; + eval { require Encode; }; + unless ($@) { + import Encode; + $can_encode = 1; + } + $SIG{__DIE__} = $w; +} + # try to load XML::Simple DXXml::init();