X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2Fgen_usdb_data.pl;h=edd74dab2ea0cebcb8aae788bcee022d16fcb291;hb=7046b8ba37863c3040cee17e46d100675e720eaf;hp=d4edaafade903da192bfd97716bb7160d488c54c;hpb=1ddd21105b4fdf68756a8676a58105927254d386;p=spider.git diff --git a/perl/gen_usdb_data.pl b/perl/gen_usdb_data.pl index d4edaafa..edd74dab 100755 --- a/perl/gen_usdb_data.pl +++ b/perl/gen_usdb_data.pl @@ -36,6 +36,7 @@ BEGIN { $root = "/spider"; $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'}; + unshift @INC, "$root/perl"; # this IS the right way round! unshift @INC, "$root/local"; } @@ -55,7 +56,7 @@ my $blksize = 1024 * 1024; STDOUT->autoflush(1); -my $dbrawfn = "$main::data/usdbraw"; +my $dbrawfn = "$main::data/usdbraw.gz"; rename "$dbrawfn.oo", "$dbrawfn.ooo"; rename "$dbrawfn.o", "$dbrawfn.oo"; @@ -73,7 +74,6 @@ foreach my $argv (@ARGV) { } $gzfh->gzclose; -print "$ctycount Cities found\n"; exit(0); @@ -96,19 +96,20 @@ sub handleEN $l =~ s/[\r\n]+$//; my ($rt,$usi,$ulsfn,$ebfno,$call,$type,$lid,$name,$first,$middle,$last,$suffix, $phone,$fax,$email,$street,$city,$state,$zip,$pobox,$attl,$sgin,$frn) = split /\|/, $l; - - my $rec = uc join '|', $call,$city,$state if $city && $state; - $buf .= "$rec\n"; - if (length $buf > $blksize) { - $gzfh->gzwrite($buf); - undef $buf; + +# print "ERR: $l\n" unless $call && $city && $state; + + if ($call && $city && $state) { + my $rec = uc join '|', $call,$city,$state if $city && $state; + $buf .= "$rec\n"; + if (length $buf > $blksize) { + $gzfh->gzwrite($buf); + undef $buf; + } + $count++; } - my $c = uc "$city|$state"; - $count++; - } - if (length $buf > $blksize) { - $gzfh->gzwrite($buf); } + $gzfh->gzwrite($buf) if length $buf; print ", $count records\n"; $fh->close; }