X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2FBadWords.pm;h=63ec8c881aa5e9db936ecc7fc3582f4577ce3e2d;hb=cc579a96816b0bae5b37dc132942fc1075449cf3;hp=ff9dd04c8b4e3b70a634828862420fcac1ccaa8a;hpb=8e0eef80216fbb2bca3606daf5797e39b2889d7a;p=spider.git diff --git a/perl/BadWords.pm b/perl/BadWords.pm index ff9dd04c..63ec8c88 100644 --- a/perl/BadWords.pm +++ b/perl/BadWords.pm @@ -12,34 +12,42 @@ use strict; use DXUtil; use DXVars; +use DXHash; use IO::File; -use vars qw(%badwords $fn); +use vars qw($badword); -$fn = "$main::data/badwords"; -%badwords = (); +my $oldfn = "$main::data/badwords"; +$badword = new DXHash "badword"; + +use vars qw($VERSION $BRANCH); +$VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ ); +$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0; +$main::build += $VERSION; +$main::branch += $BRANCH; # load the badwords file sub load { my @out; - return unless -e $fn; - my $fh = new IO::File $fn; + return unless -e $oldfn; + my $fh = new IO::File $oldfn; if ($fh) { - %badwords = (); while (<$fh>) { chomp; next if /^\s*\#/; my @list = split " "; for (@list) { - $badwords{lc $_}++; + $badword->add($_); } } $fh->close; + $badword->put; + unlink $oldfn; } else { - my $l = "can't open $fn $!"; - dbg('err', $l); + my $l = "can't open $oldfn $!"; + dbg($l); push @out, $l; } return @out; @@ -48,7 +56,29 @@ sub load # check the text against the badwords list sub check { - return grep { $badwords{$_} } split(/\b/, lc shift); + my $s = uc shift; + + for (split(/\s+/, $s)) { + s/[^\w]//g; + return $_ if $badword->in($_); + s/\'?S$//; + return $_ if $badword->in($_); + } + + # look for a few of the common ones with spaces and stuff + if ($s =~ /F[\s\W]*U[\s\W]*C[\s\W]*K/) { + return "FUCK"; + } elsif ($s =~ /C[\s\W]*U[\s\W]*N[\s\W]*T/) { + return "CUNT"; + } elsif ($s =~ /W[\s\W]*A[\s\W]*N[\s\W]*K/) { + return "WANK"; + } elsif ($s =~ /C[\s\W]*[0O][\s\W]*C[\s\W]*K/) { + return "COCK"; + } elsif ($s =~ /S[\s\W]*H[\s\W]*[I1][\s\W]*T/) { + return "SHIT"; + } + + return (); } 1;