4. store deleted status across restarts!
5. make callsign checking more rigorous
6. dup check PC49 (kill full)
+7. improved the regex matching of badwords (more efficient, better coverage)
+8. added default badword and badw_regex tables (as .issue files) which will
+activate unless there is one there already.
03Oct01=======================================================================
1. don't allow @WWW to become a 'TO' field...
2. handle @gb7tlh.#35.eu type addresses as well
use DXUtil;
use DXVars;
use DXHash;
+use DXDebug;
+
use IO::File;
-use vars qw($badword);
+use vars qw($badword @regex);
my $oldfn = "$main::data/badwords";
+my $regex = "$main::data/badw_regex";
+my $bwfn = "$main::data/badword";
+
+# copy issue ones across
+filecopy("$regex.issue", $regex) unless -e $regex;
+filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
+
$badword = new DXHash "badword";
use vars qw($VERSION $BRANCH);
sub load
{
my @out;
- return unless -e $oldfn;
my $fh = new IO::File $oldfn;
if ($fh) {
$fh->close;
$badword->put;
unlink $oldfn;
+ }
+ push @out, create_regex();
+ return @out;
+}
+
+sub create_regex
+{
+ my @out;
+ @regex = ();
+
+ my $fh = new IO::File $regex;
+
+ if ($fh) {
+ while (<$fh>) {
+ chomp;
+ next if /^\s*\#/;
+ my @list = split " ";
+ for (@list) {
+ # create a closure for each word so that it matches stuff with spaces/punctuation
+ # and repeated characters in it
+ my $w = uc $_;
+ my @l = map { $_ eq 'I' ? '[I1]' : ($_ eq 'O' ? '[O0]' : $_) }split //, $w;
+ my $e = join '+[\s\W]+', @l;
+ my $s = eval qq{sub { return \$_[0] =~ /$e+/ ? '$w' : () } };
+ push @regex, $s unless $@;
+ dbg("create_regex: $@") if $@;
+ }
+ }
+ $fh->close;
} else {
- my $l = "can't open $oldfn $!";
+ my $l = "can't open $regex $!";
dbg($l);
push @out, $l;
}
+
return @out;
}
sub check
{
my $s = uc shift;
+ my @out;
+
+ for (@regex) {
+ push @out, &$_($s);
+ }
+
+ return @out if @out;
for (split(/\s+/, $s)) {
s/[^\w]//g;
- return $_ if $badword->in($_);
+ push @out, $_ if $badword->in($_);
s/\'?S$//;
- return $_ if $badword->in($_);
- }
-
- # look for a few of the common ones with spaces and stuff
- if ($s =~ /F[\s\W]*U[\s\W]*C[\s\W]*K/) {
- return "FUCK";
- } elsif ($s =~ /C[\s\W]*U[\s\W]*N[\s\W]*T/) {
- return "CUNT";
- } elsif ($s =~ /W[\s\W]*A[\s\W]*N[\s\W]*K/) {
- return "WANK";
- } elsif ($s =~ /C[\s\W]*[0O][\s\W]*C[\s\W]*K/) {
- return "COCK";
- } elsif ($s =~ /S[\s\W]*H[\s\W]*[I1][\s\W]*T/) {
- return "SHIT";
- } elsif ($s =~ /P[\s\W]*[I1][\s\W]*S[\s\W]*S/) {
- return "PISS";
- } elsif ($s =~ /B[\s\W]*[O0][\s\W]*L[\s\W]*L[\s\W]*[O0][\s\W]*[CK]/) {
- return "BOLLOCKS";
+ push @out, $_ if $badword->in($_);
}
-
- return ();
+
+ return @out;
}
1;