use DXUtil;
use DXVars;
+use DXHash;
+use DXDebug;
+
use IO::File;
-use vars qw(%badwords $fn);
+use vars qw($badword $regexcode);
+
+my $oldfn = "$main::data/badwords";
+my $regex = "$main::data/badw_regex";
+my $bwfn = "$main::data/badword";
-$fn = "$main::data/badwords";
-%badwords = ();
+# copy issue ones across
+filecopy("$regex.gb.issue", $regex) unless -e $regex;
+filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
+
+$badword = new DXHash "badword";
use vars qw($VERSION $BRANCH);
$VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
-$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0;
+$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ || (0,0));
$main::build += $VERSION;
$main::branch += $BRANCH;
sub load
{
my @out;
- return unless -e $fn;
- my $fh = new IO::File $fn;
+ my $fh = new IO::File $oldfn;
+
+ if ($fh) {
+ while (<$fh>) {
+ chomp;
+ next if /^\s*\#/;
+ my @list = split " ";
+ for (@list) {
+ $badword->add($_);
+ }
+ }
+ $fh->close;
+ $badword->put;
+ unlink $oldfn;
+ }
+ push @out, create_regex();
+ return @out;
+}
+
+sub create_regex
+{
+ my @out;
+ my $fh = new IO::File $regex;
if ($fh) {
- %badwords = ();
+ my $s = "sub { my \$str = shift; my \@out; \n";
while (<$fh>) {
chomp;
next if /^\s*\#/;
my @list = split " ";
for (@list) {
- $badwords{lc $_}++;
+ # create a closure for each word so that it matches stuff with spaces/punctuation
+ # and repeated characters in it
+ my $w = uc $_;
+ my @l = split //, $w;
+ my $e = join '+[\s\W]*', @l;
+ $s .= "push \@out, \$1 if \$str =~ /\\b($e)/;\n";
}
}
+ $s .= "return \@out;\n}";
+ $regexcode = eval $s;
+ dbg($s) if isdbg('badword');
+ if ($@) {
+ @out = ($@);
+ dbg($@);
+ return @out;
+ }
$fh->close;
} else {
- my $l = "can't open $fn $!";
- dbg('err', $l);
+ my $l = "can't open $regex $!";
+ dbg($l);
push @out, $l;
}
+
return @out;
}
# check the text against the badwords list
sub check
{
- return grep { $badwords{$_} } split(/\b/, lc shift);
+ my $s = uc shift;
+ my @out;
+
+ push @out, &$regexcode($s) if $regexcode;
+
+ return @out if @out;
+
+ for (split(/\b/, $s)) {
+ push @out, $_ if $badword->in($_);
+ }
+
+ return @out;
}
1;