X-Git-Url: http://dxcluster.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=perl%2FDXUser.pm;h=91f3a3f02caab2900ac3deb4f21d2ea81ffa088e;hb=refs%2Fheads%2Fnewusers;hp=de55f1b6e84d01ffa170d215fa91b759d02d0e50;hpb=2d43d3782490b5d684dec9852e05f927c7fa305a;p=spider.git diff --git a/perl/DXUser.pm b/perl/DXUser.pm index de55f1b6..91f3a3f0 100644 --- a/perl/DXUser.pm +++ b/perl/DXUser.pm @@ -1,9 +1,60 @@ # # DX cluster user routines # -# Copyright (c) 1998 - Dirk Koopman G1TLH +# Copyright (c) 1998-2020 - Dirk Koopman G1TLH # +# The new internal structure of the users system looks like this: # +# The users.v4 file formatted as a file of lines containing: \t{json serialised version of user record}\n +# +# You can look at it with any text tools or your favourite editor :-) +# +# In terms of internal structure, the main user hash remains as %u, keyed on callsign as before. +# +# The value is a one or two element array [position] or [position, ref], depending on whether the record has been "get()ed" +# [i.e. got from disk] or not. The 'position' is simply the start of each line in the file. The function "get()" simply returns +# the stored reference in array[1], if present, or seeks to the position from array[0], reads a line, json_decodes it, +# stores that reference into array[1] and returns that. That reference will be used from that time onwards. +# +# The routine writeoutjson() will (very) lazily write out a copy of %u WITHOUT STORING ANY EXTRA CURRENTLY UNREFERENCED CALLSIGN +# records to users.v4.n. It, in effect, does a sort of random accessed merge of the current user file and any "in memory" +# versions of any user record. This can be done with a spawned command because it will just be reading %u and merging +# loaded records, not altering the current users.v4 file in any way. +# +# %u -> $u{call} -> [position of json line in users.v4 (, reference -> {call=>'G1TLH', ...} if this record is in use)]. 
+# +# On my machine, it takes about 250mS to read the entire users.v4 file of 190,000 records and to create a +# $u{callsign}->[record position in users.v4] for every callsign in the users.v4 file. Loading ~19,000 records +# (read from disk, decode json, store reference) takes about 110mS (or 580nS/record). +# +# A periodic dump of users.v4.n, with said ~19,000 records in memory takes about 750mS to write (this can be speeded up, +# by at least a half, if it becomes a problem!). As this periodic dump will be spawned off, it will not interrupt the data +# stream. +# +# This is the first rewrite of DXUsers since inception. In the mojo branch we will no longer use Storable but use JSON instead. +# We will now be storing all the keys in memory and will use opportunistic loading of actual records in "get()". So out of +# say 200,000 known users it is unlikely that we will have more than 10% (more likely less) of the user records in memory. +# This will mean that there will be an increase in memory requirement, but it is modest. I estimate it's unlikely to be more +# than 30 or so MB. +# +# At the moment that means that the working users.v4 is "immutable". +# +# In normal operation, when first calling 'init()', the keys and positions will be read from the newer of users.v4.n and +# users.v4. If there is no users.v4.n, then users.v4 will be used. As time wears on, %u will then accrete active user records. +# Once an hour the current %u will be saved to users.v4.n. +# +# If it becomes too much of a problem then we are likely to chuck off "close()d" users onto the end of the current users.v4 +# leaving existing users intact, but updating the pointer to the (now cleared out) user ref to the new location. This will +# be a sort of write behind log file. The users.v4 file is still immutable for the starting positions, but any chucked off +# records (or even "updates") will be written to the end of that file. 
If this has to be reread at any time, then the last +# entry for any callsign "wins". But this will only happen if I think the memory requirements over time become too much. +# +# There is no functional difference between the users.v4 and export_user generated "user_json" file(s), other than the latter +# will be in sorted order with the record elements in "canonical" order. There will no longer be any code to execute to +# "restore the users file". Simply copy one of the "user_json" files to users.v4, remove users.v4.n and restart. +# +# Hopefully though, this will put to rest the need to do all that messing about ever again... Pigs may well be seen flying over +# your node as well :-) # package DXUser; @@ -20,13 +71,13 @@ use JSON; use DXDebug; use Data::Structure::Util qw(unbless); use Time::HiRes qw(gettimeofday tv_interval); +use IO::File; use strict; -use vars qw(%u $dbm $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $v3 $v4); +use vars qw(%u $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $v3 $v4); %u = (); -$dbm = undef; $filename = undef; $lastoperinterval = 60*24*60*60; $lasttime = 0; @@ -38,6 +89,15 @@ my $json; our $maxconnlist = 3; # remember this many connection time (duration) [start, end] pairs +our $newusers = 0; # per execution stats +our $modusers = 0; +our $totusers = 0; +our $delusers = 0; +our $cachedusers = 0; + +my $ifh; # the input file, initialised by readinjson() + + # hash of valid elements and a simple prompt %valid = ( call => '0,Callsign', @@ -89,6 +149,12 @@ our $maxconnlist = 3; # remember this many connection time (duration) [start, wantdxitu => '0,Show ITU Zone,yesno', wantgtk => '0,Want GTK interface,yesno', wantpc9x => '0,Want PC9X interface,yesno', + wantrbn => '0,Want RBN spots,yesno', + wantft => '0,Want RBN FT4/8,yesno', + wantcw => '0,Want RBN CW,yesno', + wantrtty => '0,Want RBN RTTY,yesno', + wantpsk => '0,Want RBN PSK,yesno', + wantbeacon => '0,Want (RBN) Beacon,yesno', lastoper 
=> '9,Last for/oper,cldatetime', nothere => '0,Not Here Text', registered => '9,Registered?,yesno', @@ -127,93 +193,35 @@ sub init { my $mode = shift; - my $ufn; - my $convert; - - my $fn = "users"; - - $json = JSON->new(); - $filename = $ufn = localdata("$fn.json"); + my $convert = "$main::root/perl/convert-users-v3-to-v4.pl"; + my $export; + + $json = JSON->new()->canonical(1); + $filename = localdata("users.v4"); - if (-e localdata("$fn.json")) { + if (-e $filename || -e "$filename.n" || -e "$filename.o") { $v4 = 1; } else { - eval { - require Storable; - }; - - if ($@) { - if ( ! -e localdata("users.v3") && -e localdata("users.v2") ) { - $convert = 2; - } - LogDbg('',"the module Storable appears to be missing!!"); - LogDbg('',"trying to continue in compatibility mode (this may fail)"); - LogDbg('',"please install Storable from CPAN as soon as possible"); - } - else { - import Storable qw(nfreeze thaw); - $convert = 3 if -e localdata("users.v3") && !-e $ufn; - } - } - - # do a conversion if required - if ($convert) { - my ($key, $val, $action, $count, $err) = ('','',0,0,0); - my $ta = [gettimeofday]; +# if (-e localdata('users.v3')) { +# LogDbg('DXUser', "Converting " . localdata('users.v3') . " to new json version of users file, please wait"); +# if (-x $convert) { +# system($convert); +# ++$export; +# } +# } - my %oldu; - LogDbg('',"Converting the User File from V$convert to $fn.json "); - LogDbg('',"This will take a while, I suggest you go and have cup of strong tea"); - my $odbm = tie (%oldu, 'DB_File', localdata("users.v$convert"), O_RDONLY, 0666, $DB_BTREE) or confess "can't open user file: $fn.v$convert ($!) 
[rebuild it from user_asc?]"; - for ($action = R_FIRST; !$odbm->seq($key, $val, $action); $action = R_NEXT) { - my $ref; - if ($convert == 3) { - eval { $ref = storable_decode($val) }; - } else { - eval { $ref = asc_decode($val) }; - } - unless ($@) { - if ($ref) { - $u{$key} = $ref; - $count++; - } else { - $err++ - } - } else { - Log('err', "DXUser: error decoding $@"); - } - } - undef $odbm; - untie %oldu; - my $t = _diffms($ta); - LogDbg('',"Conversion from users.v$convert to users.json completed $count records $err errors $t mS"); - - # now write it away for future use - $ta = [gettimeofday]; - $err = 0; - $count = writeoutjson(); - $t = _diffms($ta); - LogDbg('',"New Userfile users.json write completed $count records $err errors $t mS"); - LogDbg('',"Now restarting.."); - $main::ending = 10; - } else { - # otherwise (i.e normally) slurp it in - readinjson(); + die "User file $filename missing, please run $convert or copy a user_json backup from somewhere\n" unless -e "$filename.n" || -s $filename; } - $filename = $ufn; + readinjson(); + copy $filename, "$filename.n" unless -e "$filename.n"; + export() if $export; } sub del_file { # with extreme prejudice - if ($v3) { - unlink "$main::data/users.v3"; - unlink "$main::local_data/users.v3"; - } - if ($v4) { - unlink "$main::data/users.v4"; - unlink "$main::local_data/users.v4"; - } + unlink "$main::data/users.v4"; + unlink "$main::local_data/users.v4"; } # @@ -233,8 +241,8 @@ sub process sub finish { - undef $dbm; - untie %u; + + writeoutjson(); } # @@ -255,10 +263,13 @@ sub new my $call = shift; # $call =~ s/-\d+$//o; -# confess "can't create existing call $call in User\n!" if $u{$call}; + confess "can't create existing call $call in User\n!" 
if $u{$call}; my $self = $pkg->alloc($call); + $u{$call} = [0, $self]; $self->put; + ++$newusers; + ++$totusers; return $self; } @@ -270,34 +281,90 @@ sub new sub get { my $call = uc shift; - my $data; - - my $ref = $u{$call} if exists $u{$call}; - return $ref if $ref && ref $ref eq 'DXUser'; + my $nodecode = shift; + my $ref = $u{$call}; + return undef unless $ref; + unless ($ref->[1]) { + $ifh->seek($ref->[0], 0); + my $l = $ifh->getline; + if ($l) { + my ($k,$s) = split /\t/, $l; + return $s if $nodecode; + my $j = json_decode($s); + if ($j) { + $ref->[1] = $j; + ++$cachedusers; + } + } + } elsif ($nodecode) { + return json_encode($ref->[1]); + } + return $ref->[1]; +} + +# +# get an "ephemeral" reference - i.e. this will give you new temporary copy of +# the call's user record, but without storing it (if it isn't already there) +# +# This is not as quick as get()! But it will allow safe querying of the +# user file. Probably in conjunction with get_some_calls feeding it. +# +# NOTE: for cached records this, in effect, is a faster version of Storable's +# dclone - only about 3.5 times as fast! +# + +sub get_tmp +{ + my $call = uc shift; + my $ref = $u{$call}; + if ($ref) { + if ($ref->[1]) { + return json_decode(json_encode($ref->[1])); + } + $ifh->seek($ref->[0], 0); + my $l = $ifh->getline; + if ($l) { + my ($k,$s) = split /\t/, $l; + my $j = json_decode($s); + return $j; + } + } return undef; } # -# get an existing either from the channel (if there is one) or from the database +# Master branch: +# get an existing record either from the channel (if there is one) or from the database # # It is important to note that if you have done a get (for the channel say) and you # want access or modify that you must use this call (and you must NOT use get's all # over the place willy nilly!) 
# +# NOTE: mojo branch with newusers system: +# There is no longer any function difference between get_current() +# and get() as they will always reference the same record as held in %u. This is because +# there is no more (repeated) thawing of stored records from the underlying "database". +# +# BUT: notice the difference between this and the get_tmp() function. A get() will online an +# othewise unused record, so for poking around looking for that locked out user: +# MAKE SURE you use get_tmp(). It will likely still be quicker than DB_File and Storable! +# sub get_current { - my $call = uc shift; - - my $dxchan = DXChannel::get($call); - if ($dxchan) { - my $ref = $dxchan->user; - return $ref if $ref && UNIVERSAL::isa($ref, 'DXUser'); - - dbg("DXUser::get_current: got invalid user ref for $call from dxchan $dxchan->{call} ". ref $ref. " ignoring"); - } - return get($call); + goto &get; + +# my $call = uc shift; +# +# my $dxchan = DXChannel::get($call); +# if ($dxchan) { +# my $ref = $dxchan->user; +# return $ref if $ref && UNIVERSAL::isa($ref, 'DXUser'); +# +# dbg("DXUser::get_current: got invalid user ref for $call from dxchan $dxchan->{call} ". ref $ref. " ignoring"); +# } +# return get($call); } # @@ -309,6 +376,49 @@ sub get_all_calls return (sort keys %u); } +# +# get some calls - provide a qr// style selector string as a partial key +# + +sub get_some_calls +{ + my $pattern = shift || qr/.*/; + return sort grep {$pattern} keys %u; +} + +# +# if I understand the term correctly, this is a sort of monad. +# +# Scan through the whole user file and select records that you want +# to process further. This routine returns lines of json, yu +# +# the CODE ref should look like: +# sub { +# my $key = shift; +# my $line = shift; +# # maybe do a rough check to see if this is a likely candidate +# return unless $line =~ /something/; +# my $r = json_decode($l); +# return (condition ? 
wanted thing : ()); +# } +# + +sub scan +{ + my $c = shift; + my @out; + + if (ref($c) eq 'CODE') { + foreach my $k (get_all_calls()) { + my $l = get($k, 1); # get the offline json line or the jsoned online version + push @out, $c->($k, $l) if $l; + } + } else { + dbg("DXUser::scan supplied argument is not a code ref"); + } + return @out; +} + # # put - put a user # @@ -317,50 +427,20 @@ sub put { my $self = shift; confess "Trying to put nothing!" unless $self && ref $self; - my $call = $self->{call}; $self->{lastin} = $main::systime; + ++$modusers; # new or existing, it's still been modified } # freeze the user sub encode { - goto &json_encode if $v4; - goto &asc_encode unless $v3; - my $self = shift; - return nfreeze($self); + goto &json_encode; } # thaw the user sub decode { - goto &json_decode if $v4; - goto &storable_decode if $v3; - goto &asc_decode; -} - -# should now be obsolete for mojo branch build 238 and above -sub storable_decode -{ - my $ref; - $ref = thaw(shift); - return $ref; -} - - -# -# create a hash from a string (in ascii) -# -sub asc_decode -{ - my $s = shift; - my $ref; - $s =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg; - eval '$ref = ' . 
$s; - if ($@) { - LogDbg('err', "DXUser::asc_decode: on '$s' $@"); - $ref = undef; - } - return $ref; + goto &json_decode; } sub json_decode @@ -371,7 +451,7 @@ sub json_decode if ($ref && !$@) { return bless $ref, 'DXUser'; } else { - LogDbg('err', "DXUser::json_decode: on '$s' $@"); + LogDbg('DXUser', "DXUser::json_decode: on '$s' $@"); } return undef; } @@ -393,8 +473,9 @@ sub del { my $self = shift; my $call = $self->{call}; -# $lru->remove($call); - # $dbm->del($call); + ++$delusers; + --$totusers; + --$cachedusers if $u{$call}->[1]; delete $u{$call}; } @@ -413,7 +494,6 @@ sub close push @$ref, $ip if $ip; push @{$self->{connlist}}, $ref; shift @{$self->{connlist}} if @{$self->{connlist}} > $maxconnlist; -# $self->put(); } # @@ -444,6 +524,7 @@ sub export my $name = shift; my $fn = $name || localdata("user_json"); # force use of local_data + my $ta = [gettimeofday]; # save old ones move "$fn.oooo", "$fn.ooooo" if -e "$fn.oooo"; @@ -453,7 +534,7 @@ sub export move "$fn", "$fn.o" if -e "$fn"; my $json = JSON->new; - $json->canonical(1); + $json->canonical(1);; my $count = 0; my $err = 0; @@ -463,10 +544,10 @@ sub export my $key = 0; my $val = undef; foreach my $k (sort keys %u) { - my $r = $u{$k}; + my $r = get($k); if ($r->{sort} eq 'U' && !$r->{priv} && $main::systime > $r->{lastin}+$tooold ) { unless ($r->{lat} || $r->{long} || $r->{qra} || $r->{qth} || $r->{name}) { - LogDbg('err', "DXUser::export deleting $k - too old, last in " . cldatetime($r->lastin) . " " . difft([$r->lastin, $main::systime])); + LogDbg('export', "DXUser::export deleting $k - too old, last in " . cldatetime($r->lastin) . " " . 
difft([$r->lastin, $main::systime])); delete $u{$k}; ++$del; next; @@ -474,7 +555,7 @@ sub export } eval {$val = json_encode($r);}; if ($@) { - LogDbg('err', "DXUser::export error encoding call: $k $@"); + LogDbg('export', "DXUser::export error encoding call: $k $@"); ++$err; next; } @@ -483,8 +564,9 @@ sub export } $fh->close; } - my $s = qq{Exported users to $fn - $count Users $del Deleted $err Errors ('sh/log Export' for details)}; - LogDbg('command', $s); + my $t = _diffms($ta); + my $s = qq{Exported users to $fn - $count Users $del Deleted $err Errors in $t mS ('sh/log Export' for details)}; + LogDbg('DXUser', $s); return $s; } @@ -760,16 +842,24 @@ sub is_ak1a return $self->{sort} eq 'A'; } +sub is_rbn +{ + my $self = shift; + return $self->{sort} eq 'N' +} + sub unset_passwd { my $self = shift; delete $self->{passwd}; + $self->put; } sub unset_passphrase { my $self = shift; delete $self->{passphrase}; + $self->put; } sub set_believe @@ -777,7 +867,10 @@ sub set_believe my $self = shift; my $call = uc shift; $self->{believe} ||= []; - push @{$self->{believe}}, $call unless grep $_ eq $call, @{$self->{believe}}; + unless (grep $_ eq $call, @{$self->{believe}}) { + push @{$self->{believe}}, $call; + $self->put; + }; } sub unset_believe @@ -787,6 +880,7 @@ sub unset_believe if (exists $self->{believe}) { $self->{believe} = [grep {$_ ne $call} @{$self->{believe}}]; delete $self->{believe} unless @{$self->{believe}}; + $self->put; } } @@ -807,60 +901,84 @@ sub lastping return $b->{$call}; } +# +# read in the latest version of the user file. As this file is immutable, the file one really wants is +# a later (generated) copy. But, if the plain users.v4 file is all we have, we'll use that. 
+# + sub readinjson { - my $fn = shift || $filename; - + my $fn = $filename; + my $nfn = "$fn.n"; + my $ofn = "$fn.o"; + my $ta = [gettimeofday]; my $count = 0; my $s; my $err = 0; - unless (-r $fn) { - dbg("DXUser $fn not found - probably about to convert"); - return; + if (-e $nfn && -e $fn && (stat($nfn))[9] > (stat($fn))[9]) { + # move the old file to .o + unlink $ofn; + move($fn, $ofn); + move($nfn, $fn); + }; + + # if we don't have a users.v4 at this point, look for a backup users.v4.json, users.v4.n then users.v4.o + unless (-e $fn) { + move($nfn, $fn) unless -e $fn; # the users.v4 isn't there (maybe convert-users-v3-to-v4.pl + move("$fn.json", $fn); # from a run of convert-users-v3-to-v4.pl + move($ofn, $fn) unless -e $fn; # desperate now... } - open DATA, "$fn" or die "$fn read error $!"; - while () { + if ($ifh) { + $ifh->seek(0, 0); + } else { + LogDbg("DXUser","DXUser::readinjson: opening $fn as users file"); + $ifh = IO::File->new("+<$fn") or die "Cannot open $fn ($!)"; + } + my $pos = $ifh->tell; + while (<$ifh>) { chomp; my @f = split /\t/; - my $ref; - eval { $ref = json_decode($f[1]); }; - if ($ref) { - $u{$f[0]} = $ref; - $count++; - } else { - LogDbg('DXCommand', "# readinjson Error: '$f[0]\t$f[1]' $@"); - $err++ - } + $u{$f[0]} = [$pos]; + $count++; + $pos = $ifh->tell; } - close DATA; - $s = _diffms($ta); - dbg("DXUser::readinjson $count records $s mS"); + $ifh->seek(0, 0); + + # $ifh is "global" and should not be closed + + LogDbg('DXUser',"DXUser::readinjson $count record headers read from $fn in ". _diffms($ta) . " mS"); + return $totusers = $count; } -sub writeoutjson() -{ - my $fn = shift || $filename; +# +# Write a newer copy of the users.v4 file to users.v4.n, which is what will be read in. +# This means that the existing users.v4 is not touched during a run of dxspider, or at least +# not yet. 
- link $fn, "$fn.o"; - unlink $fn; - open DATA, ">$fn" or die "$fn write error $!"; - my $fh = new IO::File ">$fn" or return "cannot open $fn ($!)"; +sub writeoutjson +{ + my $ofn = shift || "$filename.n"; + my $ta = [gettimeofday]; + + my $ofh = IO::File->new(">$ofn") or die "$ofn write error $!"; my $count = 0; - if ($fh) { - my $key = 0; - my $val = undef; - foreach my $k (keys %u) { # this is to make it as quick as possible (no sort) - my $r = $u{$k}; - $val = json_encode($r); - $fh->print("$k\t$val\n"); + $ifh->seek(0, 0); + for my $k (sort keys %u) { + my $l = get($k, 1); + if ($l) { + chomp $l; + print $ofh "$k\t$l\n"; ++$count; + } else { + LogDbg('DXUser', "DXUser::writeoutjson callsign $k not found") } - $fh->close; - } - close DATA; + } + + $ofh->close; + LogDbg('DXUser',"DXUser::writeoutjson $count records written to $ofn in ". _diffms($ta) . " mS"); return $count; } 1;