#
# DX cluster user routines
#
-# Copyright (c) 1998 - Dirk Koopman G1TLH
+# Copyright (c) 1998-2020 - Dirk Koopman G1TLH
#
+# The new internal structure of the users system looks like this:
#
+# The users.v4 file formatted as a file of lines containing: <callsign>\t{json serialised version of user record}\n
+#
+# You can look at it with any text tools or your favourite editor :-)
+#
+# In terms of internal structure, the main user hash remains as %u, keyed on callsign as before.
+#
+# The value is a one or two element array [position] or [position, ref], depending on whether the record has been "get()ed"
+# [i.e. got from disk] or not. The 'position' is simply the start of each line in the file. The function "get()" simply returns
+# the stored reference in array[1], if present, or seeks to the position from array[0], reads a line, json_decodes it,
+# stores that reference into array[1] and returns that. That reference will be used from that time onwards.
+#
+# The routine writeoutjson() will (very) lazily write out a copy of %u WITHOUT STORING ANY EXTRA CURRENTLY UNREFERENCED CALLSIGN
+# records to users.v4.n. It, in effect, does a sort of random accessed merge of the current user file and any "in memory"
+# versions of any user record. This can be done with a spawned command because it will just be reading %u and merging
+# loaded records, not altering the current users.v4 file in any way.
+#
+# %u -> $u{call} -> [position of json line in users.v4 (, reference -> {call=>'G1TLH', ...} if this record is in use)].
+#
+# On my machine, it takes about 250mS to read the entire users.v4 file of 190,000 records and to create a
+# $u{callsign}->[record position in users.v4] for every callsign in the users.v4 file. Loading ~19,000 records
+# (read from disk, decode json, store reference) takes about 110mS (or 580nS/record).
+#
+# A periodic dump of users.v4.n, with said ~19,000 records in memory takes about 750mS to write (this can be speeded up,
+# by at least a half, if it becomes a problem!). As this periodic dump will be spawned off, it will not interrupt the data
+# stream.
+#
+# This is the first rewrite of DXUsers since inception. In the mojo branch we will no longer use Storable but use JSON instead.
+# We will now be storing all the keys in memory and will use opportunistic loading of actual records in "get()". So out of
+# say 200,000 known users it is unlikely that we will have more than 10% (more likely less) of the user records in memory.
+# This will mean that there will be a increase in memory requirement, but it is modest. I estimate it's unlikely be more
+# than 30 or so MB.
+#
+# At the moment that means that the working users.v4 is "immutable".
+#
+# In normal operation, when first calling 'init()', the keys and positions will be read from the newer of users.v4.n and
+# users.v4. If there is no users.v4.n, then users.v4 will be used. As time wears on, %u will then accrete active user records.
+# Once an hour the current %u will be saved to users.v4.n.
+#
+# If it becomes too much of a problem then we are likely to chuck off "close()d" users onto the end of the current users.v4
+# leaving existing users intact, but updating the pointer to the (now cleared out) user ref to the new location. This will
+# be a sort of write behind log file. The users.v4 file is still immutable for the starting positions, but any chucked off
+# records (or even "updates") will be written to the end of that file. If this has to be reread at any time, then the last
+# entry for any callsign "wins". But this will only happen if I think the memory requirements over time become too much.
+#
+# As there is no functional difference between the users.v4 and export_user generated "user_json" file(s), other than the latter
+# will be in sorted order with the record elements in "canonical" order. There will now longer be any code to execute to
+# "restore the users file". Simply copy one of the "user_json" files to users.v4, remove users.v4.n and restart.
+#
+# Hopefully though, this will put to rest the need to do all that messing about ever again... Pigs may well be seen flying over
+# your node as well :-)
#
package DXUser;
use Data::Dumper;
use Fcntl;
use IO::File;
-use DXDebug;
use DXUtil;
use LRU;
use File::Copy;
+use JSON;
+use DXDebug;
+use Data::Structure::Util qw(unbless);
+use Time::HiRes qw(gettimeofday tv_interval);
+use IO::File;
use strict;
-use vars qw(%u $dbm $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $v3);
+use vars qw(%u $filename %valid $lastoperinterval $lasttime $lru $lrusize $tooold $v3 $v4);
%u = ();
-$dbm = undef;
$filename = undef;
$lastoperinterval = 60*24*60*60;
$lasttime = 0;
$lrusize = 2000;
-$tooold = 86400 * 365; # this marks an old user who hasn't given enough info to be useful
+$tooold = 86400 * 365 + 31; # this marks an old user who hasn't given enough info to be useful
$v3 = 0;
+$v4 = 0;
+my $json;
+
our $maxconnlist = 3; # remember this many connection time (duration) [start, end] pairs
+our $newusers = 0; # per execution stats
+our $modusers = 0;
+our $totusers = 0;
+our $delusers = 0;
+our $cachedusers = 0;
+
+my $ifh; # the input file, initialised by readinjson()
+
+
# hash of valid elements and a simple prompt
%valid = (
call => '0,Callsign',
my $ufn;
my $convert;
- eval {
- require Storable;
- };
-
- my $fn = "users";
-
- if ($@) {
- $ufn = localdata("users.v2");
- $v3 = $convert = 0;
- dbg("the module Storable appears to be missing!!");
- dbg("trying to continue in compatibility mode (this may fail)");
- dbg("please install Storable from CPAN as soon as possible");
- } else {
- import Storable qw(nfreeze thaw);
-
- $ufn = localdata("users.v3");
- $v3 = 1;
- $convert++ if -e localdata("users.v2") && !-e $ufn;
- }
+ $json = JSON->new()->canonical(1);
+ $filename = localdata("users.v4");
- if ($mode) {
- $dbm = tie (%u, 'DB_File', $ufn, O_CREAT|O_RDWR, 0666, $DB_BTREE) or confess "can't open user file: $fn ($!) [rebuild it from user_asc?]";
+ if (-e $filename || -e "$filename.n" || -e "$filename.o") {
+ $v4 = 1;
+ readinjson();
} else {
- $dbm = tie (%u, 'DB_File', $ufn, O_RDONLY, 0666, $DB_BTREE) or confess "can't open user file: $fn ($!) [rebuild it from user_asc?]";
+ die "User file $filename missing, please run convert-users-v3-to-v4.pl or copy a user_json backup from somewhere\n";
}
-
- die "Cannot open $ufn ($!)\n" unless $dbm;
-
- $lru = LRU->newbase("DXUser", $lrusize);
-
- # do a conversion if required
- if ($dbm && $convert) {
- my ($key, $val, $action, $count, $err) = ('','',0,0,0);
-
- my %oldu;
- dbg("Converting the User File to V3 ");
- dbg("This will take a while, I suggest you go and have cup of strong tea");
- my $odbm = tie (%oldu, 'DB_File', localdata("users.v2"), O_RDONLY, 0666, $DB_BTREE) or confess "can't open user file: $fn.v2 ($!) [rebuild it from user_asc?]";
- for ($action = R_FIRST; !$odbm->seq($key, $val, $action); $action = R_NEXT) {
- my $ref;
- eval { $ref = asc_decode($val) };
- unless ($@) {
- if ($ref) {
- $ref->put;
- $count++;
- } else {
- $err++
- }
- } else {
- Log('err', "DXUser: error decoding $@");
- }
- }
- undef $odbm;
- untie %oldu;
- dbg("Conversion completed $count records $err errors");
- }
- $filename = $ufn;
}
sub del_file
{
# with extreme prejudice
- unlink "$main::data/users.v3";
- unlink "$main::local_data/users.v3";
+ unlink "$main::data/users.v4";
+ unlink "$main::local_data/users.v4";
}
#
#
sub process
{
- if ($main::systime > $lasttime + 15) {
- $dbm->sync if $dbm;
- $lasttime = $main::systime;
- }
+# if ($main::systime > $lasttime + 15) {
+# #$dbm->sync if $dbm;
+# $lasttime = $main::systime;
+# }
}
#
sub finish
{
- undef $dbm;
- untie %u;
+
+ writeoutjson();
}
#
my $call = shift;
# $call =~ s/-\d+$//o;
-# confess "can't create existing call $call in User\n!" if $u{$call};
+ confess "can't create existing call $call in User\n!" if $u{$call};
my $self = $pkg->alloc($call);
+ $u{$call} = [0, $self];
$self->put;
+ ++$newusers;
+ ++$totusers;
return $self;
}
sub get
{
my $call = uc shift;
- my $data;
-
- # is it in the LRU cache?
- my $ref = $lru->get($call);
- return $ref if $ref && ref $ref eq 'DXUser';
+ my $nodecode = shift;
+ my $ref = $u{$call};
+ return undef unless $ref;
- # search for it
- unless ($dbm->get($call, $data)) {
- eval { $ref = decode($data); };
-
- if ($ref) {
- if (!UNIVERSAL::isa($ref, 'DXUser')) {
- dbg("DXUser::get: got strange answer from decode of $call". ref $ref. " ignoring");
- return undef;
+ unless ($ref->[1]) {
+ $ifh->seek($ref->[0], 0);
+ my $l = $ifh->getline;
+ if ($l) {
+ my ($k,$s) = split /\t/, $l;
+ return $s if $nodecode;
+ my $j = json_decode($s);
+ if ($j) {
+ $ref->[1] = $j;
+ ++$cachedusers;
}
- # we have a reference and it *is* a DXUser
- } else {
- if ($@) {
- LogDbg('err', "DXUser::get decode error on $call '$@'");
- } else {
- dbg("DXUser::get: no reference returned from decode of $call $!");
- }
- return undef;
}
- $lru->put($call, $ref);
- return $ref;
+ } elsif ($nodecode) {
+ return json_encode($ref->[1]);
+ }
+ return $ref->[1];
+}
+
+#
+# get an "ephemeral" reference - i.e. this will give you new temporary copy of
+# the call's user record, but without storing it (if it isn't already there)
+#
+# This is not as quick as get()! But it will allow safe querying of the
+# user file. Probably in conjunction with get_some_calls feeding it.
+#
+# NOTE: for cached records this, in effect, is a faster version of Storable's
+# dclone - only about 3.5 times as fast!
+#
+
+sub get_tmp
+{
+ my $call = uc shift;
+ my $ref = $u{$call};
+ if ($ref) {
+ if ($ref->[1]) {
+ return json_decode(json_encode($ref->[1]));
+ }
+ $ifh->seek($ref->[0], 0);
+ my $l = $ifh->getline;
+ if ($l) {
+ my ($k,$s) = split /\t/, $l;
+ my $j = json_decode($s);
+ return $j;
+ }
}
return undef;
}
#
-# get an existing either from the channel (if there is one) or from the database
+# Master branch:
+# get an existing record either from the channel (if there is one) or from the database
#
# It is important to note that if you have done a get (for the channel say) and you
# want access or modify that you must use this call (and you must NOT use get's all
# over the place willy nilly!)
#
+# NOTE: mojo branch with newusers system:
+# There is no longer any function difference between get_current()
+# and get() as they will always reference the same record as held in %u. This is because
+# there is no more (repeated) thawing of stored records from the underlying "database".
+#
+# BUT: notice the difference between this and the get_tmp() function. A get() will online an
+# othewise unused record, so for poking around looking for that locked out user:
+# MAKE SURE you use get_tmp(). It will likely still be quicker than DB_File and Storable!
+#
sub get_current
{
- my $call = uc shift;
-
- my $dxchan = DXChannel::get($call);
- if ($dxchan) {
- my $ref = $dxchan->user;
- return $ref if $ref && UNIVERSAL::isa($ref, 'DXUser');
-
- dbg("DXUser::get_current: got invalid user ref for $call from dxchan $dxchan->{call} ". ref $ref. " ignoring");
- }
- return get($call);
+ goto &get;
+
+# my $call = uc shift;
+#
+# my $dxchan = DXChannel::get($call);
+# if ($dxchan) {
+# my $ref = $dxchan->user;
+# return $ref if $ref && UNIVERSAL::isa($ref, 'DXUser');
+#
+# dbg("DXUser::get_current: got invalid user ref for $call from dxchan $dxchan->{call} ". ref $ref. " ignoring");
+# }
+# return get($call);
}
#
return (sort keys %u);
}
+#
+# get some calls - provide a qr// style selector string as a partial key
+#
+
+sub get_some_calls
+{
+ my $pattern = shift || qr/.*/;
+ return sort grep {$pattern} keys %u;
+}
+
+#
+# if I understand the term correctly, this is a sort of monad.
+#
+# Scan through the whole user file and select records that you want
+# to process further. This routine returns lines of json, yu
+#
+# the CODE ref should look like:
+# sub {
+# my $key = shift;
+# my $line = shift;
+# # maybe do a rough check to see if this is a likely candidate
+# return unless $line =~ /something/;
+# my $r = json_decode($l);
+# return (condition ? wanted thing : ());
+# }
+#
+
+sub scan
+{
+ my $c = shift;
+ my @out;
+
+ if (ref($c) eq 'CODE') {
+ foreach my $k (get_all_calls()) {
+ my $l = get($k, 1); # get the offline json line or the jsoned online version
+ push @out, $c->($k, $l) if $l;
+ }
+ } else {
+ dbg("DXUser::scan supplied argument is not a code ref");
+ }
+ return @out;
+}
+
#
# put - put a user
#
{
my $self = shift;
confess "Trying to put nothing!" unless $self && ref $self;
- my $call = $self->{call};
-
- $dbm->del($call);
- delete $self->{annok} if $self->{annok};
- delete $self->{dxok} if $self->{dxok};
-
- $lru->put($call, $self);
- my $ref = $self->encode;
- $dbm->put($call, $ref);
+ $self->{lastin} = $main::systime;
+ ++$modusers; # new or existing, it's still been modified
}
# freeze the user
sub encode
{
- goto &asc_encode unless $v3;
- my $self = shift;
- return nfreeze($self);
+ goto &json_encode;
}
# thaw the user
sub decode
{
- goto &asc_decode unless $v3;
- my $ref;
- $ref = thaw(shift);
- return $ref;
+ goto &json_decode;
}
-#
-# create a string from a user reference (in_ascii)
-#
-sub asc_encode
+sub json_decode
{
- my $self = shift;
- my $strip = shift;
- my $p;
-
- if ($strip) {
- my $ref = bless {}, ref $self;
- foreach my $k (qw(qth lat long qra sort call homenode node lastoper lastin)) {
- $ref->{$k} = $self->{$k} if exists $self->{$k};
- }
- $ref->{name} = $self->{name} if exists $self->{name} && $self->{name} !~ /selfspot/i;
- $p = dd($ref);
+ my $s = shift;
+ my $ref;
+ eval { $ref = $json->decode($s) };
+ if ($ref && !$@) {
+ return bless $ref, 'DXUser';
} else {
- $p = dd($self);
+ LogDbg('DXUser', "DXUser::json_decode: on '$s' $@");
}
- return $p;
+ return undef;
}
-#
-# create a hash from a string (in ascii)
-#
-sub asc_decode
+sub json_encode
{
- my $s = shift;
- my $ref;
- $s =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
- eval '$ref = ' . $s;
- if ($@) {
- LogDbg('err', "DXUser::asc_decode: on '$s' $@");
- $ref = undef;
- }
- return $ref;
+ my $ref = shift;
+ unbless($ref);
+ my $s = $json->encode($ref);
+ bless $ref, 'DXUser';
+ return $s;
}
-
+
#
# del - delete a user
#
{
my $self = shift;
my $call = $self->{call};
- $lru->remove($call);
- $dbm->del($call);
+ ++$delusers;
+ --$totusers;
+ --$cachedusers if $u{$call}->[1];
+ delete $u{$call};
}
#
my $ip = shift;
$self->{lastin} = $main::systime;
# add a record to the connect list
- my $ref = [$startt || $self->{startt}, $main::systime];
- push @$ref, $ip if $ip;
- push @{$self->{connlist}}, $ref;
- shift @{$self->{connlist}} if @{$self->{connlist}} > $maxconnlist;
- $self->put();
+# $self->put();
}
#
sub sync
{
- $dbm->sync;
+# $dbm->sync;
}
#
sub export
{
- my $name = shift || 'user_asc';
- my $basic_info_only = shift;
+ my $name = shift;
- my $fn = $name ne 'user_asc' ? $name : "$main::local_data/$name"; # force use of local
+ my $fn = $name || localdata("user_json"); # force use of local_data
# save old ones
move "$fn.oooo", "$fn.ooooo" if -e "$fn.oooo";
move "$fn.o", "$fn.oo" if -e "$fn.o";
move "$fn", "$fn.o" if -e "$fn";
+ my $json = JSON->new;
+ $json->canonical(1);
+
my $count = 0;
my $err = 0;
my $del = 0;
if ($fh) {
my $key = 0;
my $val = undef;
- my $action;
- my $t = scalar localtime;
- print $fh q{#!/usr/bin/perl
-#
-# The exported userfile for a DXSpider System
-#
-# Input file: $filename
-# Time: $t
-#
-
-package main;
-
-# search local then perl directories
-BEGIN {
- umask 002;
-
- # root of directory tree for this system
- $root = "/spider";
- $root = $ENV{'DXSPIDER_ROOT'} if $ENV{'DXSPIDER_ROOT'};
-
- unshift @INC, "$root/perl"; # this IS the right way round!
- unshift @INC, "$root/local";
-
- # try to detect a lockfile (this isn't atomic but
- # should do for now
- $lockfn = "$root/local_data/cluster.lck"; # lock file name
- if (-e $lockfn) {
- open(CLLOCK, "$lockfn") or die "Can't open Lockfile ($lockfn) $!";
- my $pid = <CLLOCK>;
- chomp $pid;
- die "Lockfile ($lockfn) and process $pid exists - cluster must be stopped first\n" if kill 0, $pid;
- close CLLOCK;
- }
-}
-
-use SysVar;
-use DXUser;
-
-if (@ARGV) {
- $main::userfn = shift @ARGV;
- print "user filename now $userfn\n";
-}
-
-package DXUser;
-
-del_file();
-init(1);
-%u = ();
-my $count = 0;
-my $err = 0;
-while (<DATA>) {
- chomp;
- my @f = split /\t/;
- my $ref = asc_decode($f[1]);
- if ($ref) {
- $ref->put();
- $count++;
- DXUser::sync() unless $count % 10000;
- } else {
- print "# Error: $f[0]\t$f[1]\n";
- $err++
- }
-}
-DXUser::sync(); DXUser::finish();
-print "There are $count user records and $err errors\n";
-};
- print $fh "__DATA__\n";
-
- for ($action = R_FIRST; !$dbm->seq($key, $val, $action); $action = R_NEXT) {
- if (!is_callsign($key) || $key =~ /^0/) {
- my $eval = $val;
- my $ekey = $key;
- $eval =~ s/([\%\x00-\x1f\x7f-\xff])/sprintf("%%%02X", ord($1))/eg;
- $ekey =~ s/([\%\x00-\x1f\x7f-\xff])/sprintf("%%%02X", ord($1))/eg;
- LogDbg('DXCommand', "Export Error1: $ekey\t$eval");
- eval {$dbm->del($key)};
- dbg(carp("Export Error1: $ekey\t$eval\n$@")) if $@;
- ++$err;
- next;
- }
- my $ref;
- eval {$ref = decode($val); };
- if ($ref) {
- my $t = $ref->{lastin} || 0;
- if ($ref->is_user && !$ref->{priv} && $main::systime > $t + $tooold) {
- unless ($ref->{lat} && $ref->{long} || $ref->{qth} || $ref->{qra}) {
- eval {$dbm->del($key)};
- dbg(carp("Export Error2: $key\t$val\n$@")) if $@;
- LogDbg('DXCommand', "$ref->{call} deleted, too old");
- $del++;
- next;
- }
+ foreach my $k (sort keys %u) {
+ my $r = $u{$k};
+ if ($r->{sort} eq 'U' && !$r->{priv} && $main::systime > $r->{lastin}+$tooold ) {
+ unless ($r->{lat} || $r->{long} || $r->{qra} || $r->{qth} || $r->{name}) {
+ LogDbg('DXUser', "DXUser::export deleting $k - too old, last in " . cldatetime($r->lastin) . " " . difft([$r->lastin, $main::systime]));
+ delete $u{$k};
+ ++$del;
+ next;
}
- # only store users that are reasonably active or have useful information
- print $fh "$key\t" . $ref->asc_encode($basic_info_only) . "\n";
- ++$count;
- } else {
- LogDbg('DXCommand', "Export Error3: $key\t" . carp($val) ."\n$@");
- eval {$dbm->del($key)};
- dbg(carp("Export Error3: $key\t$val\n$@")) if $@;
- ++$err;
}
- }
+ eval {$val = json_encode($r);};
+ if ($@) {
+ LogDbg('DXUser', "DXUser::export error encoding call: $k $@");
+ ++$err;
+ next;
+ }
+ $fh->print("$k\t$val\n");
+ ++$count;
+ }
$fh->close;
}
my $s = qq{Exported users to $fn - $count Users $del Deleted $err Errors ('sh/log Export' for details)};
- LogDbg('command', $s);
+ LogDbg('DXUser', $s);
return $s;
}
{
my $self = shift;
delete $self->{passwd};
+ $self->put;
}
sub unset_passphrase
{
my $self = shift;
delete $self->{passphrase};
+ $self->put;
}
sub set_believe
my $self = shift;
my $call = uc shift;
$self->{believe} ||= [];
- push @{$self->{believe}}, $call unless grep $_ eq $call, @{$self->{believe}};
+ unless (grep $_ eq $call, @{$self->{believe}}) {
+ push @{$self->{believe}}, $call;
+ $self->put;
+ };
}
sub unset_believe
if (exists $self->{believe}) {
$self->{believe} = [grep {$_ ne $call} @{$self->{believe}}];
delete $self->{believe} unless @{$self->{believe}};
+ $self->put;
}
}
$b->{$call} = shift if @_;
return $b->{$call};
}
+
+#
+# read in the latest version of the user file. As this file is immutable, the file one really wants is
+# a later (generated) copy. But, if the plain users.v4 file is all we have, we'll use that.
+#
+
+sub readinjson
+{
+ my $fn = $filename;
+ my $nfn = "$fn.n";
+ my $ofn = "$fn.o";
+
+ my $ta = [gettimeofday];
+ my $count = 0;
+ my $s;
+ my $err = 0;
+
+ if (-e $nfn && -e $fn && (stat($nfn))[9] > (stat($fn))[9]) {
+ # move the old file to .o
+ unlink $ofn;
+ move($fn, $ofn);
+ move($nfn, $fn);
+ };
+
+ # if we don't have a users.v4 at this point, look for a backup users.v4.o
+ unless (-e $fn) {
+ move($ofn, $fn);
+ }
+ if ($ifh) {
+ $ifh->seek(0, 0);
+ } else {
+ LogDbg("DXUser","DXUser::readinjson: opening $fn as users file");
+ $ifh = IO::File->new("+<$fn") or die "Cannot open $fn ($!)";
+ }
+ my $pos = $ifh->tell;
+ while (<$ifh>) {
+ chomp;
+ my @f = split /\t/;
+ $u{$f[0]} = [$pos];
+ $count++;
+ $pos = $ifh->tell;
+ }
+ $ifh->seek(0, 0);
+
+ # $ifh is "global" and should not be closed
+
+ dbg("DXUser::readinjson $count record headers read from $fn in ". _diffms($ta) . " mS");
+ return $totusers = $count;
+}
+
+#
+# Write a newer copy of the users.v4 file to users.v4.n, which is what will be read in.
+# This means that the existing users.v4 is not touched during a run of dxspider, or at least
+# not yet.
+
+sub writeoutjson
+{
+ my $ofn = shift || "$filename.n";
+ my $ta = [gettimeofday];
+
+ my $ofh = IO::File->new(">$ofn") or die "$ofn write error $!";
+ my $count = 0;
+ $ifh->seek(0, 0);
+ for my $k (sort keys %u) {
+ my $l = get($k, 1);
+ if ($l) {
+ chomp $l;
+ print $ofh "$k\t$l\n";
+ ++$count;
+ } else {
+ LogDbg('DXUser', "DXUser::writeoutjson callsign $k not found")
+ }
+ }
+
+ $ofh->close;
+ dbg("DXUser::writeoutjson $count records written to $ofn in ". _diffms($ta) . " mS");
+ return $count;
+}
1;
__END__