From 3a820c06e4293165c148320398fd8b576c744f52 Mon Sep 17 00:00:00 2001 From: Oliver Gorwits Date: Tue, 5 Sep 2023 22:55:33 +0100 Subject: [PATCH] #1084 move slow skiplist build to the first job on running backend --- bin/netdisco-backend-fg | 3 +- lib/App/Netdisco/Backend/Role/Manager.pm | 9 ++- lib/App/Netdisco/JobQueue/PostgreSQL.pm | 59 ++---------------- lib/App/Netdisco/Util/Device.pm | 36 +++++++++++ lib/App/Netdisco/Worker/Plugin/Arpwalk.pm | 11 ++++ lib/App/Netdisco/Worker/Plugin/DiscoverAll.pm | 11 ++++ lib/App/Netdisco/Worker/Plugin/Macwalk.pm | 11 ++++ lib/App/Netdisco/Worker/Plugin/Nbtwalk.pm | 11 ++++ .../Netdisco/Worker/Plugin/PrimeSkiplist.pm | 60 +++++++++++++++++++ share/config.yml | 46 +++++++------- 10 files changed, 180 insertions(+), 77 deletions(-) create mode 100644 lib/App/Netdisco/Worker/Plugin/PrimeSkiplist.pm diff --git a/bin/netdisco-backend-fg b/bin/netdisco-backend-fg index 8437795a..f869d169 100755 --- a/bin/netdisco-backend-fg +++ b/bin/netdisco-backend-fg @@ -35,7 +35,8 @@ use Role::Tiny::With; use MCE::Signal '-setpgrp'; use MCE::Flow Sereal => 1; -use MCE::Queue; +use MCE::Queue porder => $MCE::Queue::HIGHEST, + type => $MCE::Queue::FIFO; # set temporary MCE files' location in home directory my $home = ($ENV{NETDISCO_HOME} || $ENV{HOME}); diff --git a/lib/App/Netdisco/Backend/Role/Manager.pm b/lib/App/Netdisco/Backend/Role/Manager.pm index 589371b2..ce8b5301 100644 --- a/lib/App/Netdisco/Backend/Role/Manager.pm +++ b/lib/App/Netdisco/Backend/Role/Manager.pm @@ -5,6 +5,7 @@ use Dancer qw/:moose :syntax :script/; use List::Util 'sum'; use App::Netdisco::Util::MCE; +use App::Netdisco::Backend::Job; use App::Netdisco::JobQueue qw/jq_locked jq_getsome jq_lock jq_warm_thrusters/; @@ -21,9 +22,13 @@ sub worker_begin { debug "entering Manager ($wid) worker_begin()"; # job queue initialisation - debug "mgr ($wid): building acl hints (please be patient...)"; + # the expensive parts of this were moved to primeskiplist job jq_warm_thrusters; + # queue a job to rebuild the device action skip list + $self->{queue}->enqueuep(200, + App::Netdisco::Backend::Job->new({ job => 0, action => 'primeskiplist' })); + # requeue jobs locally debug "mgr ($wid): searching for jobs booked to this processing node"; my @jobs = jq_locked; @@ -60,7 +65,7 @@ sub worker_body { my %seen_job = (); $num_slots = parse_max_workers( setting('workers')->{tasks} ) - - $self->{queue}->pending(); + - $self->{queue}->pending(); debug "mgr ($wid): getting potential jobs for $num_slots workers"; foreach my $job ( jq_getsome($num_slots) ) { diff --git a/lib/App/Netdisco/JobQueue/PostgreSQL.pm b/lib/App/Netdisco/JobQueue/PostgreSQL.pm index 42912516..4d68181c 100644 --- a/lib/App/Netdisco/JobQueue/PostgreSQL.pm +++ b/lib/App/Netdisco/JobQueue/PostgreSQL.pm @@ -3,11 +3,9 @@ package App::Netdisco::JobQueue::PostgreSQL; use Dancer qw/:moose :syntax :script/; use Dancer::Plugin::DBIC 'schema'; -use App::Netdisco::Util::Device - qw/get_device is_discoverable is_macsuckable is_arpnipable/; +use App::Netdisco::Util::Device 'get_denied_actions'; use App::Netdisco::Backend::Job; -use Module::Load (); use JSON::PP (); use Try::Tiny; @@ -28,45 +26,8 @@ our @EXPORT_OK = qw/ /; our %EXPORT_TAGS = ( all => \@EXPORT_OK ); -# given a device, tests if any of the primary acls applies -# returns a list of job actions to be denied/skipped on this host. -sub _get_denied_actions { - my $device = shift; - my @badactions = (); - return @badactions unless $device; - $device = get_device($device); # might be no-op but is done in is_* anyway - - if ($device->is_pseudo) { - # always let pseudo devices do contact|location|portname|snapshot - # and additionally if there's a snapshot cache, is_discoverable will let - # them do all other discover and high prio actions - push @badactions, ('discover', grep { $_ !~ m/^(?:contact|location|portname|snapshot)$/ } - @{ setting('job_prio')->{high} }) - if not is_discoverable($device); - } - else { - push @badactions, ('discover', @{ setting('job_prio')->{high} }) - if not is_discoverable($device); - } - - push @badactions, (qw/macsuck nbtstat/) - if not is_macsuckable($device); - - push @badactions, 'arpnip' - if not is_arpnipable($device); - - return @badactions; -} - sub jq_warm_thrusters { - my @devices = schema(vars->{'tenant'})->resultset('Device')->all; my $rs = schema(vars->{'tenant'})->resultset('DeviceSkip'); - my %actionset = (); - - foreach my $d (@devices) { - my @badactions = _get_denied_actions($d); - $actionset{$d->ip} = \@badactions if scalar @badactions; - } schema(vars->{'tenant'})->txn_do(sub { $rs->search({ @@ -86,19 +47,6 @@ sub jq_warm_thrusters { actionset => { -value => [] }, # special syntax for matching empty ARRAY deferrals => 0, })->delete; - - $rs->update_or_create({ - backend => setting('workers')->{'BACKEND'}, - device => $_, - actionset => $actionset{$_}, - }, { key => 'primary' }) for keys %actionset; - - # add one faux record to allow *walk actions to see there is a backend running - $rs->update_or_create({ - backend => setting('workers')->{'BACKEND'}, - device => '255.255.255.255', - last_defer => \'LOCALTIMESTAMP', - }, { key => 'primary' }); }); } @@ -122,7 +70,7 @@ sub jq_getsome { # and the skiplist was primed. these should be checked against # the various acls and have device_skip entry added if needed, # and return false if it should have been skipped. - my @badactions = _get_denied_actions($job->device); + my @badactions = get_denied_actions($job->device); if (scalar @badactions) { schema(vars->{'tenant'})->resultset('DeviceSkip')->find_or_create({ backend => setting('workers')->{'BACKEND'}, device => $job->device, @@ -205,6 +153,7 @@ sub jq_queued { sub jq_lock { my $job = shift; + return true unless $job->id; my $happy = false; # lock db row and update to show job has been picked @@ -251,6 +200,8 @@ sub jq_defer { },{ key => 'device_skip_pkey' })->increment_deferrals; } + debug sprintf 'defer: job %s', ($job->id || 'unknown'); + # lock db row and update to show job is available schema(vars->{'tenant'})->resultset('Admin') ->search({ job => $job->id }, { for => 'update' }) diff --git a/lib/App/Netdisco/Util/Device.pm b/lib/App/Netdisco/Util/Device.pm index 143101e4..16843203 100644 --- a/lib/App/Netdisco/Util/Device.pm +++ b/lib/App/Netdisco/Util/Device.pm @@ -17,6 +17,7 @@ our @EXPORT_OK = qw/ is_discoverable is_discoverable_now is_arpnipable is_arpnipable_now is_macsuckable is_macsuckable_now + get_denied_actions /; our %EXPORT_TAGS = (all => \@EXPORT_OK); @@ -329,4 +330,39 @@ sub is_macsuckable_now { return is_macsuckable(@_); } +=head2 get_denied_actions( $device ) + +Checks configured ACLs for the device on this backend and returns list +of actions which are denied. + +=cut + +sub get_denied_actions { + my $device = shift; + my @badactions = (); + return @badactions unless $device; + $device = get_device($device); # might be no-op but is done in is_* anyway + + if ($device->is_pseudo) { + # always let pseudo devices do contact|location|portname|snapshot + # and additionally if there's a snapshot cache, is_discoverable will let + # them do all other discover and high prio actions + push @badactions, ('discover', grep { $_ !~ m/^(?:contact|location|portname|snapshot)$/ } + @{ setting('job_prio')->{high} }) + if not is_discoverable($device); + } + else { + push @badactions, ('discover', @{ setting('job_prio')->{high} }) + if not is_discoverable($device); + } + + push @badactions, (qw/macsuck nbtstat/) + if not is_macsuckable($device); + + push @badactions, 'arpnip' + if not is_arpnipable($device); + + return @badactions; +} + 1; diff --git a/lib/App/Netdisco/Worker/Plugin/Arpwalk.pm b/lib/App/Netdisco/Worker/Plugin/Arpwalk.pm index 28e3a7b6..78bd108b 100644 --- a/lib/App/Netdisco/Worker/Plugin/Arpwalk.pm +++ b/lib/App/Netdisco/Worker/Plugin/Arpwalk.pm @@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status'; use App::Netdisco::JobQueue 'jq_insert'; use Dancer::Plugin::DBIC 'schema'; +register_worker({ phase => 'check' }, sub { + return Status->defer("arpwalk skipped: have not yet primed skiplist") + unless schema(vars->{'tenant'})->resultset('DeviceSkip') + ->search({ + backend => setting('workers')->{'BACKEND'}, + device => '255.255.255.255', + })->count(); + + return Status->done('Arpwalk is able to run'); +}); + register_worker({ phase => 'main' }, sub { my ($job, $workerconf) = @_; diff --git a/lib/App/Netdisco/Worker/Plugin/DiscoverAll.pm b/lib/App/Netdisco/Worker/Plugin/DiscoverAll.pm index 597f65d9..8b1d3cb0 100644 --- a/lib/App/Netdisco/Worker/Plugin/DiscoverAll.pm +++ b/lib/App/Netdisco/Worker/Plugin/DiscoverAll.pm @@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status'; use App::Netdisco::JobQueue 'jq_insert'; use Dancer::Plugin::DBIC 'schema'; +register_worker({ phase => 'check' }, sub { + return Status->defer("discoverall skipped: have not yet primed skiplist") + unless schema(vars->{'tenant'})->resultset('DeviceSkip') + ->search({ + backend => setting('workers')->{'BACKEND'}, + device => '255.255.255.255', + })->count(); + + return Status->done('Discoverall is able to run'); +}); + register_worker({ phase => 'main' }, sub { my ($job, $workerconf) = @_; diff --git a/lib/App/Netdisco/Worker/Plugin/Macwalk.pm b/lib/App/Netdisco/Worker/Plugin/Macwalk.pm index f6fd569c..db5b315b 100644 --- a/lib/App/Netdisco/Worker/Plugin/Macwalk.pm +++ b/lib/App/Netdisco/Worker/Plugin/Macwalk.pm @@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status'; use App::Netdisco::JobQueue 'jq_insert'; use Dancer::Plugin::DBIC 'schema'; +register_worker({ phase => 'check' }, sub { + return Status->defer("macwalk skipped: have not yet primed skiplist") + unless schema(vars->{'tenant'})->resultset('DeviceSkip') + ->search({ + backend => setting('workers')->{'BACKEND'}, + device => '255.255.255.255', + })->count(); + + return Status->done('Macwalk is able to run'); +}); + register_worker({ phase => 'main' }, sub { my ($job, $workerconf) = @_; diff --git a/lib/App/Netdisco/Worker/Plugin/Nbtwalk.pm b/lib/App/Netdisco/Worker/Plugin/Nbtwalk.pm index 9ff3731a..b2aa1eff 100644 --- a/lib/App/Netdisco/Worker/Plugin/Nbtwalk.pm +++ b/lib/App/Netdisco/Worker/Plugin/Nbtwalk.pm @@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status'; use App::Netdisco::JobQueue 'jq_insert'; use Dancer::Plugin::DBIC 'schema'; +register_worker({ phase => 'check' }, sub { + return Status->defer("nbtwalk skipped: have not yet primed skiplist") + unless schema(vars->{'tenant'})->resultset('DeviceSkip') + ->search({ + backend => setting('workers')->{'BACKEND'}, + device => '255.255.255.255', + })->count(); + + return Status->done('Nbtwalk is able to run'); +}); + register_worker({ phase => 'main' }, sub { my ($job, $workerconf) = @_; diff --git a/lib/App/Netdisco/Worker/Plugin/PrimeSkiplist.pm b/lib/App/Netdisco/Worker/Plugin/PrimeSkiplist.pm new file mode 100644 index 00000000..6137e471 --- /dev/null +++ b/lib/App/Netdisco/Worker/Plugin/PrimeSkiplist.pm @@ -0,0 +1,60 @@ +package App::Netdisco::Worker::Plugin::PrimeSkiplist; + +use Dancer ':syntax'; +use Dancer::Plugin::DBIC 'schema'; + +use App::Netdisco::Worker::Plugin; +use aliased 'App::Netdisco::Worker::Status'; + +use App::Netdisco::Util::Device 'get_denied_actions'; +use App::Netdisco::Backend::Job; + +use Try::Tiny; + +register_worker({ phase => 'main' }, sub { + my ($job, $workerconf) = @_; + my $happy = false; + + my $devices = schema(vars->{'tenant'})->resultset('Device'); + my $rs = schema(vars->{'tenant'})->resultset('DeviceSkip'); + my %actionset = (); + + while (my $d = $devices->next) { + my @badactions = get_denied_actions($d); + $actionset{$d->ip} = \@badactions if scalar @badactions; + } + + debug sprintf 'priming device action skip list for %d devices', + scalar keys %actionset; + + try { + schema(vars->{'tenant'})->txn_do(sub { + $rs->update_or_create({ + backend => setting('workers')->{'BACKEND'}, + device => $_, + actionset => $actionset{$_}, + }, { key => 'primary' }) for keys %actionset; + }); + + # add one faux record to allow *walk actions to see there is a backend running + $rs->update_or_create({ + backend => setting('workers')->{'BACKEND'}, + device => '255.255.255.255', + last_defer => \'LOCALTIMESTAMP', + }, { key => 'primary' }); + + $happy = true; + } + catch { + error $_; + }; + + if ($happy) { + return Status->done("Primed device action skip list"); + } + else { + return Status->error("Failed to prime device action skip list"); + } +}); + +true; diff --git a/share/config.yml b/share/config.yml index 37530181..964ffd4a 100644 --- a/share/config.yml +++ b/share/config.yml @@ -456,6 +456,7 @@ workers: # this one takes ages snapshot_timeout: 1200 +primeskiplist_timeout: 1200 # 50 minutes jobs_stale_after: 3000 @@ -485,27 +486,27 @@ schedule: job_prio: high: - - contact - - hook::exec - - hook::http - - location - - portcontrol - - portname - - power - - snapshot - - vlan - - delete + - 'contact' + - 'delete' + - 'hook::exec' + - 'hook::http' + - 'location' + - 'portcontrol' + - 'portname' + - 'power' + - 'snapshot' + - 'vlan' normal: - - arpnip - - arpwalk - - discover - - discoverall - - expire - - macsuck - - macwalk - - nbtstat - - nbtwalk - - stats + - 'arpnip' + - 'arpwalk' + - 'discover' + - 'discoverall' + - 'expire' + - 'macsuck' + - 'macwalk' + - 'nbtstat' + - 'nbtwalk' + - 'stats' worker_plugins: - 'Internal::SNMPFastDiscover' @@ -559,6 +560,7 @@ worker_plugins: - 'PortControl' - 'PortName' - 'Power' + - 'PrimeSkiplist' - 'Psql' - 'Renumber' - 'Show' @@ -579,6 +581,10 @@ driver_priority: deferrable_actions: - 'snapshot' + - 'nbtwalk' + - 'macwalk' + - 'arpwalk' + - 'discoverall' # --------------- # GraphViz Export