#1084 move slow skiplist build to the first job on running backend

This commit is contained in:
Oliver Gorwits
2023-09-05 22:55:33 +01:00
parent 8b47e7e2f8
commit 3a820c06e4
10 changed files with 180 additions and 77 deletions

View File

@@ -5,6 +5,7 @@ use Dancer qw/:moose :syntax :script/;
use List::Util 'sum';
use App::Netdisco::Util::MCE;
use App::Netdisco::Backend::Job;
use App::Netdisco::JobQueue
qw/jq_locked jq_getsome jq_lock jq_warm_thrusters/;
@@ -21,9 +22,13 @@ sub worker_begin {
debug "entering Manager ($wid) worker_begin()";
# job queue initialisation
debug "mgr ($wid): building acl hints (please be patient...)";
# the expensive parts of this were moved to primeskiplist job
jq_warm_thrusters;
# queue a job to rebuild the device action skip list
$self->{queue}->enqueuep(200,
App::Netdisco::Backend::Job->new({ job => 0, action => 'primeskiplist' }));
# requeue jobs locally
debug "mgr ($wid): searching for jobs booked to this processing node";
my @jobs = jq_locked;
@@ -60,7 +65,7 @@ sub worker_body {
my %seen_job = ();
$num_slots = parse_max_workers( setting('workers')->{tasks} )
- $self->{queue}->pending();
- $self->{queue}->pending();
debug "mgr ($wid): getting potential jobs for $num_slots workers";
foreach my $job ( jq_getsome($num_slots) ) {

View File

@@ -3,11 +3,9 @@ package App::Netdisco::JobQueue::PostgreSQL;
use Dancer qw/:moose :syntax :script/;
use Dancer::Plugin::DBIC 'schema';
use App::Netdisco::Util::Device
qw/get_device is_discoverable is_macsuckable is_arpnipable/;
use App::Netdisco::Util::Device 'get_denied_actions';
use App::Netdisco::Backend::Job;
use Module::Load ();
use JSON::PP ();
use Try::Tiny;
@@ -28,45 +26,8 @@ our @EXPORT_OK = qw/
/;
our %EXPORT_TAGS = ( all => \@EXPORT_OK );
# Given a device, tests whether any of the primary ACLs applies and
# returns the list of job actions to be denied/skipped on this host.
# Returns an empty list when no device is supplied.
sub _get_denied_actions {
my $device = shift;
my @badactions = ();
# nothing to test without a device, so nothing is denied
return @badactions unless $device;
$device = get_device($device); # might be no-op but is done in is_* anyway
if ($device->is_pseudo) {
# always let pseudo devices do contact|location|portname|snapshot
# and additionally if there's a snapshot cache, is_discoverable will let
# them do all other discover and high prio actions
push @badactions, ('discover', grep { $_ !~ m/^(?:contact|location|portname|snapshot)$/ }
@{ setting('job_prio')->{high} })
if not is_discoverable($device);
}
else {
# non-pseudo devices which are not discoverable are denied 'discover'
# and every action listed in the job_prio "high" setting
push @badactions, ('discover', @{ setting('job_prio')->{high} })
if not is_discoverable($device);
}
# the macsuck check gates both the macsuck and nbtstat actions
push @badactions, (qw/macsuck nbtstat/)
if not is_macsuckable($device);
# the arpnip check gates only the arpnip action
push @badactions, 'arpnip'
if not is_arpnipable($device);
return @badactions;
}
sub jq_warm_thrusters {
my @devices = schema(vars->{'tenant'})->resultset('Device')->all;
my $rs = schema(vars->{'tenant'})->resultset('DeviceSkip');
my %actionset = ();
foreach my $d (@devices) {
my @badactions = _get_denied_actions($d);
$actionset{$d->ip} = \@badactions if scalar @badactions;
}
schema(vars->{'tenant'})->txn_do(sub {
$rs->search({
@@ -86,19 +47,6 @@ sub jq_warm_thrusters {
actionset => { -value => [] }, # special syntax for matching empty ARRAY
deferrals => 0,
})->delete;
$rs->update_or_create({
backend => setting('workers')->{'BACKEND'},
device => $_,
actionset => $actionset{$_},
}, { key => 'primary' }) for keys %actionset;
# add one faux record to allow *walk actions to see there is a backend running
$rs->update_or_create({
backend => setting('workers')->{'BACKEND'},
device => '255.255.255.255',
last_defer => \'LOCALTIMESTAMP',
}, { key => 'primary' });
});
}
@@ -122,7 +70,7 @@ sub jq_getsome {
# and the skiplist was primed. these should be checked against
# the various acls and have device_skip entry added if needed,
# and return false if it should have been skipped.
my @badactions = _get_denied_actions($job->device);
my @badactions = get_denied_actions($job->device);
if (scalar @badactions) {
schema(vars->{'tenant'})->resultset('DeviceSkip')->find_or_create({
backend => setting('workers')->{'BACKEND'}, device => $job->device,
@@ -205,6 +153,7 @@ sub jq_queued {
sub jq_lock {
my $job = shift;
return true unless $job->id;
my $happy = false;
# lock db row and update to show job has been picked
@@ -251,6 +200,8 @@ sub jq_defer {
},{ key => 'device_skip_pkey' })->increment_deferrals;
}
debug sprintf 'defer: job %s', ($job->id || 'unknown');
# lock db row and update to show job is available
schema(vars->{'tenant'})->resultset('Admin')
->search({ job => $job->id }, { for => 'update' })

View File

@@ -17,6 +17,7 @@ our @EXPORT_OK = qw/
is_discoverable is_discoverable_now
is_arpnipable is_arpnipable_now
is_macsuckable is_macsuckable_now
get_denied_actions
/;
our %EXPORT_TAGS = (all => \@EXPORT_OK);
@@ -329,4 +330,39 @@ sub is_macsuckable_now {
return is_macsuckable(@_);
}
=head2 get_denied_actions( $device )

Checks the configured ACLs for the device on this backend and returns the
list of actions which are denied. An empty list is returned when no device
is given, or when no action is denied.

=cut

sub get_denied_actions {
  my $device = shift;
  my @denied = ();
  return @denied unless $device;

  # normalise via get_device: may be a no-op, and the is_* checks do it anyway
  $device = get_device($device);
  my $pseudo = $device->is_pseudo;

  if (not is_discoverable($device)) {
    my @high = @{ setting('job_prio')->{high} };

    # pseudo devices may always do contact|location|portname|snapshot, and
    # if there's a snapshot cache, is_discoverable will let them do all
    # other discover and high prio actions too
    @high = grep { $_ !~ m/^(?:contact|location|portname|snapshot)$/ } @high
      if $pseudo;

    push @denied, 'discover', @high;
  }

  if (not is_macsuckable($device)) {
    push @denied, qw/macsuck nbtstat/;
  }

  if (not is_arpnipable($device)) {
    push @denied, 'arpnip';
  }

  return @denied;
}
1;

View File

@@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status';
use App::Netdisco::JobQueue 'jq_insert';
use Dancer::Plugin::DBIC 'schema';
# Check phase: defer the job until this backend has the 255.255.255.255
# placeholder row in DeviceSkip, otherwise report that the walk may run.
register_worker({ phase => 'check' }, sub {
  my $primed = schema(vars->{'tenant'})->resultset('DeviceSkip')
    ->search({
      backend => setting('workers')->{'BACKEND'},
      device  => '255.255.255.255',
    })->count();

  return Status->done('Arpwalk is able to run') if $primed;

  return Status->defer("arpwalk skipped: have not yet primed skiplist");
});
register_worker({ phase => 'main' }, sub {
my ($job, $workerconf) = @_;

View File

@@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status';
use App::Netdisco::JobQueue 'jq_insert';
use Dancer::Plugin::DBIC 'schema';
# Check phase: defer the job until this backend has the 255.255.255.255
# placeholder row in DeviceSkip, otherwise report that the walk may run.
register_worker({ phase => 'check' }, sub {
  my $primed = schema(vars->{'tenant'})->resultset('DeviceSkip')
    ->search({
      backend => setting('workers')->{'BACKEND'},
      device  => '255.255.255.255',
    })->count();

  return Status->done('Discoverall is able to run') if $primed;

  return Status->defer("discoverall skipped: have not yet primed skiplist");
});
register_worker({ phase => 'main' }, sub {
my ($job, $workerconf) = @_;

View File

@@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status';
use App::Netdisco::JobQueue 'jq_insert';
use Dancer::Plugin::DBIC 'schema';
# Check phase: defer the job until this backend has the 255.255.255.255
# placeholder row in DeviceSkip, otherwise report that the walk may run.
register_worker({ phase => 'check' }, sub {
  my $primed = schema(vars->{'tenant'})->resultset('DeviceSkip')
    ->search({
      backend => setting('workers')->{'BACKEND'},
      device  => '255.255.255.255',
    })->count();

  return Status->done('Macwalk is able to run') if $primed;

  return Status->defer("macwalk skipped: have not yet primed skiplist");
});
register_worker({ phase => 'main' }, sub {
my ($job, $workerconf) = @_;

View File

@@ -7,6 +7,17 @@ use aliased 'App::Netdisco::Worker::Status';
use App::Netdisco::JobQueue 'jq_insert';
use Dancer::Plugin::DBIC 'schema';
# Check phase: defer the job until this backend has the 255.255.255.255
# placeholder row in DeviceSkip, otherwise report that the walk may run.
register_worker({ phase => 'check' }, sub {
  my $primed = schema(vars->{'tenant'})->resultset('DeviceSkip')
    ->search({
      backend => setting('workers')->{'BACKEND'},
      device  => '255.255.255.255',
    })->count();

  return Status->done('Nbtwalk is able to run') if $primed;

  return Status->defer("nbtwalk skipped: have not yet primed skiplist");
});
register_worker({ phase => 'main' }, sub {
my ($job, $workerconf) = @_;

View File

@@ -0,0 +1,60 @@
package App::Netdisco::Worker::Plugin::PrimeSkiplist;
use Dancer ':syntax';
use Dancer::Plugin::DBIC 'schema';
use App::Netdisco::Worker::Plugin;
use aliased 'App::Netdisco::Worker::Status';
use App::Netdisco::Util::Device 'get_denied_actions';
use App::Netdisco::Backend::Job;
use Try::Tiny;
# Main phase: work out the denied actions for every known device, store them
# in DeviceSkip for this backend, then write the 255.255.255.255 placeholder
# row. Returns a done Status on success and an error Status if any of the
# database writes throws.
register_worker({ phase => 'main' }, sub {
  my ($job, $workerconf) = @_;

  my $device_rs = schema(vars->{'tenant'})->resultset('Device');
  my $skip_rs   = schema(vars->{'tenant'})->resultset('DeviceSkip');

  # map of device IP => arrayref of denied actions; devices with nothing
  # denied get no entry
  my %denied_for = ();
  while (my $device = $device_rs->next) {
    my @denied = get_denied_actions($device);
    next unless scalar @denied;
    $denied_for{ $device->ip } = \@denied;
  }

  debug sprintf 'priming device action skip list for %d devices',
    scalar keys %denied_for;

  # the try block yields true only if every statement in it completed
  my $primed = try {
    schema(vars->{'tenant'})->txn_do(sub {
      foreach my $ip (keys %denied_for) {
        $skip_rs->update_or_create({
          backend   => setting('workers')->{'BACKEND'},
          device    => $ip,
          actionset => $denied_for{$ip},
        }, { key => 'primary' });
      }
    });

    # add one faux record to allow *walk actions to see there is a backend running
    $skip_rs->update_or_create({
      backend    => setting('workers')->{'BACKEND'},
      device     => '255.255.255.255',
      last_defer => \'LOCALTIMESTAMP',
    }, { key => 'primary' });

    true;
  }
  catch {
    error $_;
    false;
  };

  return $primed
    ? Status->done("Primed device action skip list")
    : Status->error("Failed to prime device action skip list");
});
true;