Avoid lock/defer of jobs denied via ACL

This commit adds a table 'device_skip' that is used to restrict job queue
searches to avoid jobs that are not permitted on this backend via *_no ACLs,
or jobs on devices that have previously encountered multiple SNMP timeouts.

When the backend loads or a device is added, a row is added to the table if
that device should not be polled on this backend (together with the job
actions which are to be skipped/denied). When an SNMP connect to a device
fails, a counter in the same row (or a new row) is incremented.

There is also a new report 'SNMP Connect Failures' to show the devices with
non-zero SNMP connect failure counters. The configurable setting
'max_deferrals' sets the threshold after which the device is no longer polled.

To reset the deferrals/failures count, restart the Netdisco backend (which
regenerates 'device_skip' cache entries).

Squashed commit of the following:

commit b5e32c219d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:55:14 2017 +0100

    show all failed connections in report

commit ffce3cee84
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:12:39 2017 +0100

    only resolve fqdn once

commit cc4f680f01
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:10:20 2017 +0100

    Revert "only resolve fqdn once"

    This reverts commit 3d136a54de.

commit d8d082b30e
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:09:05 2017 +0100

    a report to show SNMP failures

commit 3d136a54de
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 19:37:58 2017 +0100

    only resolve fqdn once

commit 4550b8a84c
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 17:27:43 2017 +0100

    skipover now implicit from deferrals/actionset; fix sql where logic with better correlation

commit b51edbccd2
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 16:11:29 2017 +0100

    only abort lock if action matches badactions

commit 415559b24f
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:56:42 2017 +0100

    set skipover true when adding to actionset

commit 1086f2c467
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:50:56 2017 +0100

    fix empty actionset

commit 31962580b8
Merge: 9b2e993e 6808133b
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:25:08 2017 +0100

    Merge branch 'og-device_skip' of github.com:netdisco/netdisco into og-device_skip

commit 6808133bdb
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:19:54 2017 +0100

    in-job checks for acls are required for netdisco-do foreground actions

commit 3944dd7813
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:18:30 2017 +0100

    avoid extra device lookup

commit 9b2e993e0f
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 12:31:36 2017 +0100

    also delete device_skip rows when deleting device

commit b55854e91d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 11:34:27 2017 +0100

    actions in device_skip table are now an array/set

commit 5e126eef07
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 09:36:33 2017 +0100

    typo

commit 44266f2767
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 09:14:25 2017 +0100

    *able checks within jobs should not be necessary with skiplist

commit e7c22e7d11
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:58:57 2017 +0100

    increment deferrals field when job is deferred

commit 88ae9c00ba
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:40:27 2017 +0100

    turn connect fail into defer

commit eac1857043
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:26:59 2017 +0100

    rename failures column to be deferrals

commit 96ed444bbb
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Mon May 22 22:52:51 2017 +0100

    set up list of jobs the backend instance should skip

commit 3a0019296d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Mon May 22 22:01:50 2017 +0100

    separate out is_*able last_* checks

commit cf8589aba2
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Sun May 21 22:35:38 2017 +0100

    change from ignore to skip name

commit ed193356f8
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Sun May 21 14:52:33 2017 +0100

    device_ignore table to track devices to skip in polling
This commit is contained in:
Oliver Gorwits
2017-05-27 08:50:08 +01:00
parent 47a5f40efe
commit 9a72d7e74a
21 changed files with 382 additions and 55 deletions

View File

@@ -23,7 +23,7 @@ sub _set_device_generic {
# snmp connect using rw community
my $info = snmp_connect_rw($ip)
or return job_error("Failed to connect to device [$ip] to update $slot");
or return job_defer("Failed to connect to device [$ip] to update $slot");
my $method = 'set_'. $slot;
my $rv = $info->$method($data);

View File

@@ -75,7 +75,7 @@ sub _set_port_generic {
if ($device->vendor ne 'netdisco') {
# snmp connect using rw community
my $info = snmp_connect_rw($ip)
or return job_error("Failed to connect to device [$ip] to control port");
or return job_defer("Failed to connect to device [$ip] to control port");
my $iid = get_iid($info, $port)
or return job_error("Failed to get port ID for [$pn] from [$ip]");
@@ -128,7 +128,7 @@ sub power {
# snmp connect using rw community
my $info = snmp_connect_rw($ip)
or return job_error("Failed to connect to device [$ip] to control port");
or return job_defer("Failed to connect to device [$ip] to control power");
my $powerid = get_powerid($info, $port)
or return job_error("Failed to get power ID for [$pn] from [$ip]");

View File

@@ -8,7 +8,8 @@ use App::Netdisco::Util::Backend;
use Role::Tiny;
use namespace::clean;
use App::Netdisco::JobQueue qw/jq_locked jq_getsome jq_getsomep jq_lock/;
use App::Netdisco::JobQueue
qw/jq_locked jq_getsome jq_getsomep jq_lock jq_prime_skiplist/;
sub worker_begin {
my $self = shift;
@@ -19,6 +20,9 @@ sub worker_begin {
debug "entering Manager ($wid) worker_begin()";
# rebuild device skip hints
jq_prime_skiplist;
# requeue jobs locally
debug "mgr ($wid): searching for jobs booked to this processing node";
my @jobs = jq_locked;

View File

@@ -1,7 +1,7 @@
package App::Netdisco::Backend::Worker::Poller::Arpnip;
use App::Netdisco::Core::Arpnip 'do_arpnip';
use App::Netdisco::Util::Device 'is_arpnipable';
use App::Netdisco::Util::Device 'is_arpnipable_now';
use Role::Tiny;
use namespace::clean;
@@ -9,7 +9,7 @@ use namespace::clean;
with 'App::Netdisco::Backend::Worker::Poller::Common';
sub arpnip_action { \&do_arpnip }
sub arpnip_filter { \&is_arpnipable }
sub arpnip_filter { \&is_arpnipable_now }
sub arpnip_layer { 3 }
sub arpwalk { (shift)->_walk_body('arpnip', @_) }

View File

@@ -64,7 +64,7 @@ sub _single_body {
my $snmp = snmp_connect($device);
if (!defined $snmp) {
return job_error("$job_type failed: could not SNMP connect to $host");
return job_defer("$job_type failed: could not SNMP connect to $host");
}
unless ($snmp->has_layer( $job_layer )) {

View File

@@ -3,7 +3,7 @@ package App::Netdisco::Backend::Worker::Poller::Device;
use Dancer qw/:moose :syntax :script/;
use App::Netdisco::Util::SNMP 'snmp_connect';
use App::Netdisco::Util::Device qw/get_device is_discoverable/;
use App::Netdisco::Util::Device qw/get_device is_discoverable_now/;
use App::Netdisco::Core::Discover ':all';
use App::Netdisco::Backend::Util ':all';
use App::Netdisco::JobQueue qw/jq_queued jq_insert/;
@@ -54,13 +54,13 @@ sub discover {
return job_done("discover skipped: $host is pseudo-device");
}
unless (is_discoverable($device->ip)) {
unless (is_discoverable_now($device)) {
return job_defer("discover deferred: $host is not discoverable");
}
my $snmp = snmp_connect($device);
if (!defined $snmp) {
return job_error("discover failed: could not SNMP connect to $host");
return job_defer("discover failed: could not SNMP connect to $host");
}
store_device($device, $snmp);

View File

@@ -1,7 +1,7 @@
package App::Netdisco::Backend::Worker::Poller::Macsuck;
use App::Netdisco::Core::Macsuck 'do_macsuck';
use App::Netdisco::Util::Device 'is_macsuckable';
use App::Netdisco::Util::Device 'is_macsuckable_now';
use Role::Tiny;
use namespace::clean;
@@ -9,7 +9,7 @@ use namespace::clean;
with 'App::Netdisco::Backend::Worker::Poller::Common';
sub macsuck_action { \&do_macsuck }
sub macsuck_filter { \&is_macsuckable }
sub macsuck_filter { \&is_macsuckable_now }
sub macsuck_layer { 2 }
sub macwalk { (shift)->_walk_body('macsuck', @_) }

View File

@@ -5,7 +5,7 @@ use Dancer::Plugin::DBIC 'schema';
use App::Netdisco::Core::Nbtstat qw/nbtstat_resolve_async store_nbt/;
use App::Netdisco::Util::Node 'is_nbtstatable';
use App::Netdisco::Util::Device qw/get_device is_discoverable/;
use App::Netdisco::Util::Device qw/get_device is_macsuckable/;
use App::Netdisco::Backend::Util ':all';
use NetAddr::IP::Lite ':lower';
@@ -29,8 +29,8 @@ sub nbtstat {
or job_error("nbtstat failed: unable to interpret device parameter");
my $host = $device->ip;
unless (is_discoverable($device->ip)) {
return job_defer("nbtstat deferred: $host is not discoverable");
unless (is_macsuckable($device)) {
return job_defer("nbtstat deferred: $host is not macsuckable");
}
# get list of nodes on device