Avoid lock/defer of jobs deined via ACL
This commit adds a table 'device_skip' that is used to restrict job queue searches to avoid jobs that are not permitted on this backend via *_no ACLs, or jobs on devices that have previously encountered multiple SNMP timeouts. When the backend loads or a device is added, a row is added to the table if that device should not be polled on this backend (together with the job actions which are to be skipped/denied). When a device SNMP connect fails a counter in the same row (or a new row) is incremented. There is also a new report 'SNMP Connect Failures' to show the devices with non-zero SNMP connect failure counters. A configurable limit in the setting 'max_deferrals' is used to set the threshold of no longer polling the device. To reset the deferrals/failures count, restart the Netdisco backend (which regenerates 'device_skip' cache entries). Squashed commit of the following: commitb5e32c219dAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 20:55:14 2017 +0100 show all failed connections in report commitffce3cee84Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 20:12:39 2017 +0100 only resolve fqdn once commitcc4f680f01Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 20:10:20 2017 +0100 Revert "only resolve fqdn once" This reverts commit3d136a54de. commitd8d082b30eAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 20:09:05 2017 +0100 a report to show SNMP failures commit3d136a54deAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 19:37:58 2017 +0100 only resolve fqdn once commit4550b8a84cAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 17:27:43 2017 +0100 skipover now implicit from deferrals/actionset; fix sql where logic with better correlation commitb51edbccd2Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 16:11:29 2017 +0100 only abort lock if action matches badactions commit415559b24fAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 13:56:42 2017 +0100 set skipover true when adding to actionset commit1086f2c467Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 13:50:56 2017 +0100 fix empty actionset commit31962580b8Merge:9b2e993e6808133bAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 13:25:08 2017 +0100 Merge branch 'og-device_skip' of github.com:netdisco/netdisco into og-device_skip commit6808133bdbAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 13:19:54 2017 +0100 in-job checks for acls are required for netdisco-do foreground actions commit3944dd7813Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 13:18:30 2017 +0100 avoid extra device lookup commit9b2e993e0fAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 12:31:36 2017 +0100 also delete device_skip rows when deleting device commitb55854e91dAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 11:34:27 2017 +0100 actions in device_skip table are now an array/set commit5e126eef07Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 09:36:33 2017 +0100 typo commit44266f2767Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 09:14:25 2017 +0100 *able checks within jobs should not be necessary with skiplist commite7c22e7d11Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 08:58:57 2017 +0100 increment deferrals field when job is deferred commit88ae9c00baAuthor: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 08:40:27 2017 +0100 turn connect fail into defer commiteac1857043Author: Oliver Gorwits <oliver@cpan.org> Date: Tue May 23 08:26:59 2017 +0100 rename failures column to be deferrals commit96ed444bbbAuthor: Oliver Gorwits <oliver@cpan.org> Date: Mon May 22 22:52:51 2017 +0100 set up list of jobs the backend instance should skip commit3a0019296dAuthor: Oliver Gorwits <oliver@cpan.org> Date: Mon May 22 22:01:50 2017 +0100 separate out is_*able last_* checks commitcf8589aba2Author: Oliver Gorwits <oliver@cpan.org> Date: Sun May 21 22:35:38 2017 +0100 change from ignore to skip name commited193356f8Author: Oliver Gorwits <oliver@cpan.org> Date: Sun May 21 14:52:33 2017 +0100 device_ignore table to track devices to skip in polling
This commit is contained in:
@@ -23,7 +23,7 @@ sub _set_device_generic {
|
||||
|
||||
# snmp connect using rw community
|
||||
my $info = snmp_connect_rw($ip)
|
||||
or return job_error("Failed to connect to device [$ip] to update $slot");
|
||||
or return job_defer("Failed to connect to device [$ip] to update $slot");
|
||||
|
||||
my $method = 'set_'. $slot;
|
||||
my $rv = $info->$method($data);
|
||||
|
||||
@@ -75,7 +75,7 @@ sub _set_port_generic {
|
||||
if ($device->vendor ne 'netdisco') {
|
||||
# snmp connect using rw community
|
||||
my $info = snmp_connect_rw($ip)
|
||||
or return job_error("Failed to connect to device [$ip] to control port");
|
||||
or return job_defer("Failed to connect to device [$ip] to control port");
|
||||
|
||||
my $iid = get_iid($info, $port)
|
||||
or return job_error("Failed to get port ID for [$pn] from [$ip]");
|
||||
@@ -128,7 +128,7 @@ sub power {
|
||||
|
||||
# snmp connect using rw community
|
||||
my $info = snmp_connect_rw($ip)
|
||||
or return job_error("Failed to connect to device [$ip] to control port");
|
||||
or return job_defer("Failed to connect to device [$ip] to control power");
|
||||
|
||||
my $powerid = get_powerid($info, $port)
|
||||
or return job_error("Failed to get power ID for [$pn] from [$ip]");
|
||||
|
||||
@@ -8,7 +8,8 @@ use App::Netdisco::Util::Backend;
|
||||
use Role::Tiny;
|
||||
use namespace::clean;
|
||||
|
||||
use App::Netdisco::JobQueue qw/jq_locked jq_getsome jq_getsomep jq_lock/;
|
||||
use App::Netdisco::JobQueue
|
||||
qw/jq_locked jq_getsome jq_getsomep jq_lock jq_prime_skiplist/;
|
||||
|
||||
sub worker_begin {
|
||||
my $self = shift;
|
||||
@@ -19,6 +20,9 @@ sub worker_begin {
|
||||
|
||||
debug "entering Manager ($wid) worker_begin()";
|
||||
|
||||
# rebuild device skip hints
|
||||
jq_prime_skiplist;
|
||||
|
||||
# requeue jobs locally
|
||||
debug "mgr ($wid): searching for jobs booked to this processing node";
|
||||
my @jobs = jq_locked;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package App::Netdisco::Backend::Worker::Poller::Arpnip;
|
||||
|
||||
use App::Netdisco::Core::Arpnip 'do_arpnip';
|
||||
use App::Netdisco::Util::Device 'is_arpnipable';
|
||||
use App::Netdisco::Util::Device 'is_arpnipable_now';
|
||||
|
||||
use Role::Tiny;
|
||||
use namespace::clean;
|
||||
@@ -9,7 +9,7 @@ use namespace::clean;
|
||||
with 'App::Netdisco::Backend::Worker::Poller::Common';
|
||||
|
||||
sub arpnip_action { \&do_arpnip }
|
||||
sub arpnip_filter { \&is_arpnipable }
|
||||
sub arpnip_filter { \&is_arpnipable_now }
|
||||
sub arpnip_layer { 3 }
|
||||
|
||||
sub arpwalk { (shift)->_walk_body('arpnip', @_) }
|
||||
|
||||
@@ -64,7 +64,7 @@ sub _single_body {
|
||||
|
||||
my $snmp = snmp_connect($device);
|
||||
if (!defined $snmp) {
|
||||
return job_error("$job_type failed: could not SNMP connect to $host");
|
||||
return job_defer("$job_type failed: could not SNMP connect to $host");
|
||||
}
|
||||
|
||||
unless ($snmp->has_layer( $job_layer )) {
|
||||
|
||||
@@ -3,7 +3,7 @@ package App::Netdisco::Backend::Worker::Poller::Device;
|
||||
use Dancer qw/:moose :syntax :script/;
|
||||
|
||||
use App::Netdisco::Util::SNMP 'snmp_connect';
|
||||
use App::Netdisco::Util::Device qw/get_device is_discoverable/;
|
||||
use App::Netdisco::Util::Device qw/get_device is_discoverable_now/;
|
||||
use App::Netdisco::Core::Discover ':all';
|
||||
use App::Netdisco::Backend::Util ':all';
|
||||
use App::Netdisco::JobQueue qw/jq_queued jq_insert/;
|
||||
@@ -54,13 +54,13 @@ sub discover {
|
||||
return job_done("discover skipped: $host is pseudo-device");
|
||||
}
|
||||
|
||||
unless (is_discoverable($device->ip)) {
|
||||
unless (is_discoverable_now($device)) {
|
||||
return job_defer("discover deferred: $host is not discoverable");
|
||||
}
|
||||
|
||||
my $snmp = snmp_connect($device);
|
||||
if (!defined $snmp) {
|
||||
return job_error("discover failed: could not SNMP connect to $host");
|
||||
return job_defer("discover failed: could not SNMP connect to $host");
|
||||
}
|
||||
|
||||
store_device($device, $snmp);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package App::Netdisco::Backend::Worker::Poller::Macsuck;
|
||||
|
||||
use App::Netdisco::Core::Macsuck 'do_macsuck';
|
||||
use App::Netdisco::Util::Device 'is_macsuckable';
|
||||
use App::Netdisco::Util::Device 'is_macsuckable_now';
|
||||
|
||||
use Role::Tiny;
|
||||
use namespace::clean;
|
||||
@@ -9,7 +9,7 @@ use namespace::clean;
|
||||
with 'App::Netdisco::Backend::Worker::Poller::Common';
|
||||
|
||||
sub macsuck_action { \&do_macsuck }
|
||||
sub macsuck_filter { \&is_macsuckable }
|
||||
sub macsuck_filter { \&is_macsuckable_now }
|
||||
sub macsuck_layer { 2 }
|
||||
|
||||
sub macwalk { (shift)->_walk_body('macsuck', @_) }
|
||||
|
||||
@@ -5,7 +5,7 @@ use Dancer::Plugin::DBIC 'schema';
|
||||
|
||||
use App::Netdisco::Core::Nbtstat qw/nbtstat_resolve_async store_nbt/;
|
||||
use App::Netdisco::Util::Node 'is_nbtstatable';
|
||||
use App::Netdisco::Util::Device qw/get_device is_discoverable/;
|
||||
use App::Netdisco::Util::Device qw/get_device is_macsuckable/;
|
||||
use App::Netdisco::Backend::Util ':all';
|
||||
|
||||
use NetAddr::IP::Lite ':lower';
|
||||
@@ -29,8 +29,8 @@ sub nbtstat {
|
||||
or job_error("nbtstat failed: unable to interpret device parameter");
|
||||
my $host = $device->ip;
|
||||
|
||||
unless (is_discoverable($device->ip)) {
|
||||
return job_defer("nbtstat deferred: $host is not discoverable");
|
||||
unless (is_macsuckable($device)) {
|
||||
return job_defer("nbtstat deferred: $host is not macsuckable");
|
||||
}
|
||||
|
||||
# get list of nodes on device
|
||||
|
||||
Reference in New Issue
Block a user