Avoid lock/defer of jobs denied via ACL

This commit adds a table 'device_skip' that is used to restrict job queue
searches to avoid jobs that are not permitted on this backend via *_no ACLs,
or jobs on devices that have previously encountered multiple SNMP timeouts.

When the backend loads or a device is added, a row is added to the table if
that device should not be polled on this backend (together with the job
actions which are to be skipped/denied). When a device SNMP connect fails a
counter in the same row (or a new row) is incremented.

There is also a new report 'SNMP Connect Failures' to show the devices with
non-zero SNMP connect failure counters. A configurable limit in the setting
'max_deferrals' is used to set the threshold of no longer polling the device.

To reset the deferrals/failures count, restart the Netdisco backend (which
regenerates 'device_skip' cache entries).

Squashed commit of the following:

commit b5e32c219d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:55:14 2017 +0100

    show all failed connections in report

commit ffce3cee84
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:12:39 2017 +0100

    only resolve fqdn once

commit cc4f680f01
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:10:20 2017 +0100

    Revert "only resolve fqdn once"

    This reverts commit 3d136a54de.

commit d8d082b30e
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 20:09:05 2017 +0100

    a report to show SNMP failures

commit 3d136a54de
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 19:37:58 2017 +0100

    only resolve fqdn once

commit 4550b8a84c
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 17:27:43 2017 +0100

    skipover now implicit from deferrals/actionset; fix sql where logic with better correlation

commit b51edbccd2
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 16:11:29 2017 +0100

    only abort lock if action matches badactions

commit 415559b24f
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:56:42 2017 +0100

    set skipover true when adding to actionset

commit 1086f2c467
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:50:56 2017 +0100

    fix empty actionset

commit 31962580b8
Merge: 9b2e993e 6808133b
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:25:08 2017 +0100

    Merge branch 'og-device_skip' of github.com:netdisco/netdisco into og-device_skip

commit 6808133bdb
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:19:54 2017 +0100

    in-job checks for acls are required for netdisco-do foreground actions

commit 3944dd7813
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 13:18:30 2017 +0100

    avoid extra device lookup

commit 9b2e993e0f
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 12:31:36 2017 +0100

    also delete device_skip rows when deleting device

commit b55854e91d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 11:34:27 2017 +0100

    actions in device_skip table are now an array/set

commit 5e126eef07
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 09:36:33 2017 +0100

    typo

commit 44266f2767
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 09:14:25 2017 +0100

    *able checks within jobs should not be necessary with skiplist

commit e7c22e7d11
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:58:57 2017 +0100

    increment deferrals field when job is deferred

commit 88ae9c00ba
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:40:27 2017 +0100

    turn connect fail into defer

commit eac1857043
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Tue May 23 08:26:59 2017 +0100

    rename failures column to be deferrals

commit 96ed444bbb
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Mon May 22 22:52:51 2017 +0100

    set up list of jobs the backend instance should skip

commit 3a0019296d
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Mon May 22 22:01:50 2017 +0100

    separate out is_*able last_* checks

commit cf8589aba2
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Sun May 21 22:35:38 2017 +0100

    change from ignore to skip name

commit ed193356f8
Author: Oliver Gorwits <oliver@cpan.org>
Date:   Sun May 21 14:52:33 2017 +0100

    device_ignore table to track devices to skip in polling
This commit is contained in:
Oliver Gorwits
2017-05-27 08:50:08 +01:00
parent 47a5f40efe
commit 9a72d7e74a
21 changed files with 382 additions and 55 deletions

View File

@@ -56,6 +56,36 @@ __PACKAGE__->set_primary_key("job");
# You can replace this text with custom code or comments, and it will be preserved on regeneration
=head1 RELATIONSHIPS
=head2 device_skips( $backend?, $max_deferrals? )
Returns the set of C<device_skip> entries which apply to this job. They match
the device IP, current backend, and job action.
You probably want to use the ResultSet method C<skipped> which completes this
query with a C<backend> host and C<max_deferrals> parameters (or sensible
defaults).
=cut
# Relationship from a job row to the device_skip rows that apply to it. The
# join condition is built by a coderef from the table aliases passed in $args,
# rather than from a plain column mapping.
#
# NOTE(review): the condition contains two positional '?' placeholders (for
# backend and for deferrals). Their values are not set here; whoever runs the
# query must supply them via the "bind" attribute, in the order the
# placeholders end up in the generated SQL - confirm that order against the
# SQL actually produced before changing either side.
__PACKAGE__->has_many( device_skips => 'App::Netdisco::DB::Result::DeviceSkip',
sub {
my $args = shift;
return {
# device_skip.backend is compared against a bound value (bare placeholder)
"$args->{foreign_alias}.backend" => { '=' => \'?' },
# device_skip.device must equal this job's device column (-ident makes the
# right-hand side an SQL identifier, not a bound value)
"$args->{foreign_alias}.device"
=> { -ident => "$args->{self_alias}.device" },
# ...and at least one of the following must hold:
-or => [
# the row's actionset array contains this job's action; string_to_array
# with an empty-string delimiter yields a one-element array in PostgreSQL
{ "$args->{foreign_alias}.actionset"
=> { '@>' => \"string_to_array($args->{self_alias}.action,'')" } },
# or the row's deferrals counter has reached a bound threshold value
{ "$args->{foreign_alias}.deferrals" => { '>=' => \'?' } },
],
};
},
# copy/update/delete on the job row are not cascaded to device_skip rows
{ cascade_copy => 0, cascade_update => 0, cascade_delete => 0 }
);
=head1 METHODS
=head2 summary

View File

@@ -0,0 +1,55 @@
use utf8;
package App::Netdisco::DB::Result::DeviceSkip;
use strict;
use warnings;
use List::MoreUtils ();
use base 'DBIx::Class::Core';
# Result class backing the "device_skip" table: one row per (backend, device)
# pair, holding the set of skipped actions and a deferrals counter.
__PACKAGE__->table('device_skip');

__PACKAGE__->add_columns(
  backend   => { data_type => 'text',    is_nullable => 0 },
  device    => { data_type => 'inet',    is_nullable => 0 },
  actionset => { data_type => 'text[]',  is_nullable => 0, default_value => '{}' },
  deferrals => { data_type => 'integer', is_nullable => 1, default_value => '0' },
);

# The same column pair is both the primary key and a unique constraint
# named device_skip_pkey.
__PACKAGE__->set_primary_key(qw/backend device/);
__PACKAGE__->add_unique_constraint(
  device_skip_pkey => [ 'backend', 'device' ],
);
=head1 METHODS
=head2 increment_deferrals
Increments the C<deferrals> field in the row, only if the row is in storage.
There is a race in the update, but this is not worrying for now.
=cut
# Adds one to this row's deferrals counter and writes it back via update().
# An unset (or zero) counter is treated as 0, so the first call stores 1.
# Rows that are not in storage are left untouched and nothing is returned.
# The new value is computed in Perl from the value currently held by the row
# object (read-modify-write), not by the database.
sub increment_deferrals {
  my ($self) = @_;

  if ($self->in_storage) {
    my $next_count = ($self->deferrals || 0) + 1;
    return $self->update({ deferrals => $next_count });
  }

  return;
}
=head2 add_to_actionset
=cut
# Merges the given action names into this row's actionset and writes the
# result back via update(). The stored list is de-duplicated and sorted in
# default (string) order. Does nothing, and returns nothing, when the row is
# not in storage or when no actions are passed.
sub add_to_actionset {
  my $self       = shift;
  my @badactions = @_;

  return unless ($self->in_storage && scalar @badactions);

  # an unset actionset is treated as an empty list
  my $current = $self->actionset || [];
  my @merged  = List::MoreUtils::uniq( @{ $current }, @badactions );

  return $self->update({ actionset => [ sort @merged ] });
}
1;

View File

@@ -5,8 +5,11 @@ use warnings;
use base 'DBIx::Class::ResultSet';
__PACKAGE__->load_components(
qw{Helper::ResultSet::SetOperations Helper::ResultSet::Shortcut});
__PACKAGE__->load_components(qw/
Helper::ResultSet::SetOperations
Helper::ResultSet::Shortcut
Helper::ResultSet::CorrelateRelationship
/);
=head1 ADDITIONAL METHODS

View File

@@ -4,12 +4,34 @@ use base 'App::Netdisco::DB::ResultSet';
use strict;
use warnings;
use Net::Domain 'hostfqdn';
__PACKAGE__->load_components(qw/
+App::Netdisco::DB::ExplicitLocking
/);
=head1 ADDITIONAL METHODS
=head2 skipped
Returns a correlated subquery for the set of C<device_skip> entries that apply
to some jobs. They match the device IP, current backend, and job action.
Pass the C<backend> FQDN (or the current host will be used as a default), and
the C<max_deferrals> (or 10 will be used as the default).
=cut
# Builds the correlated 'device_skips' subquery for this resultset and
# attaches the two bind values it needs.
#   $backend       - backend host name; when false, hostfqdn() is used, and
#                    'localhost' if that is false too
#   $max_deferrals - deferrals threshold; when false (including 0), 10 is used
# Bind values are passed as deferrals first, backend second.
sub skipped {
  my ($rs, $backend, $max_deferrals) = @_;

  $backend       = (hostfqdn() || 'localhost') unless $backend;
  $max_deferrals = 10 unless $max_deferrals;

  my %attrs = (
    bind => [
      [ deferrals => $max_deferrals ],
      [ backend   => $backend ],
    ],
  );

  my $correlated = $rs->correlate('device_skips');
  return $correlated->search(undef, \%attrs);
}
=head2 with_times
This is a modifier for any C<search()> (including the helpers below) which

View File

@@ -596,9 +596,14 @@ sub delete {
)->delete;
}
$schema->resultset('Admin')->search({
device => { '-in' => $devices->as_query },
})->delete;
foreach my $set (qw/
Admin
DeviceSkip
/) {
$schema->resultset($set)->search(
{ device => { '-in' => $devices->as_query } },
)->delete;
}
$schema->resultset('Topology')->search({
-or => [