we had situations where the manager would start workers on the same job, either because of race conditions or because at the time of queueing it wasn't known that the jobs were targeting the same device (due to device aliases). this commit removes duplicate jobs, reduces the need for locking on the job queue, and makes use of lldpRemChassisId to try to deduplicate jobs before they are started. in effect we have several goes to prevent duplicate jobs: 1. at neighbor discovery time we try to skip queueing same lldpRemChassisId 2. at job selection we 'error out' jobs with same profile as job selected 3. at job selection we check for running job with same profile as selected 4. the job manager process also checks for duplicate job profiles 5. at job lock we abort if the job was 'errored out' altogether this seems to work well. a test on a large university network of 303 devices (four core routers and the rest edge routers, running VRF with many duplicate identities), ~1200 subnets, ~50k hosts, resulted in no DB deadlock or contention and a complete discover+arpnip+macsuck (909 jobs) in ~3 minutes (with ~150 duplicate jobs identified and skipped).
165 lines
4.1 KiB
Perl
165 lines
4.1 KiB
Perl
use utf8;
|
|
package App::Netdisco::DB::Result::Admin;
|
|
|
|
# Created by DBIx::Class::Schema::Loader
|
|
# DO NOT MODIFY THE FIRST PART OF THIS FILE
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use base 'DBIx::Class::Core';
|
|
__PACKAGE__->table("admin");

# Columns of the 'admin' table: the backend job queue, one row per
# queued/running/finished job. This section is generated by
# DBIx::Class::Schema::Loader (see the "DO NOT MODIFY" marker above);
# hand edits here will break the md5sum check on regeneration.
__PACKAGE__->add_columns(
  # surrogate primary key, backed by the admin_job_seq sequence
  "job",
  {
    data_type => "integer",
    is_auto_increment => 1,
    is_nullable => 0,
    sequence => "admin_job_seq",
  },
  # when the job was queued; defaulted by the database to now()
  "entered",
  {
    data_type => "timestamp",
    default_value => \"current_timestamp",
    is_nullable => 1,
    original => { default_value => \"now()" },
  },
  # when a worker picked the job up / finished it (NULL until then)
  "started",
  { data_type => "timestamp", is_nullable => 1 },
  "finished",
  { data_type => "timestamp", is_nullable => 1 },
  # target device IP (PostgreSQL inet), plus optional port name for
  # port-level actions
  "device",
  { data_type => "inet", is_nullable => 1 },
  "port",
  { data_type => "text", is_nullable => 1 },
  # the action to run (e.g. discover/arpnip/macsuck) and its argument
  "action",
  { data_type => "text", is_nullable => 1 },
  "subaction",
  { data_type => "text", is_nullable => 1 },
  # job state -- NOTE(review): the set of status values is assigned
  # elsewhere (job manager); confirm against that code
  "status",
  { data_type => "text", is_nullable => 1 },
  # who requested the job, and from which IP
  "username",
  { data_type => "text", is_nullable => 1 },
  "userip",
  { data_type => "inet", is_nullable => 1 },
  # free-text log/result message from the worker
  "log",
  { data_type => "text", is_nullable => 1 },
  "debug",
  { data_type => "boolean", is_nullable => 1 },
  # device identity (e.g. lldpRemChassisId) used to deduplicate jobs
  # that target the same device under different aliases
  "device_key",
  { data_type => "text", is_nullable => 1 },
);
|
|
|
|
|
|
# Created by DBIx::Class::Schema::Loader v0.07015 @ 2012-01-07 14:20:02
|
|
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:gW4JW4pMgrufFIxFeYPYpw
|
|
|
|
# The auto-incrementing 'job' column (admin_job_seq) is the primary key.
__PACKAGE__->set_primary_key('job');
|
|
|
|
# You can replace this text with custom code or comments, and it will be preserved on regeneration
|
|
|
|
=head1 RELATIONSHIPS
|
|
|
|
=head2 device_skips( $backend?, $max_deferrals?, $retry_after? )
|
|
|
|
Returns the set of C<device_skip> entries which apply to this job. They match
|
|
the device IP, current backend, and job action.
|
|
|
|
You probably want to use the ResultSet method C<skipped> which completes this
|
|
query with a C<backend> host, C<max_deferrals>, and C<retry_after> parameters
|
|
(or sensible defaults).
|
|
|
|
=cut
|
|
|
|
# The join condition is a coderef so literal SQL with '?' placeholders can
# be embedded. Bind values must be supplied by the caller IN THIS ORDER:
#   1. backend hostname, 2. max_deferrals, 3. retry_after interval
# -- see the ResultSet 'skipped' method mentioned in the POD above.
# Do not reorder the keys below without checking the bind order.
__PACKAGE__->has_many( device_skips => 'App::Netdisco::DB::Result::DeviceSkip',
  sub {
    my $args = shift;
    return {
      # bind 1: restrict to skips recorded for a given backend host
      "$args->{foreign_alias}.backend" => { '=' => \'?' },
      # same target device as this job
      "$args->{foreign_alias}.device"
        => { -ident => "$args->{self_alias}.device" },
      -or => [
        # either this job's action is in the skip's actionset
        # (PostgreSQL '@>' array-contains on a one-element array)
        "$args->{foreign_alias}.actionset"
          => { '@>' => \"string_to_array($args->{self_alias}.action,'')" },
        # ...or the job has been deferred too many times, too recently:
        -and => [
          # bind 2: deferral count threshold (max_deferrals)
          "$args->{foreign_alias}.deferrals" => { '>=' => \'?' },
          # bind 3: retry_after, as a PostgreSQL interval string
          "$args->{foreign_alias}.last_defer" =>
            { '>', \'(LOCALTIMESTAMP - ?::interval)' },
        ],
      ],
    };
  },
  { cascade_copy => 0, cascade_update => 0, cascade_delete => 0 }
);
|
|
|
|
=head2 target
|
|
|
|
Returns the single C<device> to which this Job entry was associated.
|
|
|
|
The JOIN is of type LEFT, in case the C<device> is not in the database.
|
|
|
|
=cut
|
|
|
|
# LEFT join so a job row still inflates even when its device is not
# (or no longer) present in the device table.
__PACKAGE__->belongs_to(
    target => 'App::Netdisco::DB::Result::Device',
    { 'foreign.ip' => 'self.device' },
    { join_type => 'LEFT' },
);
|
|
|
|
=head1 METHODS
|
|
|
|
=head2 summary
|
|
|
|
An attempt to make a meaningful statement about the job.
|
|
|
|
=cut
|
|
|
|
# Build a short human-readable description of the job: the action name
# followed by the device and port (each blank when unset). Note the join
# keeps empty slots, so trailing/double spaces can appear -- preserved
# deliberately for compatibility with existing output.
sub summary {
    my $job = shift;
    my @fields = (
        $job->action,
        ($job->device || ''),
        ($job->port   || ''),
    );
    # subaction intentionally left out of the summary:
    # ($job->subaction ? (q{'}. $job->subaction .q{'}) : '');
    return join ' ', @fields;
}
|
|
|
|
=head1 ADDITIONAL COLUMNS
|
|
|
|
=head2 entered_stamp
|
|
|
|
Formatted version of the C<entered> field, accurate to the minute.
|
|
|
|
The format is somewhat like ISO 8601 or RFC3339 but without the middle C<T>
|
|
between the date stamp and time stamp. That is:
|
|
|
|
2012-02-06 12:49
|
|
|
|
=cut
|
|
|
|
# Accessor for the extra 'entered_stamp' column -- presumably added by a
# +select in the ResultSet (confirm against the ResultSet class).
sub entered_stamp {
    my $self = shift;
    return $self->get_column('entered_stamp');
}
|
|
|
|
=head2 started_stamp
|
|
|
|
Formatted version of the C<started> field, accurate to the minute.
|
|
|
|
The format is somewhat like ISO 8601 or RFC3339 but without the middle C<T>
|
|
between the date stamp and time stamp. That is:
|
|
|
|
2012-02-06 12:49
|
|
|
|
=cut
|
|
|
|
# Accessor for the extra 'started_stamp' column -- presumably added by a
# +select in the ResultSet (confirm against the ResultSet class).
sub started_stamp {
    my $self = shift;
    return $self->get_column('started_stamp');
}
|
|
|
|
=head2 finished_stamp
|
|
|
|
Formatted version of the C<finished> field, accurate to the minute.
|
|
|
|
The format is somewhat like ISO 8601 or RFC3339 but without the middle C<T>
|
|
between the date stamp and time stamp. That is:
|
|
|
|
2012-02-06 12:49
|
|
|
|
=cut
|
|
|
|
# Accessor for the extra 'finished_stamp' column -- presumably added by a
# +select in the ResultSet (confirm against the ResultSet class).
sub finished_stamp {
    my $self = shift;
    return $self->get_column('finished_stamp');
}
|
|
|
|
1;
|