Daemon crash when restarting with in-progress jobs and many workers
This commit is contained in:
		| @@ -5,7 +5,7 @@ use Dancer::Plugin::DBIC 'schema'; | ||||
|  | ||||
| use base 'Exporter'; | ||||
| our @EXPORT = (); | ||||
| our @EXPORT_OK = qw/ add_jobs capacity_for take_jobs reset_jobs/; | ||||
| our @EXPORT_OK = qw/ add_jobs capacity_for take_jobs reset_jobs release_jobs /; | ||||
| our %EXPORT_TAGS = ( all => \@EXPORT_OK ); | ||||
|  | ||||
| schema('daemon')->deploy; | ||||
| @@ -59,4 +59,11 @@ sub reset_jobs { | ||||
|         ->update({wid => 0}); | ||||
| } | ||||
|  | ||||
| # not used by workers, only the daemon when reinitializing a worker | ||||
| # | ||||
| # Removes the local queue entries belonging to a single job. | ||||
| #   $jid - job ID, matched against the "job" column of $queue | ||||
| # There is no explicit return; the result of the final ->delete call is | ||||
| # what the caller receives. | ||||
| # NOTE(review): $queue is defined outside this excerpt - presumably the | ||||
| # daemon's local queue resultset; confirm. | ||||
| sub release_jobs { | ||||
|   my ($jid) = @_; | ||||
|   debug "releasing local job ID $jid"; | ||||
|   # delete every row whose "job" column equals $jid | ||||
|   $queue->search({job => $jid})->delete; | ||||
| } | ||||
|  | ||||
| 1; | ||||
|   | ||||
| @@ -143,12 +143,19 @@ sub jq_lock { | ||||
|   return $happy; | ||||
| } | ||||
|  | ||||
| # The PostgreSQL engine depends on LocalQueue, which is accessed synchronously | ||||
| # via the main daemon process. This is only used by daemon workers, which can | ||||
| # use the MCE->do() method. | ||||
| sub jq_defer { | ||||
|   my $job = shift; | ||||
|   my $happy = false; | ||||
|  | ||||
|   # lock db row and update to show job is available | ||||
|   try { | ||||
|     # other local workers are polling the central queue, so | ||||
|     # to prevent a race, first delete the job in our local queue | ||||
|     MCE->do('release_jobs', $job->id); | ||||
|  | ||||
|     # lock db row and update to show job is available | ||||
|     schema('netdisco')->txn_do(sub { | ||||
|       schema('netdisco')->resultset('Admin') | ||||
|         ->find($job->id, {for => 'update'}) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user