/**
 * {@inheritDoc}
 * <p>
 * This implementation forwards the request to {@code loadMgr}, supplying {@code taskSes},
 * the given topology and the job obtained from {@code jobRes}.
 *
 * @param top Nodes handed to {@code loadMgr} as the topology argument.
 * @return The node returned by {@code loadMgr.getBalancedNode(...)}.
 */
@Override public ClusterNode getBalancedNode(List<ClusterNode> top) {
    return loadMgr.getBalancedNode(
        taskSes,
        top,
        jobRes.getJob()
    );
}
/** {@inheritDoc} */ @Override public ClusterNode failover(FailoverContext ctx, List<ClusterNode> top) { failedOverJobs.add(ctx.getJobResult().getJobContext()); // Clear failed nodes list - allow to failover on the same node. ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, null); // Account for maximum number of failover attempts since we clear failed node list. Integer failoverCnt = ctx.getJobResult().getJobContext().getAttribute(FAILOVER_NUMBER_ATTR); if (failoverCnt == null) ctx.getJobResult().getJobContext().setAttribute(FAILOVER_NUMBER_ATTR, 1); else { if (failoverCnt >= getMaximumFailoverAttempts()) { U.warn(log, "Job failover failed because number of maximum failover attempts is exceeded " + "[failedJob=" + ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + getMaximumFailoverAttempts() + ']'); return null; } ctx.getJobResult().getJobContext().setAttribute(FAILOVER_NUMBER_ATTR, failoverCnt + 1); } List<ClusterNode> cp = new ArrayList<>(top); // Keep collection type. F.retain(cp, false, new IgnitePredicate<ClusterNode>() { @Override public boolean apply(ClusterNode node) { return F.isAll(node, filter); } }); return super.failover(ctx, cp); //use cp to ensure we don't failover on failed node }
" is exceeded [failedJob=" + ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']');
/**
 * {@inheritDoc}
 * <p>
 * This implementation never picks a node: it logs a warning that names the failed job,
 * the task and the session id, and then returns {@code null}.
 *
 * @param ctx Failover context; source of the job, task name and session id used in the warning.
 * @param top Topology; not consulted by this implementation.
 * @return Always {@code null}.
 */
@Override public ClusterNode failover(FailoverContext ctx, List<ClusterNode> top) {
    String msg = "Returning 'null' node for failed job (failover will not happen) [job=" +
        ctx.getJobResult().getJob() +
        ", task=" + ctx.getTaskSession().getTaskName() +
        ", sessionId=" + ctx.getTaskSession().getId() + ']';

    U.warn(log, msg);

    return null;
}
ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']');
assert !done; PlatformAbstractJob job = res.getJob();
", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']'); ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class); ses.getUserVersion(), ses.getTaskClassName(), loc ? null : U.marshal(marsh, res.getJob()), loc ? res.getJob() : null, ses.getStartTime(), timeout, ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class); U.warn(log, "Job timed out prior to sending job execution request: " + res.getJob());
/**
 * {@inheritDoc}
 * <p>
 * This implementation forwards the request to {@code loadMgr}, supplying {@code taskSes},
 * the given topology and the job obtained from {@code jobRes}.
 *
 * @param top Nodes handed to {@code loadMgr} as the topology argument.
 * @return The node returned by {@code loadMgr.getBalancedNode(...)}.
 */
@Override public ClusterNode getBalancedNode(List<ClusterNode> top) {
    return loadMgr.getBalancedNode(
        taskSes,
        top,
        jobRes.getJob()
    );
}
" is exceeded [failedJob=" + ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']');
/**
 * {@inheritDoc}
 * <p>
 * This implementation never picks a node: it logs a warning that names the failed job,
 * the task and the session id, and then returns {@code null}.
 *
 * @param ctx Failover context; source of the job, task name and session id used in the warning.
 * @param top Topology; not consulted by this implementation.
 * @return Always {@code null}.
 */
@Override public ClusterNode failover(FailoverContext ctx, List<ClusterNode> top) {
    String msg = "Returning 'null' node for failed job (failover will not happen) [job=" +
        ctx.getJobResult().getJob() +
        ", task=" + ctx.getTaskSession().getTaskName() +
        ", sessionId=" + ctx.getTaskSession().getId() + ']';

    U.warn(log, msg);

    return null;
}
ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + maxFailoverAttempts + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']'); ", oldNode=" + ctx.getJobResult().getNode().id() + ", sesId=" + ctx.getTaskSession().getId() + ", job=" + ctx.getJobResult().getJob() + ", jobCtx=" + ctx.getJobResult().getJobContext() + ", task=" + ctx.getTaskSession().getTaskName() + ']');
assert !done; PlatformAbstractJob job = res.getJob();
", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']'); ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class); ses.getUserVersion(), ses.getTaskClassName(), loc ? null : U.marshal(marsh, res.getJob()), loc ? res.getJob() : null, ses.getStartTime(), timeout, ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class); U.warn(log, "Job timed out prior to sending job execution request: " + res.getJob());