/**
 * Applies the starvation increment to the priority attribute of every waiting job whose
 * current position in the list is greater than its original index.
 *
 * @param jobs Ordered collection of collision contexts for jobs that are currently waiting
 *      for execution.
 */
private void bumpPriority(List<GridCollisionJobContextWrapper> jobs) {
    int inc = this.starvationInc;

    int pos = 0;

    while (pos < jobs.size()) {
        GridCollisionJobContextWrapper waiting = jobs.get(pos);

        int curPos = pos++;

        // Jobs that kept or improved their position are left untouched.
        if (curPos <= waiting.originalIndex())
            continue;

        int bumpedPri = getJobPriority(waiting.getContext()) + inc;

        waiting.getContext().getJobContext().setAttribute(jobPriAttrKey, bumpedPri);
    }
}
/** {@inheritDoc} */
@Override public Serializable execute() {
    UUID nodeId = ignite.configuration().getNodeId();

    // Publish the local node ID and the job ID as individual attributes.
    jobCtx.setAttribute("nodeId", nodeId);
    jobCtx.setAttribute("jobId", jobCtx.getJobId());

    // Publish ten more attributes in bulk; every key doubles as its own value.
    Map<String, String> bulk = new HashMap<>(10);

    int idx = 0;

    while (idx < 10) {
        String keyVal = jobCtx.getJobId().toString() + idx;

        bulk.put(keyVal, keyVal);

        idx++;
    }

    jobCtx.setAttributes(bulk);

    // The single attribute must be readable both directly and through the attribute map.
    assert jobCtx.getAttribute("nodeId").equals(nodeId);
    assert jobCtx.getAttributes().get("nodeId").equals(nodeId);

    // Every bulk key and value must be present in the attribute map.
    assert jobCtx.getAttributes().keySet().containsAll(bulk.keySet());
    assert jobCtx.getAttributes().values().containsAll(bulk.values());

    return null;
}
});
waitCtx.getJobContext().setAttribute(THIEF_NODE_ATTR, nodeId); waitCtx.getJobContext().setAttribute(STEALING_ATTEMPT_COUNT_ATTR, stealingCnt + 1); waitCtx.getJobContext().setAttribute(STEALING_PRIORITY_ATTR, pri + 1); log.debug("Failed to reject job [i=" + i + ']'); waitCtx.getJobContext().setAttribute(THIEF_NODE_ATTR, null); waitCtx.getJobContext().setAttribute(STEALING_ATTEMPT_COUNT_ATTR, stealingCnt); waitCtx.getJobContext().setAttribute(STEALING_PRIORITY_ATTR, pri);
/** {@inheritDoc} */
@Override public void run() {
    Integer stored = jobCtx.getAttribute(ATTR_ATTEMPT);

    // A missing attribute means this is the first attempt.
    Integer attempt = stored != null ? stored : Integer.valueOf(1);

    // The key must map to the node this job is running on.
    assertEquals(ignite.affinity(NON_DFLT_CACHE_NAME).mapKeyToNode(key), ignite.cluster().localNode());

    // Record the number of the next attempt before possibly failing over.
    jobCtx.setAttribute(ATTR_ATTEMPT, attempt + 1);

    // Keep requesting failover until the configured attempt is reached.
    if (attempt < callAttempt)
        throw new ComputeJobFailoverException("Failover exception.");

    assertEquals(callAttempt, attempt);
}
}
/** {@inheritDoc} */
@Override public Object call() throws IgniteCheckedException {
    Integer stored = jobCtx.getAttribute(ATTR_ATTEMPT);

    // A missing attribute means this is the first attempt.
    Integer attempt = stored != null ? stored : Integer.valueOf(1);

    // The key must map to the node this job is running on.
    assertEquals(ignite.affinity(NON_DFLT_CACHE_NAME).mapKeyToNode(key), ignite.cluster().localNode());

    // Record the number of the next attempt before possibly failing over.
    jobCtx.setAttribute(ATTR_ATTEMPT, attempt + 1);

    // Keep requesting failover until the configured attempt is reached.
    if (attempt < callAttempt)
        throw new ComputeJobFailoverException("Failover exception.");

    return attempt;
}
}
/**
 * Marks a job result from a remote node with the local node as thief, offers a single remote
 * node as topology and expects the SPI to return the node the job result was created for.
 *
 * @throws Exception If test failed.
 */
@Test
public void testFailover() throws Exception {
    ClusterNode failedOn = getSpiContext().remoteNodes().iterator().next();

    GridTestJobResult failedRes = new GridTestJobResult(failedOn);

    failedRes.getJobContext().setAttribute(JobStealingCollisionSpi.THIEF_NODE_ATTR,
        getSpiContext().localNode().id());

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    // Topology consists of the first remote node only.
    ClusterNode candidate = getSpiContext().remoteNodes().iterator().next();

    ClusterNode other = getSpi().failover(failoverCtx, Collections.singletonList(candidate));

    assert other == failedOn : "Invalid failed-over node: " + other;
}
jobCtx.setAttribute(ATTR_HELD, true);
/**
 * Sets both the thief node and a failover attempt count equal to the configured maximum
 * on the failed job, then expects the SPI to return no node.
 *
 * @throws Exception If test failed.
 */
@Test
public void testMaxHopsExceeded() throws Exception {
    ClusterNode failedOn = getSpiContext().remoteNodes().iterator().next();

    GridTestJobResult failedRes = new GridTestJobResult(failedOn);

    failedRes.getJobContext().setAttribute(THIEF_NODE_ATTR, getSpiContext().localNode().id());

    // Attempt counter already sits at the configured maximum.
    int maxAttempts = getSpi().getMaximumFailoverAttempts();

    failedRes.getJobContext().setAttribute(FAILOVER_ATTEMPT_COUNT_ATTR, maxAttempts);

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    ClusterNode picked = getSpi().failover(failoverCtx, new ArrayList<>(getSpiContext().nodes()));

    assert picked == null;
}
/**
 * Sets a failover attempt count equal to the configured maximum on the failed job, without
 * any thief node attribute, then expects the SPI to return no node.
 *
 * @throws Exception If test failed.
 */
@Test
public void testMaxHopsExceededThiefNotSet() throws Exception {
    ClusterNode failedOn = getSpiContext().remoteNodes().iterator().next();

    GridTestJobResult failedRes = new GridTestJobResult(failedOn);

    // Attempt counter already sits at the configured maximum.
    int maxAttempts = getSpi().getMaximumFailoverAttempts();

    failedRes.getJobContext().setAttribute(FAILOVER_ATTEMPT_COUNT_ATTR, maxAttempts);

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    ClusterNode picked = getSpi().failover(failoverCtx, new ArrayList<>(getSpiContext().nodes()));

    assert picked == null;
}
/** {@inheritDoc} */ @Override public ClusterNode failover(FailoverContext ctx, List<ClusterNode> top) { failedOverJobs.add(ctx.getJobResult().getJobContext()); // Clear failed nodes list - allow to failover on the same node. ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, null); // Account for maximum number of failover attempts since we clear failed node list. Integer failoverCnt = ctx.getJobResult().getJobContext().getAttribute(FAILOVER_NUMBER_ATTR); if (failoverCnt == null) ctx.getJobResult().getJobContext().setAttribute(FAILOVER_NUMBER_ATTR, 1); else { if (failoverCnt >= getMaximumFailoverAttempts()) { U.warn(log, "Job failover failed because number of maximum failover attempts is exceeded " + "[failedJob=" + ctx.getJobResult().getJob() + ", maxFailoverAttempts=" + getMaximumFailoverAttempts() + ']'); return null; } ctx.getJobResult().getJobContext().setAttribute(FAILOVER_NUMBER_ATTR, failoverCnt + 1); } List<ClusterNode> cp = new ArrayList<>(top); // Keep collection type. F.retain(cp, false, new IgnitePredicate<ClusterNode>() { @Override public boolean apply(ClusterNode node) { return F.isAll(node, filter); } }); return super.failover(ctx, cp); //use cp to ensure we don't failover on failed node }
/**
 * Sets the thief node attribute to the ID of the very node the job failed on and expects
 * the SPI to return the local node instead.
 *
 * @throws Exception If test failed.
 */
@Test
public void testThiefEqualsVictim() throws Exception {
    ClusterNode victim = getSpiContext().remoteNodes().iterator().next();

    GridTestJobResult failedRes = new GridTestJobResult(victim);

    // Thief and victim are the same node.
    failedRes.getJobContext().setAttribute(THIEF_NODE_ATTR, victim.id());

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    ClusterNode picked = getSpi().failover(failoverCtx, new ArrayList<>(getSpiContext().nodes()));

    assert picked != null;
    assert picked != victim;
    assert picked.equals(getSpiContext().localNode());

    checkAttributes(failedRes.getJobContext(), victim, 1);
}
/**
 * Uses a freshly created node with a random ID both as the failed node and as the thief,
 * and expects the SPI to return some other node contained in the SPI context's node list.
 *
 * @throws Exception If test failed.
 */
@Test
public void testThiefNotInTopology() throws Exception {
    ClusterNode stranger = new GridTestNode(UUID.randomUUID());

    GridTestJobResult failedRes = new GridTestJobResult(stranger);

    failedRes.getJobContext().setAttribute(THIEF_NODE_ATTR, stranger.id());

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    ClusterNode picked = getSpi().failover(failoverCtx, new ArrayList<>(getSpiContext().nodes()));

    assert picked != null;
    assert picked != stranger;
    assert getSpiContext().nodes().contains(picked);

    checkAttributes(failedRes.getJobContext(), stranger, 1);
}
ctx.getJobResult().getJobContext().setAttribute(AFFINITY_CALL_ATTEMPT, affCallAttempt + 1); else { ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, failedNodes);
/** * @throws Exception If test failed. */ @Test public void testFailover() throws Exception { ClusterNode rmt = getSpiContext().remoteNodes().iterator().next(); GridTestJobResult failed = new GridTestJobResult(rmt); failed.getJobContext().setAttribute(THIEF_NODE_ATTR, getSpiContext().localNode().id()); ClusterNode other = getSpi().failover(new GridFailoverTestContext(new GridTestTaskSession(), failed), new ArrayList<>(getSpiContext().nodes())); assert other == getSpiContext().localNode(); // This is not a failover but stealing. checkAttributes(failed.getJobContext(), null, 0); }
ctx.getJobResult().getJobContext().setAttribute(THIEF_NODE_ATTR, null); ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, failedNodes); ctx.getJobResult().getJobContext().setAttribute(FAILOVER_ATTEMPT_COUNT_ATTR, failoverCnt);
/**
 * Starts with a failover attempt count one below the configured maximum and expects the
 * SPI to still return the local node.
 *
 * @throws Exception If test failed.
 */
@Test
public void testNonZeroFailoverCount() throws Exception {
    ClusterNode failedOn = getSpiContext().remoteNodes().iterator().next();

    GridTestJobResult failedRes = new GridTestJobResult(failedOn);

    int maxAttempts = getSpi().getMaximumFailoverAttempts();

    // One more attempt is still allowed.
    failedRes.getJobContext().setAttribute(FAILOVER_ATTEMPT_COUNT_ATTR, maxAttempts - 1);

    GridFailoverTestContext failoverCtx = new GridFailoverTestContext(new GridTestTaskSession(), failedRes);

    ClusterNode picked = getSpi().failover(failoverCtx, new ArrayList<>(getSpiContext().nodes()));

    assert picked != null;
    assert picked != failedOn;
    assert picked == getSpiContext().localNode();

    checkAttributes(failedRes.getJobContext(), failedOn, maxAttempts);
}
excluded.getJobContext().setAttribute(STEALING_ATTEMPT_COUNT_ATTR, 1);
/**
 * Applies the starvation increment to the priority attribute of every waiting job whose
 * current position in the list is greater than its original index.
 *
 * @param jobs Ordered collection of collision contexts for jobs that are currently waiting
 *      for execution.
 */
private void bumpPriority(List<GridCollisionJobContextWrapper> jobs) {
    int inc = this.starvationInc;

    int pos = 0;

    while (pos < jobs.size()) {
        GridCollisionJobContextWrapper waiting = jobs.get(pos);

        int curPos = pos++;

        // Jobs that kept or improved their position are left untouched.
        if (curPos <= waiting.originalIndex())
            continue;

        int bumpedPri = getJobPriority(waiting.getContext()) + inc;

        waiting.getContext().getJobContext().setAttribute(jobPriAttrKey, bumpedPri);
    }
}
ctx.getJobResult().getJobContext().setAttribute(AFFINITY_CALL_ATTEMPT, affCallAttempt + 1); else { ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, failedNodes);
ctx.getJobResult().getJobContext().setAttribute(THIEF_NODE_ATTR, null); ctx.getJobResult().getJobContext().setAttribute(FAILED_NODE_LIST_ATTR, failedNodes); ctx.getJobResult().getJobContext().setAttribute(FAILOVER_ATTEMPT_COUNT_ATTR, failoverCnt);