if (task.isUnassigned()) { unassigned++; continue; if (task.isUnassigned() && (task.status != FAILURE)) {
private void heartbeat(String path, int new_version, ServerName workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("Task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTime(), new_version, workerName); SplitLogCounters.tot_mgr_heartbeat.increment(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
if (task.isUnassigned()) { unassigned++; continue; if (task.isUnassigned() && (task.status != FAILURE)) {
if (task.isUnassigned()) { unassigned++; continue; if (task.isUnassigned() && (task.status != FAILURE)) {
LOG.debug("failed to resubmit task " + path + " version changed"); task.heartbeatNoDetails(EnvironmentEdgeManager.currentTimeMillis()); return false; LOG.debug("failed to resubmit task " + path + " version changed"); task.heartbeatNoDetails(EnvironmentEdgeManager.currentTimeMillis()); return false; } catch (KeeperException e) { task.unforcedResubmits++; task.setUnassigned(); createRescanNode(Long.MAX_VALUE); tot_mgr_resubmit.incrementAndGet();
.isServerOnline(task.cur_worker_name) : true; if (alive && time < timeout) { LOG.trace("Skipping the resubmit of " + task.toString() + " because the server " + task.cur_worker_name + " is not marked as dead, we waited for " + time + " while the timeout is " + timeout); boolean result = resubmit(this.details.getServerName(), path, version); if (!result) { task.heartbeatNoDetails(EnvironmentEdgeManager.currentTime()); return false; task.unforcedResubmits.incrementAndGet(); task.setUnassigned(); rescan(Long.MAX_VALUE); SplitLogCounters.tot_mgr_resubmit.incrementAndGet();
@Test public void testUnassignedOrphan() throws Exception { LOG.info("TestUnassignedOrphan - an unassigned task is resubmitted at" + " startup"); String tasknode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash"); //create an unassigned orphan task SplitLogTask slt = new SplitLogTask.Unassigned(master.getServerName()); zkw.getRecoverableZooKeeper().create(tasknode, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); int version = ZKUtil.checkExists(zkw, tasknode); slm = new SplitLogManager(master, conf); waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to/2); Task task = findOrCreateOrphanTask(tasknode); assertTrue(task.isOrphan()); assertTrue(task.isUnassigned()); // wait for RESCAN node to be created waitForCounter(tot_mgr_rescan, 0, 1, to / 2); Task task2 = findOrCreateOrphanTask(tasknode); assertTrue(task == task2); LOG.debug("task = " + task); assertEquals(1L, tot_mgr_resubmit.sum()); assertEquals(1, task.incarnation.get()); assertEquals(0, task.unforcedResubmits.get()); assertTrue(task.isOrphan()); assertTrue(task.isUnassigned()); assertTrue(ZKUtil.checkExists(zkw, tasknode) > version); }
/** * It is possible for a task to stay in UNASSIGNED state indefinitely - say * SplitLogManager wants to resubmit a task. It forces the task to UNASSIGNED * state but it dies before it could create the RESCAN task node to signal * the SplitLogWorkers to pick up the task. To prevent this scenario the * SplitLogManager resubmits all orphan and UNASSIGNED tasks at startup. * * @param path */ private void handleUnassignedTask(String path) { if (ZKSplitLog.isRescanNode(watcher, path)) { return; } Task task = findOrCreateOrphanTask(path); if (task.isOrphan() && (task.incarnation == 0)) { LOG.info("resubmitting unassigned orphan task " + path); // ignore failure to resubmit. The timeout-monitor will handle it later // albeit in a more crude fashion resubmit(path, task, FORCE); } }
/** * It is possible for a task to stay in UNASSIGNED state indefinitely - say SplitLogManager wants * to resubmit a task. It forces the task to UNASSIGNED state but it dies before it could create * the RESCAN task node to signal the SplitLogWorkers to pick up the task. To prevent this * scenario the SplitLogManager resubmits all orphan and UNASSIGNED tasks at startup. * @param path */ private void handleUnassignedTask(String path) { if (ZKSplitLog.isRescanNode(watcher, path)) { return; } Task task = findOrCreateOrphanTask(path); if (task.isOrphan() && (task.incarnation.get() == 0)) { LOG.info("resubmitting unassigned orphan task " + path); // ignore failure to resubmit. The timeout-monitor will handle it later // albeit in a more crude fashion resubmitTask(path, task, FORCE); } }
@Test public void testOrphanTaskAcquisition() throws Exception { LOG.info("TestOrphanTaskAcquisition"); String tasknode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash"); SplitLogTask slt = new SplitLogTask.Owned(master.getServerName()); zkw.getRecoverableZooKeeper().create(tasknode, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); slm = new SplitLogManager(master, conf); waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to/2); Task task = findOrCreateOrphanTask(tasknode); assertTrue(task.isOrphan()); waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); assertFalse(task.isUnassigned()); long curt = System.currentTimeMillis(); assertTrue((task.last_update <= curt) && (task.last_update > (curt - 1000))); LOG.info("waiting for manager to resubmit the orphan task"); waitForCounter(tot_mgr_resubmit, 0, 1, to + to/2); assertTrue(task.isUnassigned()); waitForCounter(tot_mgr_rescan, 0, 1, to + to/2); }
private void heartbeat(String path, int new_version, String workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTimeMillis(), new_version, workerName); tot_mgr_heartbeat.incrementAndGet(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
private void heartbeat(String path, int new_version, ServerName workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTime(), new_version, workerName); SplitLogCounters.tot_mgr_heartbeat.incrementAndGet(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
Task findOrCreateOrphanTask(String path) { Task orphanTask = new Task(); Task task; task = details.getTasks().putIfAbsent(path, orphanTask); if (task == null) { LOG.info("creating orphan task " + path); SplitLogCounters.tot_mgr_orphan_task_acquired.incrementAndGet(); task = orphanTask; } return task; }
Task findOrCreateOrphanTask(String path) { Task orphanTask = new Task(); Task task; task = tasks.putIfAbsent(path, orphanTask); if (task == null) { LOG.info("creating orphan task " + path); SplitLogCounters.tot_mgr_orphan_task_acquired.incrementAndGet(); task = orphanTask; } return task; }
public Task() { last_version = -1; status = IN_PROGRESS; setUnassigned(); }
@Override public void nodeDataChanged(String path) { Task task; task = tasks.get(path); if (task != null || ZKSplitLog.isRescanNode(watcher, path)) { if (task != null) { task.heartbeatNoDetails(EnvironmentEdgeManager.currentTimeMillis()); } getDataSetWatch(path, zkretries); } }
Task() { incarnation = 0; last_version = -1; status = IN_PROGRESS; setUnassigned(); }