/**
 * Purges every cached failed-job entry that was issued by the given cluster controller.
 * Called once that CC reports its jobs as completed, so stale failure state is not retained.
 *
 * @param ccId the cluster controller whose failed-job ids are evicted
 */
public synchronized void jobsCompleted(CcId ccId) {
    failedJobsCache.asMap()
            .keySet()
            .removeIf(failedJobId -> failedJobId.getCcId().equals(ccId));
}
/**
 * Records the highest job id observed so far for the job's cluster controller.
 * The per-CC counter is created lazily on first sight of that CC.
 *
 * @param jobId the job id just observed
 */
public void updateMaxJobId(JobId jobId) {
    AtomicLong ccMaxId = maxJobIds.computeIfAbsent(jobId.getCcId(), ccId -> new AtomicLong());
    // accumulateAndGet(max) is equivalent to getAndUpdate(current -> max(current, id)).
    ccMaxId.accumulateAndGet(jobId.getId(), Math::max);
}
/**
 * Notifies the cluster controller owning this job that one result partition has been
 * fully written on this node.
 *
 * @param jobId     the job whose result partition completed
 * @param rsId      the result set the partition belongs to
 * @param partition the partition index that finished writing
 * @throws HyracksException wrapping any failure while contacting the CC
 */
@Override
public void reportPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition) throws HyracksException {
    try {
        LOGGER.trace("Reporting partition write completion: JobId: {}:ResultSetId: {}:partition: {}", jobId, rsId,
                partition);
        ncs.getClusterController(jobId.getCcId()).reportResultPartitionWriteCompletion(jobId, rsId, partition);
    } catch (Exception cause) {
        throw HyracksException.create(cause);
    }
}
/**
 * Registers a collector for each requested partition and asks the owning cluster
 * controller to route that partition's data to this node.
 *
 * @param taId      the requesting task attempt
 * @param pids      the partitions being requested
 * @param collector receiver for the partitions once they arrive
 * @param minState  minimum partition state required before delivery
 * @throws Exception if a request cannot be registered with the CC
 */
public synchronized void advertisePartitionRequest(TaskAttemptId taId, Collection<PartitionId> pids,
        IPartitionCollector collector, PartitionState minState) throws Exception {
    for (PartitionId pid : pids) {
        partitionRequestMap.put(pid, collector);
        PartitionRequest request = new PartitionRequest(pid, nodeController.getId(), taId, minState);
        nodeController.getClusterController(jobId.getCcId()).registerPartitionRequest(request);
    }
}
/**
 * Forwards a raw application message to the cluster controller that owns this job.
 *
 * @param message      the serialized payload to deliver
 * @param deploymentId the deployment the message belongs to
 * @throws Exception if delivery through the node controller service fails
 */
@Override
public void sendApplicationMessageToCC(byte[] message, DeploymentId deploymentId) throws Exception {
    ncs.sendApplicationMessageToCC(getJobletContext().getJobId().getCcId(), message, deploymentId);
}
/**
 * Registers this partition as STARTED with the partition manager, then initiates the
 * connection to the consumer. Registration happens first so the CC learns about the
 * partition before any data flows.
 *
 * @throws HyracksDataException if registration or connection setup fails
 */
@Override
public void open() throws HyracksDataException {
    manager.registerPartition(pid, ctx.getJobletContext().getJobId().getCcId(), taId, this, PartitionState.STARTED,
            false);
    pendingConnection = true;
    ensureConnected();
}
/**
 * Collects a {@link JobProfile} for every joblet on this node that belongs to this
 * work's cluster controller, fills each with the joblet's current profile, and
 * publishes the result through {@code fv}.
 */
@Override
protected void doRun() throws Exception {
    Map<JobId, Joblet> jobletMap = ncs.getJobletMap();
    // First pass: snapshot the job ids owned by our CC.
    List<JobProfile> profiles = new ArrayList<>();
    for (Joblet joblet : jobletMap.values()) {
        if (joblet.getJobId().getCcId().equals(ccId)) {
            profiles.add(new JobProfile(joblet.getJobId()));
        }
    }
    // Second pass: re-fetch each joblet, since it may have been removed concurrently.
    for (JobProfile jProfile : profiles) {
        JobletProfile jobletProfile = new JobletProfile(ncs.getId());
        Joblet joblet = jobletMap.get(jProfile.getJobId());
        if (joblet != null) {
            joblet.dumpProfile(jobletProfile);
            jProfile.getJobletProfiles().put(ncs.getId(), jobletProfile);
        }
    }
    fv.setValue(profiles);
}
}
/**
 * Serializes the message with Java serialization and forwards it to the cluster
 * controller that owns this job.
 *
 * @param message      the payload to serialize and deliver
 * @param deploymentId the deployment the message belongs to
 * @throws Exception if serialization or delivery fails
 */
@Override
public void sendApplicationMessageToCC(Serializable message, DeploymentId deploymentId) throws Exception {
    byte[] payload = JavaSerializationUtils.serialize(message);
    ncs.sendApplicationMessageToCC(getJobletContext().getJobId().getCcId(), payload, deploymentId);
}
/**
 * Finishes joblet teardown: removes this joblet from the node controller's map,
 * fires the joblet-finish event, closes local resources, clears the pending flag,
 * and finally tells the owning CC that cleanup is done.
 */
private void performCleanup() {
    nodeController.getJobletMap().remove(jobId);
    // Notify the listener (if any) of the final status before releasing resources.
    IJobletEventListener listener = getJobletEventListener();
    if (listener != null) {
        listener.jobletFinish(cleanupStatus);
    }
    close();
    cleanupPending = false;
    try {
        // Best-effort notification; a CC communication failure must not abort cleanup.
        nodeController.getClusterController(jobId.getCcId()).notifyJobletCleanup(jobId, nodeController.getId());
    } catch (Exception e) {
        // NOTE(review): printStackTrace bypasses the logging framework — consider
        // routing through this class's logger if one is available. TODO confirm.
        e.printStackTrace();
    }
}
/**
 * Opens the partition writer: resets per-open state and registers this partition as
 * STARTED with the partition manager.
 *
 * @throws HyracksDataException if partition registration fails
 */
@Override
public void open() throws HyracksDataException {
    if (LOGGER.isEnabled(openCloseLevel)) {
        // Fixed: the message previously lacked the closing parenthesis.
        LOGGER.log(openCloseLevel, "open(" + pid + " by " + taId + ")");
    }
    // Reset all per-open state so the writer can be reused after a prior run.
    size = 0;
    eos = false;
    failed = false;
    deallocated = false;
    manager.registerPartition(pid, ctx.getJobletContext().getJobId().getCcId(), taId, this, PartitionState.STARTED,
            false);
}
@Override public void registerResultPartitionLocation(JobId jobId, ResultSetId rsId, int partition, int nPartitions, boolean orderedResult, boolean emptyResult) throws HyracksException { try { // Be sure to send the *public* network address to the CC ncs.getClusterController(jobId.getCcId()).registerResultPartitionLocation(jobId, rsId, orderedResult, emptyResult, partition, nPartitions, ncs.getResultNetworkManager().getPublicNetworkAddress()); } catch (Exception e) { throw HyracksException.create(e); } }
@Override protected void doRun() throws Exception { LOGGER.info("Aborting all tasks for controller {}", ccId); IResultPartitionManager resultPartitionManager = ncs.getResultPartitionManager(); if (resultPartitionManager == null) { LOGGER.log(Level.WARN, "ResultPartitionManager is null on " + ncs.getId()); } Deque<Task> abortedTasks = new ArrayDeque<>(); Collection<Joblet> joblets = ncs.getJobletMap().values(); // TODO(mblow): should we have one jobletmap per cc? joblets.stream().filter(joblet -> joblet.getJobId().getCcId().equals(ccId)).forEach(joblet -> { joblet.getTaskMap().values().forEach(task -> { task.abort(); abortedTasks.add(task); }); final JobId jobId = joblet.getJobId(); if (resultPartitionManager != null) { resultPartitionManager.abortReader(jobId); resultPartitionManager.sweep(jobId); } ncs.getWorkQueue().schedule(new CleanupJobletWork(ncs, jobId, JobStatus.FAILURE)); }); ncs.getExecutor().submit(new EnsureAllCcTasksCompleted(ncs, ccId, abortedTasks)); } }
/**
 * Closes the partition writer: releases the file handle and, unless the write failed,
 * registers the materialized partition as COMMITTED with the partition manager.
 *
 * @throws HyracksDataException if closing the handle or registering fails
 */
@Override
public void close() throws HyracksDataException {
    if (LOGGER.isInfoEnabled()) {
        // Fixed: the message previously lacked the closing parenthesis.
        LOGGER.info("close(" + pid + " by " + taId + ")");
    }
    if (handle != null) {
        ctx.getIoManager().close(handle);
    }
    if (!failed) {
        // Simplified from `taId.getAttempt() == 0 ? false : true`: the final flag is
        // true exactly when this is a retry attempt (attempt > 0).
        manager.registerPartition(pid, ctx.getJobletContext().getJobId().getCcId(), taId,
                new MaterializedPartition(ctx, fRef, executor, ctx.getIoManager()), PartitionState.COMMITTED,
                taId.getAttempt() != 0);
    }
}
/**
 * Reports a task failure: logs the primary exception (quietly if it stems from an
 * interrupt), aborts any result readers for the job, notifies the owning CC of the
 * failure, and detaches the task from its joblet.
 */
@Override
public void run() {
    Exception primaryFailure = exceptions.get(0);
    Level level = ExceptionUtils.causedByInterrupt(primaryFailure) ? Level.DEBUG : Level.WARN;
    LOGGER.log(level, "task " + taskId + " has failed", primaryFailure);
    try {
        IResultPartitionManager resultPartitionManager = ncs.getResultPartitionManager();
        if (resultPartitionManager != null) {
            resultPartitionManager.abortReader(jobId);
        }
        ncs.getClusterController(jobId.getCcId()).notifyTaskFailure(jobId, taskId, ncs.getId(), exceptions);
    } catch (Exception e) {
        LOGGER.log(Level.ERROR, "Failure reporting task failure to cluster controller", e);
    }
    if (task != null) {
        task.getJoblet().removeTask(task);
    }
}
/**
 * Verifies that a JobIdFactory's 48-bit id space wraps: after minting the partition's
 * maximum id, the next id rolls over to 0 while keeping the same CC prefix.
 *
 * @param id the CC id whose partition is exercised
 * @throws IllegalAccessException if reflective access to the internal counter fails
 */
private void testOverflow(int id) throws IllegalAccessException {
    CcId ccId = CcId.valueOf(id);
    long ccPrefix = (long) id << 48;
    JobIdFactory factory = new JobIdFactory(ccId);
    AtomicLong counter = (AtomicLong) idField.get(factory);
    Assert.assertEquals(ccPrefix, counter.get());
    // Park the counter at the last id of this CC's partition, then mint twice.
    counter.set(ccPrefix | ((1L << 48) - 1));
    JobId jobId = factory.create();
    Assert.assertEquals(ccId, jobId.getCcId());
    Assert.assertEquals(CcIdPartitionedLongFactory.MAX_ID, jobId.getIdOnly());
    jobId = factory.create();
    Assert.assertEquals(ccId, jobId.getCcId());
    Assert.assertEquals(0, jobId.getIdOnly());
}
/**
 * Ids minted by a factory for CC 0 must all carry CC 0 and count up from zero.
 */
@Test
public void testCcIds() {
    JobIdFactory factory = new JobIdFactory(CcId.valueOf(0));
    for (int expected = 0; expected < 1000; expected++) {
        JobId jobId = factory.create();
        Assert.assertEquals(0, jobId.getCcId().shortValue());
        Assert.assertEquals(expected, jobId.getIdOnly());
    }
}
/**
 * Reports task completion: ships the task's final profile to the CC that owns the
 * job, then detaches the task from its joblet regardless of notification outcome.
 */
@Override
public void run() {
    TaskProfile taskProfile =
            new TaskProfile(task.getTaskAttemptId(), task.getPartitionSendProfile(), task.getStatsCollector());
    try {
        ncs.getClusterController(task.getJobletContext().getJobId().getCcId()).notifyTaskComplete(
                task.getJobletContext().getJobId(), task.getTaskAttemptId(), ncs.getId(), taskProfile);
    } catch (Exception e) {
        LOGGER.log(Level.ERROR, "Failed notifying task complete for " + task.getTaskAttemptId(), e);
    }
    task.getJoblet().removeTask(task);
}
/**
 * CC id 0xFFFF is a negative short; every id it mints must keep that CC value and
 * sign-extend into a negative long whose low 48 bits still count up from zero.
 */
@Test
public void testNegativeCcId() {
    JobIdFactory factory = new JobIdFactory(CcId.valueOf(0xFFFF));
    for (int expected = 0; expected < 1000; expected++) {
        JobId jobId = factory.create();
        Assert.assertEquals((short) 0xFFFF, jobId.getCcId().shortValue());
        Assert.assertEquals(expected, jobId.getIdOnly());
        Assert.assertTrue("JID not negative", jobId.getId() < 0);
        Assert.assertEquals(0xFFFF000000000000L + expected, jobId.getId());
    }
}