// Delegates to the NodeControllerService for the application-level context object.
// NOTE(review): declared as Object; callers presumably cast to their concrete
// application-context type -- confirm against the interface this @Override implements.
@Override
public Object getApplicationContext() {
    return ncs.getApplicationContext();
}
}
// Installs a pre-distributed (deployed) job specification on this NC: rejects
// duplicates unless upserting, deserializes the ActivityClusterGraph bytes, and
// caches the graph under the deployed job spec id. On failure, notifies the CC.
@Override
public void run() {
    try {
        if (!upsert) {
            // A plain deploy must not overwrite an existing spec with the same id.
            ncs.checkForDuplicateDeployedJobSpec(deployedJobSpecId);
        }
        // Deserialize the serialized graph (null deployment id) using this NC's context.
        ActivityClusterGraph acg =
                (ActivityClusterGraph) DeploymentUtils.deserialize(acgBytes, null, ncs.getContext());
        ncs.storeActivityClusterGraph(deployedJobSpecId, acg);
    } catch (HyracksException e) {
        // Best-effort: tell the originating CC the deployment failed on this node.
        try {
            ncs.getClusterController(ccId).notifyDeployedJobSpecFailure(deployedJobSpecId, ncs.getId());
        } catch (Exception e1) {
            // NOTE(review): notification failure is only printed to stderr, never routed
            // through the logging framework -- consider using a logger here.
            e1.printStackTrace();
        }
    }
}
// Aborts every task belonging to jobs issued by the given cluster controller (ccId),
// aborts/sweeps their result partitions, schedules joblet cleanup with FAILURE status,
// and finally submits a watcher that ensures all aborted tasks actually terminate.
@Override
protected void doRun() throws Exception {
    LOGGER.info("Aborting all tasks for controller {}", ccId);
    IResultPartitionManager resultPartitionManager = ncs.getResultPartitionManager();
    if (resultPartitionManager == null) {
        // Unexpected but tolerated: result-partition cleanup below is skipped when null.
        LOGGER.log(Level.WARN, "ResultPartitionManager is null on " + ncs.getId());
    }
    Deque<Task> abortedTasks = new ArrayDeque<>();
    Collection<Joblet> joblets = ncs.getJobletMap().values(); // TODO(mblow): should we have one jobletmap per cc?
    // Only touch joblets whose job originated from this CC; jobs from other CCs keep running.
    joblets.stream().filter(joblet -> joblet.getJobId().getCcId().equals(ccId)).forEach(joblet -> {
        joblet.getTaskMap().values().forEach(task -> {
            task.abort();
            abortedTasks.add(task);
        });
        final JobId jobId = joblet.getJobId();
        if (resultPartitionManager != null) {
            // Drop any readers first, then sweep the job's result partitions.
            resultPartitionManager.abortReader(jobId);
            resultPartitionManager.sweep(jobId);
        }
        ncs.getWorkQueue().schedule(new CleanupJobletWork(ncs, jobId, JobStatus.FAILURE));
    });
    // Asynchronously verify that every aborted task completes.
    ncs.getExecutor().submit(new EnsureAllCcTasksCompleted(ncs, ccId, abortedTasks));
}
}
// Drops the pre-distributed job specification from this NC's cache. If the removal
// fails, the originating CC is informed on a best-effort basis; a failure of the
// notification itself is surfaced on stderr only.
@Override
public void run() {
    try {
        ncs.removeActivityClusterGraph(deployedJobSpecId);
    } catch (HyracksException removalFailure) {
        try {
            ncs.getClusterController(ccId).notifyDeployedJobSpecFailure(deployedJobSpecId, ncs.getId());
        } catch (Exception notifyFailure) {
            // Nothing further we can do from here.
            notifyFailure.printStackTrace();
        }
    }
}
// Answers a CC ping by reporting this node's id back to the pinging cluster
// controller; a failure to respond is logged at INFO and otherwise ignored.
@Override
public void run() {
    try {
        ncs.getClusterController(ccId).notifyPingResponse(ncs.getId());
    } catch (Exception pingFailure) {
        LOGGER.info("failed to respond to ping from cc {}", ccId, pingFailure);
    }
}
}
// NOTE(review): this is an elided excerpt of a task-start routine -- statements
// between the visible lines are missing, so braces intentionally do not balance here.
int taskIndex = 0;
try {
    ncs.updateMaxJobId(jobId);
    NCServiceContext serviceCtx = ncs.getContext();
    Joblet joblet = getOrCreateLocalJoblet(deploymentId, serviceCtx, acgBytes);
    // Refuse to start tasks on a node that is not ACTIVE.
    if (ncs.getNodeStatus() != NodeStatus.ACTIVE) {
        throw HyracksException.create(ErrorCode.NODE_IS_NOT_ACTIVE, ncs.getId());
    // [elided] ... per-task loop begins somewhere above this point ...
    List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
    // Reset before construction so a constructor failure leaves task null.
    task = null;
    task = new Task(joblet, flags, taId, han.getClass().getName(), ncs.getExecutor(), ncs,
            createInputChannels(td, inputs));
    IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
    // [elided] ... failure path: tag exceptions with this node's id and report ...
    ExceptionUtils.setNodeIds(exceptions, ncs.getId());
    TaskAttemptId taskId = taskDescriptors.get(taskIndex).getTaskAttemptId();
    ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions, jobId, taskId));
// Wires up the NC-side message broker: sizes the messaging frame pool from the
// configured frame size/count, prepares the inbound queue and future bookkeeping,
// and kicks off the asynchronous delivery loop on the application executor.
public NCMessageBroker(NodeControllerService ncs, MessagingProperties messagingProperties) {
    this.ncs = ncs;
    appContext = (INcApplicationContext) ncs.getApplicationContext();
    maxMsgSize = messagingProperties.getFrameSize();
    // Total pool budget = frame size * frame count.
    int poolBudget = messagingProperties.getFrameSize() * messagingProperties.getFrameCount();
    messagingFramePool = new ConcurrentFramePool(ncs.getId(), poolBudget, messagingProperties.getFrameSize());
    receivedMsgsQ = new LinkedBlockingQueue<>();
    futureIdGenerator = new AtomicLong();
    futureMap = new LongObjectHashMap<>();
    // Start draining/delivering received messages in the background.
    MessageDeliveryService deliveryService = new MessageDeliveryService();
    appContext.getThreadExecutor().execute(deliveryService);
}
// Undeploys a binary from this NC and reports the outcome (SUCCEED/FAIL) back to
// the cluster controller. The undeploy error itself is absorbed into the status;
// an unreachable CC, however, is fatal for this work item.
@Override
public void run() {
    DeploymentStatus outcome;
    try {
        DeploymentUtils.undeploy(deploymentId, ncs.getContext().getJobSerializerDeserializerContainer(),
                ncs.getServerContext());
        outcome = DeploymentStatus.SUCCEED;
    } catch (Exception undeployFailure) {
        // Deliberate best-effort: the CC learns about the failure via the FAIL status.
        outcome = DeploymentStatus.FAIL;
    }
    try {
        ncs.getClusterController(ccId).notifyDeployBinary(deploymentId, ncs.getId(), outcome);
    } catch (Exception notifyFailure) {
        throw new RuntimeException(notifyFailure);
    }
}
// Produces a writer that materializes and pipelines one partition of this
// connector's output, identified by (job, connector id, sender, receiver).
@Override
public IFrameWriter createFrameWriter(int receiverIndex) throws HyracksDataException {
    final PartitionId partitionId =
            new PartitionId(jobId, conn.getConnectorId(), senderIndex, receiverIndex);
    return new MaterializingPipelinedPartition(ctx, ncs.getPartitionManager(), partitionId, taId,
            ncs.getExecutor());
}
};
// Builds the JVM shutdown-hook thread for this node, named after the node id
// (e.g. "ShutdownHook-nc1") so it is identifiable in thread dumps.
NCShutdownHook(NodeControllerService nodeControllerService) {
    super("ShutdownHook-" + nodeControllerService.getId());
    this.nodeControllerService = nodeControllerService;
}
// NC process entry point: parses configuration, installs the per-node log4j2
// configuration, builds and starts the NodeControllerService, then parks the main
// thread forever (all real work runs on service threads).
// Exit codes: 2 = bad command line, 1 = any other startup/runtime failure.
public static void main(String[] args) {
    try {
        final String nodeId = ConfigUtils.getOptionValue(args, NCConfig.Option.NODE_ID);
        final ConfigManager configManager = new ConfigManager(args);
        INCApplication application = getApplication(args);
        application.registerConfig(configManager);
        NCConfig ncConfig = new NCConfig(nodeId, configManager);
        // Swap in the node-specific logging configuration before further logging happens.
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration cfg = ctx.getConfiguration();
        NCLogConfigurationFactory logCfgFactory = new NCLogConfigurationFactory(ncConfig);
        ConfigurationFactory.setConfigurationFactory(logCfgFactory);
        configManager.processConfig();
        // Remove the default console logger; the factory-provided configuration takes over.
        cfg.removeLogger("Console");
        ctx.start(logCfgFactory.getConfiguration(ctx, ConfigurationSource.NULL_SOURCE));
        final NodeControllerService ncService = new NodeControllerService(ncConfig, application);
        ncService.start();
        // Keep the JVM alive indefinitely.
        while (true) {
            Thread.sleep(10000);
        }
    } catch (CmdLineException e) {
        LOGGER.log(Level.DEBUG, "Exception parsing command line: " + Arrays.toString(args), e);
        System.exit(2);
    } catch (Exception e) {
        // NOTE(review): an InterruptedException from the sleep loop also lands here and
        // exits with code 1; the interrupt flag is not restored (process exits anyway).
        LOGGER.error("Exiting NCDriver due to exception", e);
        System.exit(1);
    }
}
// NOTE(review): excerpt of a larger routine (likely test/demo code) -- `ctx`, `ncs`,
// and `taId` are defined outside this view. It opens a channel to this NC's OWN
// network address (self-loopback) and feeds it a materializing pipelined partition.
final ConnectorDescriptorId codId = new ConnectorDescriptorId(1);
final PartitionId pid = new PartitionId(ctx.getJobletContext().getJobId(), codId, 1, 1);
// Connect back to our own local network address.
final ChannelControlBlock ccb = ncs.getNetworkManager()
        .connect(NetworkingUtil.getSocketAddress(ncs.getNetworkManager().getLocalNetworkAddress()));
final NetworkOutputChannel networkOutputChannel = new NetworkOutputChannel(ccb, 0);
final MaterializingPipelinedPartition mpp = new MaterializingPipelinedPartition(ctx, ncs.getPartitionManager(),
        pid, taId, ncs.getExecutor());
mpp.open();
// Builds a RegistrationTasksRequestMessage describing this node's status and system
// state, and ships it to the given CC via the NC message broker. Any failure is
// logged at ERROR and rethrown wrapped as a HyracksDataException.
public static void send(CcId ccId, NodeControllerService cs, NodeStatus nodeStatus, SystemState systemState)
        throws HyracksDataException {
    try {
        INCMessageBroker broker = (INCMessageBroker) cs.getContext().getMessageBroker();
        broker.sendMessageToCC(ccId, new RegistrationTasksRequestMessage(cs.getId(), nodeStatus, systemState));
    } catch (Exception e) {
        LOGGER.log(Level.ERROR, "Unable to send RegistrationTasksRequestMessage to CC", e);
        throw HyracksDataException.create(e);
    }
}
// NOTE(review): excerpt of heartbeat assembly -- `hbData` is created outside this
// view. Snapshots network, result-network, and IPC performance counters plus the
// core count into hbData, then hands the heartbeat to the node-controller tracker.
MuxDemuxPerformanceCounters netPC = ncs.getNetworkManager().getPerformanceCounters();
hbData.netPayloadBytesRead = netPC.getPayloadBytesRead();
hbData.netPayloadBytesWritten = netPC.getPayloadBytesWritten();
hbData.netSignalingBytesWritten = netPC.getSignalingBytesWritten();
MuxDemuxPerformanceCounters resultNetPC = ncs.getResultNetworkManager().getPerformanceCounters();
hbData.resultNetPayloadBytesRead = resultNetPC.getPayloadBytesRead();
hbData.resultNetPayloadBytesWritten = resultNetPC.getPayloadBytesWritten();
hbData.resultNetSignalingBytesWritten = resultNetPC.getSignalingBytesWritten();
IPCPerformanceCounters ipcPC = ncs.getIpcSystem().getPerformanceCounters();
hbData.ipcMessagesSent = ipcPC.getMessageSentCount();
hbData.ipcMessageBytesSent = ipcPC.getMessageBytesSent();
hbData.numCores = Runtime.getRuntime().availableProcessors();
ncs.getNodeControllerData().notifyHeartbeat(hbData);
// NOTE(review): excerpt of a switch over CC->NC RPC function ids; each case unpacks
// the function payload and schedules the matching work item on the NC work queue.
case SEND_APPLICATION_MESSAGE:
    CCNCFunctions.SendApplicationMessageFunction amf = (CCNCFunctions.SendApplicationMessageFunction) fn;
    ncs.getWorkQueue().schedule(
            new ApplicationMessageWork(ncs, amf.getMessage(), amf.getDeploymentId(), amf.getNodeId()));
    return;
case START_TASKS:
    CCNCFunctions.StartTasksFunction stf = (CCNCFunctions.StartTasksFunction) fn;
    // NOTE(review): the remainder of this schedule(...) call -- the trailing
    // arguments, closing parens, and its `return;` -- is elided from this excerpt,
    // so the argument list below is intentionally left open.
    ncs.getWorkQueue()
            .schedule(new StartTasksWork(ncs, stf.getDeploymentId(), stf.getJobId(), stf.getPlanBytes(),
                    stf.getTaskDescriptors(), stf.getConnectorPolicies(), stf.getFlags(),
case ABORT_TASKS:
    CCNCFunctions.AbortTasksFunction atf = (CCNCFunctions.AbortTasksFunction) fn;
    ncs.getWorkQueue().schedule(new AbortTasksWork(ncs, atf.getJobId(), atf.getTasks()));
    return;
case ABORT_ALL_JOBS:
    CCNCFunctions.AbortCCJobsFunction aajf = (CCNCFunctions.AbortCCJobsFunction) fn;
    ncs.getWorkQueue().schedule(new AbortAllJobsWork(ncs, aajf.getCcId()));
    return;
case CLEANUP_JOBLET:
    CCNCFunctions.CleanupJobletFunction cjf = (CCNCFunctions.CleanupJobletFunction) fn;
    ncs.getWorkQueue().schedule(new CleanupJobletWork(ncs, cjf.getJobId(), cjf.getStatus()));
    return;
case REPORT_PARTITION_AVAILABILITY:
    CCNCFunctions.ReportPartitionAvailabilityFunction rpaf =
            (CCNCFunctions.ReportPartitionAvailabilityFunction) fn;
    ncs.getWorkQueue().schedule(
            new ReportPartitionAvailabilityWork(ncs, rpaf.getPartitionId(), rpaf.getNetworkAddress()));
    return;
// Final teardown of this joblet: deregister it from the node controller, fire the
// joblet-finish event with the recorded cleanup status, release local resources,
// clear the pending flag, and tell the owning CC that cleanup completed.
private void performCleanup() {
    nodeController.getJobletMap().remove(jobId);
    IJobletEventListener listener = getJobletEventListener();
    if (listener != null) {
        listener.jobletFinish(cleanupStatus);
    }
    close();
    cleanupPending = false;
    try {
        nodeController.getClusterController(jobId.getCcId()).notifyJobletCleanup(jobId, nodeController.getId());
    } catch (Exception e) {
        // NOTE(review): best-effort notification; the failure is only printed to
        // stderr, not routed through a logger -- consider logging instead.
        e.printStackTrace();
    }
}
// NOTE(review): excerpt of a cluster-bootstrap helper -- it starts before and ends
// after this view; `opts`, `nodeId`, `ncConfigManager`, `ncApplication`, and
// `nodeControllers` are defined outside the excerpt, and the anonymous Thread's
// run() body is cut off below.
opts.forEach(opt -> ncConfigManager.set(nodeId, opt.getLeft(), opt.getRight()));
nodeControllers
        .add(new NodeControllerService(fixupPaths(createNCConfig(nodeId, ncConfigManager)), ncApplication));
// One named thread per NC so the nodes start in parallel.
List<Thread> startupThreads = new ArrayList<>();
for (NodeControllerService nc : nodeControllers) {
    Thread ncStartThread = new Thread("IntegrationUtil-" + nc.getId()) {
        @Override
        public void run() {
// Snapshots this NC's local counters -- max resource id, max transaction id, and the
// max job id observed from the given CC -- into an NcLocalCounters value.
public static NcLocalCounters collect(CcId ccId, NodeControllerService ncs) throws HyracksDataException {
    final INcApplicationContext appCtx = (INcApplicationContext) ncs.getApplicationContext();
    // Never report below the first user dataset id; lower ids are reserved for metadata indexes.
    final long maxResourceId = Math.max(appCtx.getLocalResourceRepository().maxId(),
            MetadataIndexImmutableProperties.FIRST_AVAILABLE_USER_DATASET_ID);
    return new NcLocalCounters(maxResourceId, appCtx.getMaxTxnId(), ncs.getMaxJobId(ccId));
}
// Serializes the message with plain Java serialization and ships it to the target
// CC (no deployment id attached).
@Override
public void sendMessageToCC(CcId ccId, ICcAddressedMessage message) throws Exception {
    final byte[] payload = JavaSerializationUtils.serialize(message);
    ncs.sendApplicationMessageToCC(ccId, payload, null);
}
// Reports a task failure: logs the primary exception (quietly if it was caused by
// an interrupt), aborts any result readers for the job, notifies the owning CC,
// and finally detaches the task from its joblet.
@Override
public void run() {
    final Exception primary = exceptions.get(0);
    final Level level = ExceptionUtils.causedByInterrupt(primary) ? Level.DEBUG : Level.WARN;
    LOGGER.log(level, "task " + taskId + " has failed", primary);
    try {
        final IResultPartitionManager rpm = ncs.getResultPartitionManager();
        if (rpm != null) {
            rpm.abortReader(jobId);
        }
        ncs.getClusterController(jobId.getCcId()).notifyTaskFailure(jobId, taskId, ncs.getId(), exceptions);
    } catch (Exception reportFailure) {
        LOGGER.log(Level.ERROR, "Failure reporting task failure to cluster controller", reportFailure);
    }
    if (task != null) {
        task.getJoblet().removeTask(task);
    }
}