/**
 * @param igniteInstanceName Ignite instance name.
 * @param io I/O closure for sending messages.
 */
@SuppressWarnings("BusyWait")
public void startSending(String igniteInstanceName, IgniteInClosure2X<T, HadoopMessage> io) {
    assert snd == null;
    assert io != null;

    this.io = io;

    if (!stripeMappers) {
        if (!flushed) {
            snd = new GridWorker(igniteInstanceName, "hadoop-shuffle-" + job.id(), log) {
                @Override protected void body() throws InterruptedException {
                    try {
                        while (!isCancelled()) {
                            if (throttle > 0)
                                Thread.sleep(throttle);

                            collectUpdatesAndSend(false);
                        }
                    }
                    catch (IgniteCheckedException e) {
                        throw new IllegalStateException(e);
                    }
                }
            };

            new IgniteThread(snd).start();
        }
    }

    ioInitLatch.countDown();
}
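// Illustrative sketch only (not part of the original source): a caller wires the sender with
// an I/O closure that forwards each shuffle message to its destination. The pattern mirrors
// the external-executor listener shown later in this section; "comm" is assumed to be a
// communication facade exposing sendMessage(descriptor, message).
shuffleJob.startSending("embedded",
    new IgniteInClosure2X<HadoopProcessDescriptor, HadoopMessage>() {
        @Override public void applyx(HadoopProcessDescriptor dest, HadoopMessage msg)
            throws IgniteCheckedException {
            comm.sendMessage(dest, msg); // Deliver the message; may throw on I/O failure.
        }
    });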
/**
 * Creates reducer tasks based on job information.
 *
 * @param reducers Reducers (may be {@code null}).
 * @param job Job instance.
 * @return Collection of task infos.
 */
private Collection<HadoopTaskInfo> reducerTasks(int[] reducers, HadoopJobEx job) {
    UUID locNodeId = ctx.localNodeId();
    HadoopJobId jobId = job.id();

    JobLocalState state = activeJobs.get(jobId);

    Collection<HadoopTaskInfo> tasks = null;

    if (reducers != null) {
        if (state == null)
            state = initState(job.id());

        for (int rdc : reducers) {
            if (state.addReducer(rdc)) {
                if (log.isDebugEnabled())
                    log.debug("Submitting REDUCE task for execution [locNodeId=" + locNodeId +
                        ", rdc=" + rdc + ']');

                HadoopTaskInfo taskInfo = new HadoopTaskInfo(REDUCE, jobId, rdc, 0, null);

                if (tasks == null)
                    tasks = new ArrayList<>();

                tasks.add(taskInfo);
            }
        }
    }

    return tasks;
}
/**
 * @param taskInfo Task info.
 * @param gridJob Grid Hadoop job.
 * @throws IgniteCheckedException If fails.
 */
public HadoopTestTaskContext(HadoopTaskInfo taskInfo, HadoopJobEx gridJob) throws IgniteCheckedException {
    super(taskInfo, gridJob, gridJob.id(), null, jobConfDataInput(gridJob));
}
final HadoopJobId jobId = job.id();
@Override protected void onTaskFinished(HadoopTaskStatus status) {
    if (log.isDebugEnabled())
        log.debug("Finished task execution [jobId=" + job.id() + ", taskInfo=" + info +
            ", waitTime=" + waitTime() + ", execTime=" + executionTime() + ']');

    finalExecutedTasks.remove(this);

    jobTracker.onTaskFinished(info, status);
}
/**
 * Sends updates to a concrete remote reducer.
 *
 * @param rmtMapIdx Remote map index.
 * @param flush Flush flag.
 * @throws IgniteCheckedException If failed.
 */
private void collectUpdatesAndSend(int rmtMapIdx, boolean flush) throws IgniteCheckedException {
    final int rmtRdcIdx = stripeMappers ? rmtMapIdx % totalReducerCnt : rmtMapIdx;

    HadoopMultimap map = rmtMaps.get(rmtMapIdx);

    if (map == null)
        return;

    if (msgs[rmtMapIdx] == null)
        msgs[rmtMapIdx] = new HadoopShuffleMessage(job.id(), rmtRdcIdx, msgSize);

    visit(map, rmtMapIdx, rmtRdcIdx);

    if (flush && msgs[rmtMapIdx].offset() != 0)
        send(rmtMapIdx, rmtRdcIdx, 0);
}
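// Worked example of the striped-mapper index math above (assumption: totalReducerCnt == 4).
// The modulo suggests that with striping enabled the per-mapper maps are laid out so that
// remote map indices congruent modulo the reducer count all target the same reducer:
int totalReducerCnt = 4; // Hypothetical reducer count, for illustration only.

for (int rmtMapIdx : new int[] {1, 5, 9})
    assert rmtMapIdx % totalReducerCnt == 1; // Map slots 1, 5 and 9 all feed reducer #1.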
/**
 * Process shuffle finish response.
 *
 * @param src Source.
 */
public void onShuffleFinishResponse(T src) {
    if (log.isDebugEnabled())
        log.debug("Received shuffle finish response [jobId=" + job.id() + ", src=" + src + ']');

    remoteShuffleState(src).onShuffleFinishResponse();
}
@Override public void apply(IgniteInternalFuture<?> gridFut) {
    assert initGuard.get();
    assert req.jobId().equals(job.id());

    if (req.reducersAddresses() != null) {
        if (shuffleJob.initializeReduceAddresses(req.reducersAddresses())) {
            shuffleJob.startSending("external",
                new IgniteInClosure2X<HadoopProcessDescriptor, HadoopMessage>() {
                    @Override public void applyx(HadoopProcessDescriptor dest, HadoopMessage msg)
                        throws IgniteCheckedException {
                        comm.sendMessage(dest, msg);
                    }
                });
        }
    }
}
});
/**
 * Generates input data for a reduce-like operation into the mock context input and runs the operation.
 *
 * @param gridJob Job to create the reduce task from.
 * @param taskType Type of task: combine or reduce.
 * @param taskNum Task number within the job.
 * @param words Pairs of words and their counts.
 * @return Context with mock output.
 * @throws IgniteCheckedException If fails.
 */
private HadoopTestTaskContext runTaskWithInput(HadoopJobEx gridJob, HadoopTaskType taskType,
    int taskNum, String... words) throws IgniteCheckedException {
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(taskType, gridJob.id(), taskNum, 0, null);

    HadoopTestTaskContext ctx = new HadoopTestTaskContext(taskInfo, gridJob);

    for (int i = 0; i < words.length; i += 2) {
        List<IntWritable> valList = new ArrayList<>();

        for (int j = 0; j < Integer.parseInt(words[i + 1]); j++)
            valList.add(new IntWritable(1));

        ctx.mockInput().put(new Text(words[i]), valList);
    }

    ctx.run();

    return ctx;
}
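// Illustrative call (assumption: test-style usage, not from the original source): run a
// REDUCE task whose mock input maps "red" to three IntWritable(1) values and "blue" to two,
// i.e. the varargs are (word, count) pairs as documented above.
HadoopTestTaskContext reduceCtx =
    runTaskWithInput(gridJob, HadoopTaskType.REDUCE, 0, "red", "3", "blue", "2");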
@Override public void apply(IgniteInternalFuture<IgniteBiTuple<Process, HadoopProcessDescriptor>> f) {
    if (!busyLock.tryReadLock())
        return;

    try {
        f.get();

        proc0.addTasks(tasks);

        if (log.isDebugEnabled())
            log.debug("Sending task execution request to child process [jobId=" + job.id() +
                ", proc=" + proc0 + ", tasks=" + tasks + ']');

        sendExecutionRequest(proc0, job, tasks);
    }
    catch (IgniteCheckedException e) {
        notifyTasksFailed(tasks, FAILED, e);
    }
    finally {
        busyLock.readUnlock();
    }
}
});
/**
 * Process shuffle finish request.
 *
 * @param src Source.
 * @param msg Shuffle finish message.
 */
public void onShuffleFinishRequest(T src, HadoopShuffleFinishRequest msg) {
    if (log.isDebugEnabled())
        log.debug("Received shuffle finish request [jobId=" + job.id() + ", src=" + src +
            ", req=" + msg + ']');

    HadoopShuffleLocalState state = localShuffleState(src);

    if (state.onShuffleFinishMessage(msg.messageCount()))
        sendFinishResponse(src, msg.jobId());
}
/**
 * Sends prepare request to remote process.
 *
 * @param proc Process to send request to.
 * @param job Job.
 * @param plan Map reduce plan.
 */
private void prepareForJob(HadoopProcess proc, HadoopJobEx job, HadoopMapReducePlan plan) {
    try {
        comm.sendMessage(proc.descriptor(), new HadoopPrepareForJobRequest(job.id(), job.info(),
            plan.reducers(), plan.reducers(ctx.localNodeId())));
    }
    catch (IgniteCheckedException e) {
        U.error(log, "Failed to send job prepare request to remote process [proc=" + proc +
            ", job=" + job + ", plan=" + plan + ']', e);

        proc.terminate();
    }
}
msgs[rmtMapIdx] = newBufMinSize == 0 ? null :
    new HadoopShuffleMessage(job.id(), rmtRdcIdx, Math.max(msgSize, newBufMinSize));
/**
 * Flush remote direct context.
 *
 * @param rmtMapIdx Remote map index.
 * @param rmtDirectCtx Remote direct context.
 * @param reset Whether to perform reset.
 */
private void sendShuffleMessage(int rmtMapIdx, @Nullable HadoopDirectDataOutputContext rmtDirectCtx,
    boolean reset) {
    if (rmtDirectCtx == null)
        return;

    int cnt = rmtDirectCtx.count();

    if (cnt == 0)
        return;

    int rmtRdcIdx = stripeMappers ? rmtMapIdx % totalReducerCnt : rmtMapIdx;

    HadoopDirectDataOutputState state = rmtDirectCtx.state();

    if (reset)
        rmtDirectCtx.reset();

    HadoopDirectShuffleMessage msg = new HadoopDirectShuffleMessage(job.id(), rmtRdcIdx, cnt,
        state.buffer(), state.bufferLength(), state.dataLength());

    T nodeId = reduceAddrs[rmtRdcIdx];

    io.apply(nodeId, msg);

    remoteShuffleState(nodeId).onShuffleMessage();
}
/**
 * Runs a chain of map-combine tasks on a file block.
 *
 * @param fileBlock Block of the input file to be processed.
 * @param gridJob Hadoop job implementation.
 * @return Context of the combine task with mock output.
 * @throws IgniteCheckedException If fails.
 */
private HadoopTestTaskContext runMapCombineTask(HadoopFileBlock fileBlock, HadoopJobEx gridJob)
    throws IgniteCheckedException {
    HadoopTaskInfo taskInfo = new HadoopTaskInfo(HadoopTaskType.MAP, gridJob.id(), 0, 0, fileBlock);

    HadoopTestTaskContext mapCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    mapCtx.run();

    // Prepare input for the combine task: feed it the map task's output.
    taskInfo = new HadoopTaskInfo(HadoopTaskType.COMBINE, gridJob.id(), 0, 0, null);

    HadoopTestTaskContext combineCtx = new HadoopTestTaskContext(taskInfo, gridJob);

    combineCtx.makeTreeOfWritables(mapCtx.mockOutput());

    combineCtx.run();

    return combineCtx;
}
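// Illustrative call (assumption: test-style usage; HOSTS, inFileUri and fileLen are
// hypothetical fixtures). Runs the map task over one input block, then combines its output:
HadoopFileBlock fileBlock = new HadoopFileBlock(HOSTS, inFileUri, 0, fileLen);

HadoopTestTaskContext combineCtx = runMapCombineTask(fileBlock, gridJob);

// combineCtx.mockOutput() now holds the locally pre-aggregated (word, count) pairs.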
/**
 * Sends execution request to remote process.
 *
 * @param proc Process to send request to.
 * @param job Job instance.
 * @param tasks Collection of tasks to execute in the started process.
 * @throws IgniteCheckedException If failed.
 */
private void sendExecutionRequest(HadoopProcess proc, HadoopJobEx job, Collection<HadoopTaskInfo> tasks)
    throws IgniteCheckedException {
    // Must synchronize since a concurrent process crash may happen, in which case we
    // will receive onConnectionLost().
    proc.lock();

    try {
        if (proc.terminated()) {
            notifyTasksFailed(tasks, CRASHED, null);

            return;
        }

        HadoopTaskExecutionRequest req = new HadoopTaskExecutionRequest();

        req.jobId(job.id());
        req.jobInfo(job.info());
        req.tasks(tasks);

        comm.sendMessage(proc.descriptor(), req);
    }
    finally {
        proc.unlock();
    }
}