/**
 * Captures the plugin context and loads this manager's settings from the
 * vertex user payload.
 *
 * <p>The payload bytes are deserialized into a {@code CustomVertexConfiguration}
 * through its {@code readFields} method; the bucket count, input name, vertex
 * type and input count it reports are then copied into the corresponding
 * fields of this manager.
 *
 * @throws RuntimeException wrapping the {@link IOException} raised while
 *     reading the configuration from the payload
 */
@Override
public void initialize() {
  this.context = getContext();

  // Wrap the raw payload so the configuration can read itself back from it.
  final ByteBuffer rawPayload = context.getUserPayload().getPayload();
  final CustomVertexConfiguration parsedConf = new CustomVertexConfiguration();
  final DataInputByteBuffer payloadInput = new DataInputByteBuffer();
  payloadInput.reset(rawPayload);

  try {
    parsedConf.readFields(payloadInput);
  } catch (IOException readFailure) {
    throw new RuntimeException(readFailure);
  }

  // Copy the deserialized settings into the manager's own state.
  this.numBuckets = parsedConf.getNumBuckets();
  this.mainWorkName = parsedConf.getInputName();
  this.vertexType = parsedConf.getVertexType();
  this.numInputsAffectingRootInputSpecUpdate = parsedConf.getNumInputs();
}
for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) { if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName() context.addRootInputEvents(inputName, taskEvents);
/**
 * Schedules every task of this vertex as soon as the vertex starts.
 *
 * <p>The vertex's own task count is read from the context and one
 * {@code TaskWithLocationHint} is submitted per task index, each with a
 * {@code null} location hint.
 *
 * @param completions not read by this implementation
 */
@Override
public void onVertexStarted(Map<String, List<Integer>> completions) {
  int numTasks = context.getVertexNumTasks(context.getVertexName());
  List<VertexManagerPluginContext.TaskWithLocationHint> scheduledTasks =
      new ArrayList<VertexManagerPluginContext.TaskWithLocationHint>(numTasks);
  for (int i = 0; i < numTasks; ++i) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor and
    // may hand back a cached instance instead of allocating a new one.
    scheduledTasks.add(
        new VertexManagerPluginContext.TaskWithLocationHint(Integer.valueOf(i), null));
  }
  context.scheduleVertexTasks(scheduledTasks);
}
int totalResource = context.getTotalAvailableResource().getMemory(); int taskResource = context.getVertexTaskResource().getMemory(); float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES,
private void configure() { Preconditions.checkState(!configured.get(), "Vertex: " + getContext().getVertexName()); int numManagedTasks = getContext().getVertexNumTasks(getContext().getVertexName()); LOG.info("Managing " + numManagedTasks + " tasks for vertex: " + getContext().getVertexName()); Map<String, EdgeProperty> edges = getContext().getInputVertexEdgeProperties(); int oneToOneSrcTaskCount = 0; numOneToOneEdges = 0; EdgeProperty edgeProp = entry.getValue(); String srcVertex = entry.getKey(); int numSrcTasks = getContext().getVertexNumTasks(srcVertex); switch (edgeProp.getDataMovementType()) { case CUSTOM: .checkState(oneToOneSrcTaskCount >= 0, "Vertex: " + getContext().getVertexName()); if (oneToOneSrcTaskCount != numManagedTasks) { numManagedTasks = oneToOneSrcTaskCount; LOG.info("Update parallelism of vertex: " + getContext().getVertexName() + " to " + oneToOneSrcTaskCount + " to match source 1-1 vertices."); getContext().reconfigureVertex(oneToOneSrcTaskCount, null, null); Preconditions.checkState(numManagedTasks >=0, "Vertex: " + getContext().getVertexName()); taskIsStarted = new boolean[numManagedTasks]; getContext().doneReconfiguringVertex(); trySchedulingPendingCompletions();
@Override public void onVertexStarted(List<TaskAttemptIdentifier> completions) { managedTasks = getContext().getVertexNumTasks(getContext().getVertexName()); Map<String, EdgeProperty> edges = getContext().getInputVertexEdgeProperties(); for (Map.Entry<String, EdgeProperty> entry : edges.entrySet()) { String srcVertex = entry.getKey(); //track vertices with task count > 0 if (getContext().getVertexNumTasks(srcVertex) > 0) { LOG.info("Task count in " + srcVertex + ": " + getContext().getVertexNumTasks(srcVertex)); srcVertexConfigured.put(srcVertex, false); getContext().registerForVertexStateUpdates(srcVertex, EnumSet.of(VertexState.CONFIGURED)); } else { LOG.info("Vertex: " + getContext().getVertexName() + "; Ignoring " + srcVertex + " as it has got 0 tasks"); } } onVertexStartedDone.set(true); scheduleTasks(); }
/**
 * Restores the recorded {@code VertexConfigurationDoneEvent} from the user
 * payload and announces that this vertex will be reconfigured.
 *
 * <p>When the vertex's task count is still {@code -1}, the restored event
 * must report that set-parallelism was called; in that case the manager
 * remembers that parallelism is applied during initialization and registers
 * for this vertex's own {@code INITIALIZING} state update.
 *
 * @throws Exception if restoring the event from the payload fails
 * @throws IllegalArgumentException if the task count is {@code -1} but the
 *     restored event does not report a set-parallelism call
 */
@Override
public void initialize() throws Exception {
  if (LOG.isDebugEnabled()) {
    LOG.debug("initialize NoOpVertexManager");
  }

  // Rebuild the event from a private copy of the payload bytes.
  configurationDoneEvent = new VertexConfigurationDoneEvent();
  configurationDoneEvent.fromProtoStream(
      new NonSyncByteArrayInputStream(getContext().getUserPayload().deepCopyAsArray()));

  String ownVertexName = getContext().getVertexName();
  boolean parallelismUndetermined = getContext().getVertexNumTasks(ownVertexName) == -1;
  if (parallelismUndetermined) {
    Preconditions.checkArgument(configurationDoneEvent.isSetParallelismCalled(),
        "SetParallelism must be called when numTasks is -1");
    setParallelismInInitializing = true;
    getContext().registerForVertexStateUpdates(ownVertexName,
        Sets.newHashSet(org.apache.tez.dag.api.event.VertexState.INITIALIZING));
  }

  getContext().vertexReconfigurationPlanned();
}
public CartesianProductVertexManager(VertexManagerPluginContext context) { super(context); Preconditions.checkArgument(context.getVertexNumTasks(context.getVertexName()) == -1, "Vertex with CartesianProductVertexManager cannot use pre-defined parallelism"); }
/**
 * Source-task completion callback; this implementation only emits a debug
 * log line naming the managed vertex.
 *
 * @param attempt not read by this implementation
 * @throws Exception declared by the overridden signature; nothing in this
 *     body throws it explicitly
 */
@Override
public void onSourceTaskCompleted(TaskAttemptIdentifier attempt) throws Exception {
  if (!LOG.isDebugEnabled()) {
    return;
  }
  LOG.debug("onSourceTaskCompleted is invoked in NoOpVertexManager, vertex="
      + getContext().getVertexName());
}
Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) == -1, "Parallelism for the vertex should be set to -1 if the InputInitializer is setting parallelism" + ", VertexName: " + getContext().getVertexName()); Preconditions.checkState(configuredInputName == null, "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName: " + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName); configuredInputName = inputName; cEvent.getInputSpecUpdate() == null ? InputSpecUpdate .getDefaultSinglePhysicalInputSpecUpdate() : cEvent.getInputSpecUpdate()); getContext().reconfigureVertex(rootInputSpecUpdate, cEvent.getLocationHint(), cEvent.getNumTasks()); dataInformationEventSeen = true; Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) != 0); Preconditions.checkState( configuredInputName == null || configuredInputName.equals(inputName), "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName:" + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName); configuredInputName = inputName; getContext().addRootInputEvents(inputName, riEvents);
@Override public void onVertexStarted(List<TaskAttemptIdentifier> completions) throws Exception { // apply the ReconfigureDoneEvent and then schedule all the tasks. if (LOG.isDebugEnabled()) { LOG.debug("onVertexStarted is invoked in NoOpVertexManager, vertex=" + getContext().getVertexName()); } if (!setParallelismInInitializing && configurationDoneEvent.isSetParallelismCalled()) { reconfigureVertex(); } getContext().doneReconfiguringVertex(); int numTasks = getContext().getVertexNumTasks(getContext().getVertexName()); if (LOG.isDebugEnabled()) { LOG.debug("Schedule all the tasks, numTask=" + numTasks); } List<ScheduleTaskRequest> tasks = new ArrayList<ScheduleTaskRequest>(); for (int i=0;i<numTasks;++i) { tasks.add(ScheduleTaskRequest.create(i, null)); } getContext().scheduleTasks(tasks); }
@Override public void initialize() { // this will prevent vertex from starting until we notify we are done getContext().vertexReconfigurationPlanned(); Map<String, EdgeProperty> edges = getContext().getInputVertexEdgeProperties(); // wait for sources and self to start numConfiguredSources = 0; configured = new AtomicBoolean(false); started = new AtomicBoolean(false); for (String entry : edges.keySet()) { getContext().registerForVertexStateUpdates(entry, EnumSet.of(VertexState.CONFIGURED)); } }
@Override public synchronized void initialize() { try { conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); bytesPerTask = conf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM, InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER); pc = (PigContext)ObjectSerializer.deserialize(conf.get(PigImplConstants.PIG_CONTEXT)); tezPlan = (TezOperPlan)ObjectSerializer.deserialize(conf.get("pig.tez.plan")); TezEstimatedParallelismClearer clearer = new TezEstimatedParallelismClearer(tezPlan); try { clearer.visit(); } catch (VisitorException e) { throw new TezUncheckedException(e); } TezOperator op = tezPlan.getOperator(OperatorKey.fromString(getContext().getVertexName())); // Collect grandparents of the vertex Function<TezOperator, String> tezOpToString = new Function<TezOperator, String>() { @Override public String apply(TezOperator op) { return op.getOperatorKey().toString(); } }; grandParents = Lists.transform(TezOperPlan.getGrandParentsForGraceParallelism(tezPlan, op), tezOpToString); } catch (IOException e) { throw new TezUncheckedException(e); } // Register notification for grandparents for (String grandParent : grandParents) { getContext().registerForVertexStateUpdates(grandParent, EnumSet.of(VertexState.SUCCEEDED)); } super.initialize(); }
TezOperator op = tezPlan.getOperator(OperatorKey.fromString(getContext().getVertexName())); "set parallelism for " + getContext().getVertexName()); anyPredAboutToStart = true; break; if (finishedGrandParents.contains(predPredVertexName)) { long outputSize = getContext().getVertexStatistics(predPredVertexName).getOutputStatistics(pred.getOperatorKey().toString()).getDataSize(); int desiredNumReducers = (int)Math.ceil((double)outputSize/bytesPerTask); predPred.setEstimatedParallelism(desiredNumReducers); LOG.info(getContext().getVertexName() + ": Grandparent " + predPred.getOperatorKey().toString() + " finished with actual output " + outputSize + " (desired parallelism " + desiredNumReducers + ")"); for(Map.Entry<String,EdgeProperty> entry : getContext().getInputVertexEdgeProperties().entrySet()) { EdgeProperty edge = entry.getValue(); edge = EdgeProperty.create(DataMovementType.SCATTER_GATHER, edge.getDataSourceType(), edge.getSchedulingType(), edgeManagers.put(entry.getKey(), edge); getContext().reconfigureVertex(thisParallelism, null, edgeManagers); parallelismSet = true; LOG.info("Initialize parallelism for " + getContext().getVertexName() + " to " + thisParallelism);
@Override public void initialize() throws Exception { CartesianProductConfigProto config = CartesianProductConfigProto.parseFrom( ByteString.copyFrom(getContext().getUserPayload().getPayload())); Map<String, EdgeProperty> edgePropertyMap = getContext().getInputVertexEdgeProperties(); Set<String> sourceVerticesDAG = edgePropertyMap.keySet(); Set<String> sourceVerticesConfig = new HashSet<>(config.getSourcesList()); Map<String, List<String>> vertexGroups = getContext().getInputVertexGroups(); Map<String, String> vertexToGroup = new HashMap<>(); for (Map.Entry<String, List<String>> group : vertexGroups.entrySet()) {
/**
 * Counts one more configured source and runs {@code configure()} once the
 * count equals the number of input edges.
 *
 * @param stateUpdate the received update; only its vertex name is read, for
 *     logging
 * @throws IllegalStateException if more updates have been counted than
 *     there are input edges
 * @throws Exception declared by the overridden signature
 */
@Override
public synchronized void onVertexStateUpdated(VertexStateUpdate stateUpdate)
    throws Exception {
  numConfiguredSources += 1;
  int requiredSources = getContext().getInputVertexEdgeProperties().size();

  LOG.info("For vertex: " + getContext().getVertexName()
      + " Received configured signal from: " + stateUpdate.getVertexName()
      + " numConfiguredSources: " + numConfiguredSources
      + " needed: " + requiredSources);

  // The counter must never run ahead of the number of input edges.
  boolean withinLimit = numConfiguredSources <= requiredSources;
  Preconditions.checkState(withinLimit, "Vertex: " + getContext().getVertexName());

  if (numConfiguredSources == requiredSources) {
    configure();
  }
}
for (Map.Entry<String, EdgeProperty> e : getContext().getInputVertexEdgeProperties().entrySet()) { if (e.getValue().getDataMovementType() == CUSTOM && e.getValue().getEdgeManagerDescriptor().getClassName() srcVerticesByName.put(e.getKey(), new SrcVertex()); srcVerticesByName.get(e.getKey()).name = e.getKey(); getContext().registerForVertexStateUpdates(e.getKey(), EnumSet.of(VertexState.CONFIGURED)); numCPSrcNotInConfigureState++; } else { getContext().registerForVertexStateUpdates(e.getKey(), EnumSet.of(VertexState.RUNNING)); numBroadcastSrcNotInRunningState++; Map<String, List<String>> srcGroups = getContext().getInputVertexGroups(); for (int i = 0; i < sourceList.size(); i++) { String srcName = sourceList.get(i); getContext().vertexReconfigurationPlanned();
/**
 * Schedules all {@code dynamicParallelism} tasks of this vertex, at most
 * once, and only after the manager is both configured and started.
 *
 * <p>When the conditions hold, one {@code TaskWithLocationHint} with a
 * {@code null} location hint is submitted per task index and
 * {@code scheduled} is set so that later calls do nothing.
 */
private synchronized void trySchedulingTasks() {
  if (configured && started && !scheduled) {
    LOG.info("Scheduling " + dynamicParallelism + " tasks for vertex "
        + getContext().getVertexName());
    List<TaskWithLocationHint> tasksToStart =
        Lists.newArrayListWithCapacity(dynamicParallelism);
    for (int i = 0; i < dynamicParallelism; ++i) {
      // Integer.valueOf replaces the deprecated Integer(int) constructor and
      // may hand back a cached instance instead of allocating a new one.
      tasksToStart.add(new TaskWithLocationHint(Integer.valueOf(i), null));
    }
    getContext().scheduleVertexTasks(tasksToStart);
    scheduled = true;
  }
}
/**
 * Finishes reconfiguration once parallelism has been set and every source
 * has been counted as configured, then attempts to schedule tasks.
 *
 * <p>Does nothing while either condition is still unmet.
 */
private void configure() {
  // Not ready yet: parallelism is unset or some sources are outstanding.
  if (!parallelismSet || numSources != numConfiguredSources) {
    return;
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Done reconfiguring vertex " + getContext().getVertexName());
  }
  getContext().doneReconfiguringVertex();
  configured = true;
  trySchedulingTasks();
}
@Override public void initialize() { try { conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); } catch (IOException e) { throw new TezUncheckedException(e); } config = initConfiguration(); updatePendingTasks(); if (config.isAutoParallelismEnabled()) { getContext().vertexReconfigurationPlanned(); } // dont track the source tasks here since those tasks may themselves be // dynamically changed as the DAG progresses. }