@Override
public void handleInputInitializerEvent(List<InputInitializerEvent> events) throws Exception {
  for (InputInitializerEvent e : events) {
    pruner.addEvent(e);
  }
}
@Override
public void onVertexStateUpdated(VertexStateUpdate stateUpdate) {
  pruner.processVertex(stateUpdate.getVertexName());
}
eval.initialize(soi);
applyFilterToPartitions(converter, eval, columnName, values);
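The fragment above initializes an expression evaluator and hands the resulting filter to applyFilterToPartitions. The body of that method is not part of this excerpt; what follows is a hedged sketch of the shape it could take, assuming the enclosing class holds a MapWork field named work whose getPathToPartitionInfo() maps paths to PartitionDesc entries, and that non-matching partitions are pruned by removing their paths from that map.

// Sketch under the stated assumptions; only the call shape is implied by the excerpt.
private void applyFilterToPartitions(Converter converter, ExprNodeEvaluator eval,
    String columnName, Set<Object> values) throws HiveException {
  Object[] row = new Object[1];
  Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
  while (it.hasNext()) {
    Path p = it.next();
    PartitionDesc desc = work.getPathToPartitionInfo().get(p);
    // Convert the partition column's string value, then evaluate the filter expression on it.
    row[0] = converter.convert(desc.getPartSpec().get(columnName));
    Object partValue = eval.evaluate(row);
    if (!values.contains(partValue)) {
      it.remove(); // partition matches none of the values from the events: prune it
    }
  }
}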
public void prune() throws SerDeException, IOException, InterruptedException, HiveException {
  synchronized (sourcesWaitingForEvents) {
    if (sourcesWaitingForEvents.isEmpty()) {
      return;
    }
    Set<VertexState> states = Collections.singleton(VertexState.SUCCEEDED);
    for (String source : sourcesWaitingForEvents) {
      // We need to get state transition updates for the vertices that will send
      // events to us. Once we have received all events and a vertex has succeeded,
      // we can move on to do the pruning.
      context.registerForVertexStateUpdates(source, states);
    }
  }
  LOG.info("Waiting for events (" + sourceInfoCount + " sources) ...");
  // Synchronous event processing loop. Won't return until all events have
  // been processed.
  this.processEvents();
  this.prunePartitions();
  LOG.info("Ok to proceed.");
}
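The synchronous loop that prune() refers to is not shown in this excerpt. A minimal sketch of how it could drain the event queue, assuming the queue is a BlockingQueue<Object> so it can carry a sentinel, and assuming hypothetical names endOfEvents (the sentinel object) and processPayload (a helper that applies one event's payload):

// Sketch only; "endOfEvents" and "processPayload" are assumed names.
private void processEvents() throws SerDeException, IOException, InterruptedException {
  while (true) {
    Object element = queue.take(); // blocks until an event (or the sentinel) arrives
    if (element == endOfEvents) {
      break; // sentinel: all sources have completed, no more events will arrive
    }
    InputInitializerEvent event = (InputInitializerEvent) element;
    processPayload(event.getUserPayload(), event.getSourceVertexName());
  }
}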
private void initialize() throws SerDeException {
  this.clear();
  Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
  String columnType = typit.next();
  ExprNodeDesc partKeyExpr = pit.next();
  SourceInfo si = createSourceInfo(t, partKeyExpr, columnName, columnType, jobConf);
  if (!sourceInfoMap.containsKey(s)) {
    sourceInfoMap.put(s, new ArrayList<SourceInfo>());
    // ...
// Events for a source may arrive both before and after that source's
// vertex-succeeded notification; the bookkeeping must tolerate either ordering.
pruner.addEvent(eventV1);
pruner.addEvent(eventV1);
pruner.processVertex("v1");
pruner.addEvent(eventV1);
pruner.addEvent(eventV1);
pruner.addEvent(eventV2);
pruner.processVertex("v2");
pruner.addEvent(eventV2);
pruner.addEvent(eventV2);
@Override
public void run() {
  try {
    lock.lock();
    try {
      while (!started.get()) {
        startCondition.await();
      }
    } finally {
      lock.unlock();
    }
    pruner.prune();
    lock.lock();
    try {
      ended.set(true);
      endCondition.signal();
    } finally {
      lock.unlock();
    }
  } catch (SerDeException | IOException | InterruptedException | HiveException e) {
    inError.set(true);
  }
}
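The tests below call start() and awaitEnd() on this runnable, but those helpers are not shown in the excerpt. A minimal sketch consistent with the lock, condition, and flag fields that run() uses (the method bodies are an assumption):

// Hypothetical companions to run(); the tests call both.
void start() {
  lock.lock();
  try {
    started.set(true);
    startCondition.signal(); // releases run() from its startCondition.await() loop
  } finally {
    lock.unlock();
  }
}

void awaitEnd() throws InterruptedException {
  lock.lock();
  try {
    while (!ended.get()) {
      endCondition.await(); // blocks until run() signals completion
    }
  } finally {
    lock.unlock();
  }
}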
public void processVertex(String name) {
  LOG.info("Vertex succeeded: " + name);
  synchronized (sourcesWaitingForEvents) {
    // Get a deterministic count of number of tasks for the vertex.
    MutableInt prevVal = numExpectedEventsPerSource.get(name);
    int prevValInt = prevVal.intValue();
    Preconditions.checkState(prevValInt < 0,
        "Invalid value for numExpectedEvents for source: " + name + ", oldVal=" + prevValInt);
    prevVal.setValue((-1) * prevValInt * context.getVertexNumTasks(name));
    checkForSourceCompletion(name);
  }
}
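processVertex relies on a sign convention: while a source vertex is still running, its entry in numExpectedEventsPerSource is negative (a per-task event count whose final total is unknown), and only once the vertex succeeds can the true total be computed from the task count. A small worked example of the arithmetic; the per-task count of -1 and task count of 4 are illustrative values, not taken from the excerpt:

// Illustrative arithmetic only; the names mirror the method above.
int prevValInt = -1;   // one expected event per task, vertex still running
int numTasks = 4;      // e.g. what context.getVertexNumTasks("v1") might return
int expectedTotal = (-1) * prevValInt * numTasks;
// expectedTotal == 4: the pruner now waits for exactly 4 events from "v1".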
public DynamicPartitionPruner(InputInitializerContext context, MapWork work, JobConf jobConf)
    throws SerDeException {
  this.context = context;
  this.work = work;
  this.jobConf = jobConf;
  synchronized (this) {
    initialize();
  }
}
public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOException, SerDeException {
  super(initializerContext);
  Preconditions.checkNotNull(initializerContext);
  userPayloadProto = MRInputHelpers.parseMRInputPayload(initializerContext.getInputUserPayload());
  this.conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
  this.jobConf = new JobConf(conf);

  // Read all credentials into the credentials instance stored in JobConf.
  ShimLoader.getHadoopShims().getMergedCredentials(jobConf);

  this.work = Utilities.getMapWork(jobConf);

  this.splitLocationProvider = Utils.getSplitLocationProvider(conf, work.getCacheAffinity(), LOG);
  LOG.info("SplitLocationProvider: " + splitLocationProvider);

  // Events can start coming in the moment the InputInitializer is created. The pruner
  // must be set up and initialized here so that its structures are ready to accept events.
  // Setting it up in initialize() leaves a window where events may arrive before the pruner
  // is initialized, which could cause it to drop events.
  pruner = new DynamicPartitionPruner(initializerContext, work, jobConf);
}
@Test(timeout = 5000)
public void testSingleSourceMultipleFiltersOrdering1() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2));
  DynamicPartitionPruner pruner =
      new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);

  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent event =
        InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    event.setSourceVertexName("v1");

    // All four events arrive before the vertex-succeeded notification.
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.processVertex("v1");

    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
pruner.prune();
public void addEvent(InputInitializerEvent event) {
  synchronized (sourcesWaitingForEvents) {
    if (sourcesWaitingForEvents.contains(event.getSourceVertexName())) {
      ++totalEventCount;
      numEventsSeenPerSource.get(event.getSourceVertexName()).increment();
      if (!queue.offer(event)) {
        throw new IllegalStateException("Queue full");
      }
      checkForSourceCompletion(event.getSourceVertexName());
    }
  }
}
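Both addEvent and processVertex finish by calling checkForSourceCompletion, which does not appear in this excerpt. A hedged sketch of order-independent completion logic, reusing the bookkeeping fields visible above and assuming the queue accepts a sentinel Object (endOfEvents is an assumed name):

// Sketch only; the real implementation is not shown in this excerpt.
private void checkForSourceCompletion(String name) {
  int expectedEvents = numExpectedEventsPerSource.get(name).intValue();
  if (expectedEvents < 0) {
    // Still negative: the vertex has not succeeded yet, so the expected
    // total is unknown. Wait for processVertex() to flip the sign.
    return;
  }
  if (numEventsSeenPerSource.get(name).intValue() == expectedEvents) {
    sourcesWaitingForEvents.remove(name);
    if (sourcesWaitingForEvents.isEmpty()) {
      // All sources done: unblock the synchronous processEvents() loop.
      queue.offer(endOfEvents);
    }
  }
}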
public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOException, SerDeException {
  super(initializerContext);
  Preconditions.checkNotNull(initializerContext);
  userPayloadProto = MRInputHelpers.parseMRInputPayload(initializerContext.getInputUserPayload());
  this.conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
  this.jobConf = new JobConf(conf);

  this.splitLocationProvider = Utils.getSplitLocationProvider(conf, LOG);
  LOG.info("SplitLocationProvider: " + splitLocationProvider);

  // Read all credentials into the credentials instance stored in JobConf.
  ShimLoader.getHadoopShims().getMergedCredentials(jobConf);

  this.work = Utilities.getMapWork(jobConf);

  // Events can start coming in the moment the InputInitializer is created. The pruner
  // must be set up and initialized here so that its structures are ready to accept events.
  // Setting it up in initialize() leaves a window where events may arrive before the pruner
  // is initialized, which could cause it to drop events.
  pruner = new DynamicPartitionPruner(initializerContext, work, jobConf);
}
@Test(timeout = 5000)
public void testSingleSourceMultipleFiltersOrdering2() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2));
  DynamicPartitionPruner pruner =
      new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);

  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent event =
        InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    event.setSourceVertexName("v1");

    // The vertex-succeeded notification arrives before any of the events.
    pruner.processVertex("v1");
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);

    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
private void initialize() throws SerDeException {
  this.clear();
  Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
  String columnName = cit.next();
  ExprNodeDesc partKeyExpr = pit.next();
  SourceInfo si = createSourceInfo(t, partKeyExpr, columnName, jobConf);
  if (!sourceInfoMap.containsKey(s)) {
    sourceInfoMap.put(s, new ArrayList<SourceInfo>());
    // ...