// Registers a MultiMRInput-backed data source on the merge vertex, keyed by this
// map work's name. (Fragment — the enclosing method is not visible in this chunk.)
LOG.info("Going through each work and adding MultiMRInput"); mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
private void initializeMapRecordSources() throws Exception { int size = mergeMapOpList.size() + 1; // the +1 is for the main map operator itself sources = new MapRecordSource[size]; position = mapOp.getConf().getTag(); sources[position] = new MapRecordSource(); KeyValueReader reader = null; if (mainWorkMultiMRInput != null) { reader = getKeyValueReader(mainWorkMultiMRInput.getKeyValueReaders(), mapOp); } else { reader = legacyMRInput.getReader(); } sources[position].init(jconf, mapOp, reader); for (AbstractMapOperator mapOp : mergeMapOpList) { int tag = mapOp.getConf().getTag(); sources[tag] = new MapRecordSource(); String inputName = mapOp.getConf().getName(); MultiMRInput multiMRInput = multiMRInputMap.get(inputName); Collection<KeyValueReader> kvReaders = multiMRInput.getKeyValueReaders(); l4j.debug("There are " + kvReaders.size() + " key-value readers for input " + inputName); if (kvReaders.size() > 0) { reader = getKeyValueReader(kvReaders, mapOp); sources[tag].init(jconf, mapOp, reader); } } ((TezContext) MapredContext.get()).setRecordSources(sources); }
/**
 * Accepts split events from the AM; each event is materialized into one reader.
 * Once the last expected event arrives, marks the input ready and wakes any
 * thread blocked in getKeyValueReaders().
 *
 * @param inputEvents events to process, one per physical input split
 * @throws IllegalStateException if events arrive when none (or no more) are expected
 */
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
  lock.lock();
  try {
    if (getNumPhysicalInputs() == 0) {
      throw new IllegalStateException(
          "Unexpected event. MultiMRInput has been setup to receive 0 events");
    }
    // Receiving more events than physical inputs is a protocol violation.
    if (eventCount.get() + inputEvents.size() > getNumPhysicalInputs()) {
      throw new IllegalStateException(
          "Unexpected event. All physical sources already initialized");
    }
    for (Event e : inputEvents) {
      readers.add(initFromEvent((InputDataInformationEvent) e));
      if (eventCount.incrementAndGet() == getNumPhysicalInputs()) {
        // Last expected event: flag readiness and release waiters.
        getContext().inputIsReady();
        condition.signal();
      }
    }
  } finally {
    lock.unlock();
  }
}
/**
 * One-time setup: delegates to the base class, logs the API mode and the
 * expected number of physical inputs, and immediately flags the input ready
 * when there are zero physical inputs (no events will ever arrive then).
 *
 * @return always null — this input emits no events from initialize
 */
@Override
public List<Event> initialize() throws IOException {
  super.initialize();
  String vertexName = getContext().getSourceVertexName();
  LOG.info(vertexName + " using newmapreduce API=" + useNewApi
      + ", numPhysicalInputs=" + getNumPhysicalInputs());
  // With no physical inputs there is nothing to wait for.
  if (getNumPhysicalInputs() == 0) {
    getContext().inputIsReady();
  }
  return null;
}
// NOTE(review): this method body appears to have been garbled during extraction —
// several statements have lost their receivers (e.g. the argument list starting at
// "getContext().getCounters(), inputRecordCounter, ..." has no call in front of it),
// multiple "if (LOG.isDebugEnabled())" blocks are never closed, and the braces do
// not balance. Do not edit without recovering the original source; annotating only.
// Visible intent: build an MRReader (new- or old-API variant) for the split carried
// by the InputDataInformationEvent, record the split length in the
// INPUT_SPLIT_LENGTH_BYTES counter, and return the reader.
private MRReader initFromEvent(InputDataInformationEvent event) throws IOException { Preconditions.checkState(event != null, "Event must be specified"); if (LOG.isDebugEnabled()) { LOG.debug(getContext().getSourceVertexName() + " initializing Reader: " + eventCount.get()); getContext().getCounters(), inputRecordCounter, getContext().getApplicationId() .getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext() .getApplicationId().getId(), getContext().getTaskIndex(), getContext() .getTaskAttemptNumber(), getContext()); if (LOG.isDebugEnabled()) { LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength); splitLength = split.getLength(); reader = new MRReaderMapred(localJobConf, split, getContext().getCounters(), inputRecordCounter, getContext()); if (LOG.isDebugEnabled()) { LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength); getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES) .increment(splitLength); LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event"); return reader;
/**
 * No work to do at start time; only sanity-checks the physical input count.
 */
@Override
public void start() throws Exception {
  // Equivalent to Preconditions.checkState: a negative count is a programming error.
  if (getNumPhysicalInputs() < 0) {
    throw new IllegalStateException("Expecting zero or more physical inputs");
  }
}
}
/**
 * Closes every reader and reports the processed-record count to Tez.
 *
 * <p>Fix over the previous version: all readers are closed even when one of
 * them fails (the original loop aborted on the first failing close, leaking
 * the rest and skipping the statistics report). The first failure is rethrown
 * after statistics are reported; later failures are attached as suppressed.
 *
 * @return always null — this input emits no events on close
 * @throws Exception the first exception raised by a reader close, if any
 */
@Override
public List<Event> close() throws Exception {
  Exception firstFailure = null;
  for (MRReader reader : readers) {
    try {
      reader.close();
    } catch (Exception e) {
      // Keep closing the remaining readers; don't leak them because one failed.
      if (firstFailure == null) {
        firstFailure = e;
      } else {
        firstFailure.addSuppressed(e);
      }
    }
  }
  long inputRecords = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(inputRecords);
  if (firstFailure != null) {
    throw firstFailure;
  }
  return null;
}
/**
 * Blocks until every expected split event has been received, then exposes the
 * accumulated readers as an unmodifiable collection of KeyValueReaders.
 *
 * @throws InterruptedException if interrupted while waiting for split events
 */
public Collection<KeyValueReader> getKeyValueReaders() throws InterruptedException, IOException {
  lock.lock();
  try {
    // Wait until handleEvents() has materialized one reader per physical input.
    while (eventCount.get() != getNumPhysicalInputs()) {
      condition.await();
    }
  } finally {
    lock.unlock();
  }
  // Every MRReader already is a KeyValueReader; the transform is a widening view.
  Function<MRReader, KeyValueReader> widen = new Function<MRReader, KeyValueReader>() {
    @Override
    public KeyValueReader apply(MRReader reader) {
      return reader;
    }
  };
  return Collections.unmodifiableCollection(Lists.transform(readers, widen));
}
// Registers a MultiMRInput-backed data source on the merge vertex, keyed by this
// map work's name. (Fragment — the enclosing method is not visible in this chunk.)
LOG.info("Going through each work and adding MultiMRInput"); mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
private void initializeMapRecordSources() throws Exception { int size = mergeMapOpList.size() + 1; // the +1 is for the main map operator itself sources = new MapRecordSource[size]; position = mapOp.getConf().getTag(); sources[position] = new MapRecordSource(); KeyValueReader reader = null; if (mainWorkMultiMRInput != null) { reader = getKeyValueReader(mainWorkMultiMRInput.getKeyValueReaders(), mapOp); } else { reader = legacyMRInput.getReader(); } sources[position].init(jconf, mapOp, reader); for (AbstractMapOperator mapOp : mergeMapOpList) { int tag = mapOp.getConf().getTag(); sources[tag] = new MapRecordSource(); String inputName = mapOp.getConf().getName(); MultiMRInput multiMRInput = multiMRInputMap.get(inputName); Collection<KeyValueReader> kvReaders = multiMRInput.getKeyValueReaders(); l4j.debug("There are " + kvReaders.size() + " key-value readers for input " + inputName); if (kvReaders.size() > 0) { reader = getKeyValueReader(kvReaders, mapOp); sources[tag].init(jconf, mapOp, reader); } } ((TezContext) MapredContext.get()).setRecordSources(sources); }
// NOTE(review): truncated fragment — the conditional choosing between data-source
// configurations is cut at both edges; only the branch disabling split grouping is visible.
MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource =
// NOTE(review): truncated fragment — cut at both edges. Visible intent: mark the read
// phase skipped, else fetch this input's key-value readers and also skip when none exist.
skipRead = true; } else { Collection<KeyValueReader> keyValueReaders = multiMRInput.getKeyValueReaders(); if ((keyValueReaders == null) || (keyValueReaders.isEmpty())) { l4j.info("Key value readers are null or empty and hence skipping read. "
// NOTE(review): truncated fragment — the conditional choosing between data-source
// configurations is cut at both edges; only the branch disabling split grouping is visible.
MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource =
// NOTE(review): truncated fragment — cut at both edges. Visible intent: mark the read
// phase skipped, else fetch this input's key-value readers and also skip when none exist.
skipRead = true; } else { Collection<KeyValueReader> keyValueReaders = multiMRInput.getKeyValueReaders(); if ((keyValueReaders == null) || (keyValueReaders.isEmpty())) { l4j.info("Key value readers are null or empty and hence skipping read. "
// NOTE(review): truncated fragment — the conditional choosing between data-source
// configurations is cut at both edges; only the branch disabling split grouping is visible.
MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource =
private void initializeMapRecordSources() throws Exception { int size = mergeMapOpList.size() + 1; // the +1 is for the main map operator itself sources = new MapRecordSource[size]; position = mapOp.getConf().getTag(); sources[position] = new MapRecordSource(); KeyValueReader reader = null; if (mainWorkMultiMRInput != null) { reader = getKeyValueReader(mainWorkMultiMRInput.getKeyValueReaders(), mapOp); } else { reader = legacyMRInput.getReader(); } sources[position].init(jconf, mapOp, reader); for (MapOperator mapOp : mergeMapOpList) { int tag = mapOp.getConf().getTag(); sources[tag] = new MapRecordSource(); String inputName = mapOp.getConf().getName(); MultiMRInput multiMRInput = multiMRInputMap.get(inputName); Collection<KeyValueReader> kvReaders = multiMRInput.getKeyValueReaders(); l4j.debug("There are " + kvReaders.size() + " key-value readers for input " + inputName); reader = getKeyValueReader(kvReaders, mapOp); sources[tag].init(jconf, mapOp, reader); } ((TezContext) MapredContext.get()).setRecordSources(sources); }
// Registers a MultiMRInput-backed data source on the merge vertex, keyed by this
// map work's name. (Fragment — the enclosing method is not visible in this chunk.)
LOG.info("Going through each work and adding MultiMRInput"); mergeVx.addDataSource(mapWork.getName(), MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());