/**
 * Create an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 *
 * @param conf Configuration for the {@link MRInputLegacy}
 * @param inputFormat InputFormat derived class
 * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 */
public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
    Class<?> inputFormat) {
  return MRInput.createConfigBuilder(conf, inputFormat)
      .setInputClassName(MRInputLegacy.class.getName());
}
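// Usage sketch (not from the source above; the Configuration values and the
// input path are assumptions): the builder is typically finished with build()
// to produce a DataSourceDescriptor for a vertex that reads via MRInputLegacy.
Configuration conf = new Configuration();
conf.set(FileInputFormat.INPUT_DIR, "/path/to/input"); // hypothetical path
DataSourceDescriptor dataSource =
    MRInputLegacy.createConfigBuilder(conf, TextInputFormat.class).build();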
@Override
public void prepare() {
  LOG.info( "calling {}#start() on: {}", logicalInput.getClass().getSimpleName(), getSource() );

  logicalInput.start();

  Hadoop2TezFlowProcess tezFlowProcess = (Hadoop2TezFlowProcess) FlowProcessWrapper.undelegate( flowProcess );
  TezConfiguration configuration = tezFlowProcess.getConfiguration();

  try {
    reader = (MRReader) logicalInput.getReader();
  } catch( IOException exception ) {
    throw new CascadeException( "unable to get reader", exception );
  }

  // set the cascading.source.path property for the current split;
  // if the split is a TezGroupedSplit, the property currently won't be set
  TezUtil.setSourcePathForSplit( logicalInput, reader, configuration );
}
@Override
public boolean next() throws IOException {
  getContext().notifyProgress();
  return false;
}
void processSplitEvent(InputDataInformationEvent event) throws IOException {
  rrLock.lock();
  try {
    initFromEventInternal(event);
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " notifying on RecordReader initialized");
    }
    rrInited.signal();
  } finally {
    rrLock.unlock();
  }
}
@Override
public List<Event> initialize() throws IOException {
  super.initialize();
  getContext().inputIsReady();
  this.splitInfoViaEvents = jobConf.getBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS,
      MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS_DEFAULT);
  LOG.info(getContext().getSourceVertexName() + " using new mapreduce API=" + useNewApi
      + ", split via event=" + splitInfoViaEvents + ", numPhysicalInputs="
      + getNumPhysicalInputs());
  initializeInternal();
  return null;
}
@Override
public void handleEvents(List<Event> inputEvents) throws Exception {
  if (getNumPhysicalInputs() == 0) {
    throw new IllegalStateException(
        "Unexpected event. MRInput has been setup to receive 0 events");
  }
  if (eventReceived || inputEvents.size() != 1) {
    throw new IllegalStateException(
        "MRInput expects only a single input. Received: current eventListSize: "
            + inputEvents.size() + ". Received previous input: " + eventReceived);
  }
  Event event = inputEvents.iterator().next();
  Preconditions.checkArgument(event instanceof InputDataInformationEvent,
      getClass().getSimpleName() + " can only handle a single event of type: "
          + InputDataInformationEvent.class.getSimpleName());
  processSplitEvent((InputDataInformationEvent) event);
}
private NewRecordReader(MRInput in) throws IOException {
  this.in = in;
  this.reader = in.getReader();
}
"Only a single instance of record reader can be created for this input."); readerCreated = true; if (getNumPhysicalInputs() == 0) { return new KeyValueReader() { @Override try { if (!mrReader.isSetup()) checkAndAwaitRecordReaderInitialization(); } finally { rrLock.unlock();
@Override
public void start() {
  Preconditions.checkState(getNumPhysicalInputs() == 0 || getNumPhysicalInputs() == 1,
      "Expecting 0 or 1 physical input for MRInput");
}
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);
  boolean inUnion = true;
  if (getContext().getTaskVertexName().equals("map3")) {
    inUnion = false;
  }
  Preconditions.checkArgument(getOutputs().size() == (inUnion ? 2 : 1));
  Preconditions.checkArgument(getOutputs().containsKey("checker"));
  MRInput input = (MRInput) getInputs().values().iterator().next();
  KeyValueReader kvReader = input.getReader();
  Output output = getOutputs().get("checker");
  KeyValueWriter kvWriter = (KeyValueWriter) output.getWriter();
  MROutput parts = null;
  KeyValueWriter partsWriter = null;
  if (inUnion) {
    parts = (MROutput) getOutputs().get("parts");
    partsWriter = parts.getWriter();
  }
  while (kvReader.next()) {
    StringTokenizer itr = new StringTokenizer(kvReader.getCurrentValue().toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      kvWriter.write(word, one);
      if (inUnion) {
        partsWriter.write(word, one);
      }
    }
  }
}
/**
 * Create an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder} for a
 * FileInputFormat
 *
 * @param conf Configuration for the {@link MRInputLegacy}
 * @param inputFormat FileInputFormat derived class
 * @param inputPaths Comma separated input paths
 * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 */
public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
    Class<?> inputFormat, String inputPaths) {
  return MRInput.createConfigBuilder(conf, inputFormat, inputPaths)
      .setInputClassName(MRInputLegacy.class.getName());
}
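// Hedged usage sketch for the FileInputFormat variant ("wordcount-vertex",
// tezConf, and the input path are illustrative assumptions, not from this
// source): the resulting descriptor is attached to a DAG vertex by name.
DataSourceDescriptor legacySource = MRInputLegacy
    .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, "/data/in")
    .build();
Vertex mapVertex = Vertex.create("wordcount-vertex",
    ProcessorDescriptor.create(TokenProcessor.class.getName()));
mapVertex.addDataSource("MRInput", legacySource);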
reader = input.getReader();
void checkAndAwaitRecordReaderInitialization() throws IOException {
  assert rrLock.getHoldCount() == 1;
  rrLock.lock();
  try {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " awaiting RecordReader initialization");
    }
    rrInited.await();
  } catch (Exception e) {
    // Chain the cause instead of swallowing it.
    throw new IOException("Interrupted waiting for RecordReader initialization", e);
  } finally {
    rrLock.unlock();
  }
}
inputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
MRInput.MRInputConfigBuilder configurer = MRInput.createConfigBuilder(inputConf, null);
DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();
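// Hedged follow-up (vertex and input names are assumptions): with
// generateSplitsInAM(false) the splits are generated on the client side, and
// the descriptor built above is wired into the consuming vertex like so.
Vertex mapVertex = Vertex.create("map",
    ProcessorDescriptor.create(MapProcessor.class.getName()));
mapVertex.addDataSource("MRInput", dataSource);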
@Override
public List<Event> close() throws IOException {
  mrReader.close();
  long inputRecords = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(inputRecords);
  return null;
}
Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v1.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1)
        .groupSplits(false).build());
Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v2.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2)
        .groupSplits(false).build());
Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v3.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3)
        .groupSplits(false).build());

CartesianProductConfig cartesianProductConfig;
private void initFromEventInternal(InputDataInformationEvent initEvent) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug(getContext().getSourceVertexName() + " initializing RecordReader from event");
  }
  // Deserialize the split carried in the event payload, using the new or old API.
  MRSplitProto splitProto =
      MRSplitProto.parseFrom(ByteString.copyFrom(initEvent.getUserPayload()));
  Object splitObj;
  long splitLength;
  if (useNewApi) {
    org.apache.hadoop.mapreduce.InputSplit split =
        MRInputUtils.getNewSplitDetailsFromEvent(splitProto, jobConf);
    try {
      splitLength = split.getLength();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while querying split length", e);
    }
    splitObj = split;
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: "
          + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength);
    }
  } else {
    org.apache.hadoop.mapred.InputSplit split =
        MRInputUtils.getOldSplitDetailsFromEvent(splitProto, jobConf);
    splitLength = split.getLength();
    splitObj = split;
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: "
          + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength);
    }
  }
  getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
      .increment(splitLength);
  mrReader.setSplit(splitObj);
  LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event");
}
MRInput.MRInputConfigBuilder configBuilder = MRInput.createConfigBuilder( sourceConf, null );
private void initializeInternal() throws IOException {
  rrLock.lock();
  try {
    if (splitInfoViaEvents) {
      // The split arrives later via an InputDataInformationEvent, so the
      // reader is created without a split and initialized when the event lands.
      if (useNewApi) {
        mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter,
            getContext().getApplicationId().getClusterTimestamp(),
            getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
            getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
      } else {
        mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter,
            getContext());
      }
    } else {
      // Split meta information was persisted to disk; look up this task's split.
      TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
      TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
      TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(),
          thisTaskMetaInfo.getStartOffset());
      long splitLength = -1;
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                .findCounter(TaskCounter.SPLIT_RAW_BYTES));
        try {
          splitLength = newInputSplit.getLength();
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while reading split length", e);
        }
        mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(),
            inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
            getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
            getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters()
                .findCounter(TaskCounter.SPLIT_RAW_BYTES));
        splitLength = oldInputSplit.getLength();
        mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(),
            inputRecordCounter, getContext());
      }
      if (splitLength != -1) {
        getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
            .increment(splitLength);
      }
    }
  } finally {
    rrLock.unlock();
  }
}