/**
 * Creates a record reader backed by the given {@link MRInput}.
 *
 * <p>The input is stored and its reader is requested immediately, so a failure
 * to obtain the reader surfaces at construction time.
 *
 * @param in the input to keep a reference to and to request the reader from
 * @throws IOException declared by this constructor; presumably raised by
 *     {@code in.getReader()} (confirm against the MRInput API)
 */
private NewRecordReader(MRInput in) throws IOException {
  this.in = in;
  reader = in.getReader();
}
@Override public void prepare() { LOG.info( "calling {}#start() on: {}", logicalInput.getClass().getSimpleName(), getSource() ); logicalInput.start(); Hadoop2TezFlowProcess tezFlowProcess = (Hadoop2TezFlowProcess) FlowProcessWrapper.undelegate( flowProcess ); TezConfiguration configuration = tezFlowProcess.getConfiguration(); try { reader = (MRReader) logicalInput.getReader(); } catch( IOException exception ) { throw new CascadeException( "unable to get reader", exception ); } // set the cascading.source.path property for the current split // if a TezGroupedSplit, currently won't set TezUtil.setSourcePathForSplit( logicalInput, reader, configuration ); }
@Override public void prepare() { LOG.info( "calling {}#start() on: {}", logicalInput.getClass().getSimpleName(), getSource() ); logicalInput.start(); Hadoop2TezFlowProcess tezFlowProcess = (Hadoop2TezFlowProcess) FlowProcessWrapper.undelegate( flowProcess ); TezConfiguration configuration = tezFlowProcess.getConfiguration(); try { reader = (MRReader) logicalInput.getReader(); } catch( IOException exception ) { throw new CascadeException( "unable to get reader", exception ); } // set the cascading.source.path property for the current split // if a TezGroupedSplit, currently won't set TezUtil.setSourcePathForSplit( logicalInput, reader, configuration ); }
/**
 * Splits every value read from the single input into whitespace-delimited
 * tokens and writes a {@code (word, one)} pair per token.
 *
 * <p>Each pair always goes to the "checker" output. Unless the task vertex is
 * named "map3", the same pair is also written to the "parts" output.
 *
 * @throws Exception declared by the method; also raised (as an unchecked
 *     exception) when one of the input/output precondition checks fails
 */
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);

  // The flag is false only for the vertex named "map3"; every other vertex
  // must expose a second output ("parts") in addition to "checker".
  final boolean inUnion = !getContext().getTaskVertexName().equals("map3");
  final int expectedOutputs = inUnion ? 2 : 1;
  Preconditions.checkArgument(getOutputs().size() == expectedOutputs);
  Preconditions.checkArgument(getOutputs().containsKey("checker"));

  MRInput source = (MRInput) getInputs().values().iterator().next();
  KeyValueReader records = source.getReader();

  Output checker = getOutputs().get("checker");
  KeyValueWriter checkerWriter = (KeyValueWriter) checker.getWriter();

  // Stays null when the "parts" output is not used; only dereferenced under inUnion.
  KeyValueWriter partsWriter = null;
  if (inUnion) {
    MROutput parts = (MROutput) getOutputs().get("parts");
    partsWriter = parts.getWriter();
  }

  while (records.next()) {
    String line = records.getCurrentValue().toString();
    for (StringTokenizer tokens = new StringTokenizer(line); tokens.hasMoreTokens(); ) {
      word.set(tokens.nextToken());
      checkerWriter.write(word, one);
      if (inUnion) {
        partsWriter.write(word, one);
      }
    }
  }
}
// Request the reader from `input` and keep it in `reader` for later use.
reader = input.getReader();