/**
 * Creates an extractor over the Hadoop {@link FileSplit} that was serialized into the given
 * work unit state.
 *
 * <p>All properties of the work unit state are copied into a new {@link JobConf}, which is then
 * used both to obtain the {@link FileInputFormat} and to open the {@link RecordReader}. The
 * reader is opened with {@code Reporter.NULL}.
 *
 * @param workUnitState state holding the serialized split under
 *        {@code HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY}
 * @return the extractor produced by the four-argument {@code getExtractor} overload
 * @throws IOException if the state contains no serialized split, or if a called helper fails
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  boolean hasSerializedSplit = workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  if (!hasSerializedSplit) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  // Mirror every work unit property into the Hadoop configuration.
  Configuration baseConf = new Configuration();
  JobConf hadoopConf = new JobConf(baseConf);
  for (String propName : workUnitState.getPropertyNames()) {
    String propValue = workUnitState.getProp(propName);
    hadoopConf.set(propName, propValue);
  }

  // Rebuild the split from its string form.
  String encodedSplit = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit split = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, encodedSplit);

  FileInputFormat<K, V> inputFormat = getFileInputFormat(workUnitState, hadoopConf);
  RecordReader<K, V> reader = inputFormat.getRecordReader(split, hadoopConf, Reporter.NULL);

  boolean readKeys = workUnitState.getPropAsBoolean(
      HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY,
      HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);

  return getExtractor(workUnitState, reader, split, readKeys);
}
/**
 * Opens a record reader over {@code inputPath} using the input format class stored at position
 * {@code idx} of {@code inFormatClsList}.
 *
 * <p>Side effect: the input paths of {@code job} are set to {@code inputPath}. The input format
 * is asked for a single split, and exactly one split must come back.
 *
 * @param idx index of the input format class in {@code inFormatClsList}
 * @param inputPath path to read
 * @param reporter reporter handed to the created reader
 * @return the reader for the single split of {@code inputPath}
 * @throws IOException if the input format fails while computing splits or opening the reader
 * @throws IllegalArgumentException if the number of splits returned is not exactly one
 */
@SuppressWarnings("unchecked")
private RecordReader<Object, Object> createRecordReader(int idx, Path inputPath, Reporter reporter)
    throws IOException {
  LOG.info(format("Opening '%s'", inputPath));
  Class<? extends FileInputFormat<?, ?>> formatClass =
      (Class<? extends FileInputFormat<?, ?>>) inFormatClsList.get(idx);
  try {
    FileInputFormat.setInputPaths(job, inputPath);
    FileInputFormat<?, ?> inputFormat = formatClass.newInstance();
    if (inputFormat instanceof JobConfigurable) {
      JobConfigurable configurable = (JobConfigurable) inputFormat;
      configurable.configure(job);
    }
    InputSplit[] inputSplits = inputFormat.getSplits(job, 1);
    if (inputSplits.length != 1) {
      throw new IllegalArgumentException("Could not get input splits: " + inputPath);
    }
    InputSplit onlySplit = inputSplits[0];
    return (RecordReader<Object, Object>) inputFormat.getRecordReader(onlySplit, job, reporter);
  } catch (RuntimeException | IOException e) {
    // Already a type this method may throw; pass through untouched.
    throw e;
  } catch (Exception e) {
    // Remaining checked failures (e.g. from reflective instantiation) are wrapped.
    throw new RuntimeException(e);
  }
}
// Clear both the goToNextLeft and alldone flags, then replace rightRR with a newly opened
// reader built from the stored right-side input format, split, configuration and reporter.
// NOTE(review): presumably this restarts the right input for the next left record; the
// enclosing method is not visible here, so confirm against it.
goToNextLeft = alldone = false; this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf, this.rightReporter);
public CartesianRecordReader(CompositeInputSplit split, JobConf conf, Reporter reporter) throws Exception { this.rightConf = conf; this.rightIS = split.get(1); this.rightReporter = reporter; // Create left record reader FileInputFormat leftFIF = (FileInputFormat) ReflectionUtils.newInstance( Class.forName(conf.get(CartesianInputFormat.LEFT_INPUT_FORMAT)), conf); leftRR = leftFIF.getRecordReader(split.get(0), conf, reporter);// Create // right // record // reader rightFIF = (FileInputFormat) ReflectionUtils.newInstance( Class.forName(conf.get(CartesianInputFormat.RIGHT_INPUT_FORMAT)), conf); rightRR = rightFIF.getRecordReader(rightIS, rightConf, rightReporter); // Create key value pairs for parsing lkey = (K1) this.leftRR.createKey(); lvalue = (V1) this.leftRR.createValue(); rkey = (K2) this.rightRR.createKey(); rvalue = (V2) this.rightRR.createValue(); }
/**
 * Opens a reader for one file of a combined split by delegating to a reflectively created
 * input format.
 *
 * <p>The input format class is read from {@code conf} under {@code INDIVIDUAL_INPUT_FORMAT}.
 * If the created instance is {@link Configurable}, it receives {@code conf} before the reader
 * is opened. {@code conf} is cast to {@link JobConf} when the reader is opened.
 *
 * @param split combined split containing the file
 * @param conf configuration naming the input format class
 * @param reporter reporter passed to the delegate reader
 * @param idx index of the file within {@code split}
 * @throws IllegalStateException if no class is configured under {@code INDIVIDUAL_INPUT_FORMAT}
 * @throws Exception if the input format cannot be instantiated or the reader cannot be opened
 */
public CombineFileRecordReaderWrapper( CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx ) throws Exception {
  FileSplit fileSplit = new FileSplit(
    split.getPath( idx ),
    split.getOffset( idx ),
    split.getLength( idx ),
    split.getLocations()
  );

  Class<?> clz = conf.getClass( INDIVIDUAL_INPUT_FORMAT, null );

  // getClass falls back to the supplied default (null) when the property is unset; fail with
  // a message naming the property rather than a bare NullPointerException below.
  if( clz == null ) {
    throw new IllegalStateException( "no input format class configured under property: " + INDIVIDUAL_INPUT_FORMAT );
  }

  FileInputFormat<K, V> inputFormat = (FileInputFormat<K, V>) clz.newInstance();

  if( inputFormat instanceof Configurable ) {
    ( (Configurable) inputFormat ).setConf( conf );
  }

  delegate = inputFormat.getRecordReader( fileSplit, (JobConf) conf, reporter );
}
/**
 * Opens a reader for one file of a combined split by delegating to a reflectively created
 * input format.
 *
 * <p>The input format class is read from {@code conf} under {@code INDIVIDUAL_INPUT_FORMAT}.
 * If the created instance is {@link Configurable}, it receives {@code conf} before the reader
 * is opened. {@code conf} is cast to {@link JobConf} when the reader is opened.
 *
 * @param split combined split containing the file
 * @param conf configuration naming the input format class
 * @param reporter reporter passed to the delegate reader
 * @param idx index of the file within {@code split}
 * @throws IllegalStateException if no class is configured under {@code INDIVIDUAL_INPUT_FORMAT}
 * @throws Exception if the input format cannot be instantiated or the reader cannot be opened
 */
public CombineFileRecordReaderWrapper( CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx ) throws Exception {
  FileSplit fileSplit = new FileSplit(
    split.getPath( idx ),
    split.getOffset( idx ),
    split.getLength( idx ),
    split.getLocations()
  );

  Class<?> clz = conf.getClass( INDIVIDUAL_INPUT_FORMAT, null );

  // getClass falls back to the supplied default (null) when the property is unset; fail with
  // a message naming the property rather than a bare NullPointerException below.
  if( clz == null ) {
    throw new IllegalStateException( "no input format class configured under property: " + INDIVIDUAL_INPUT_FORMAT );
  }

  FileInputFormat<K, V> inputFormat = (FileInputFormat<K, V>) clz.newInstance();

  if( inputFormat instanceof Configurable ) {
    ( (Configurable) inputFormat ).setConf( conf );
  }

  delegate = inputFormat.getRecordReader( fileSplit, (JobConf) conf, reporter );
}
/**
 * Creates an extractor over the Hadoop {@link FileSplit} that was serialized into the given
 * work unit state.
 *
 * <p>All properties of the work unit state are copied into a new {@link JobConf}, which is then
 * used both to obtain the {@link FileInputFormat} and to open the {@link RecordReader}. The
 * reader is opened with {@code Reporter.NULL}.
 *
 * @param workUnitState state holding the serialized split under
 *        {@code HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY}
 * @return the extractor produced by the four-argument {@code getExtractor} overload
 * @throws IOException if the state contains no serialized split, or if a called helper fails
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  boolean hasSerializedSplit = workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  if (!hasSerializedSplit) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  // Mirror every work unit property into the Hadoop configuration.
  Configuration baseConf = new Configuration();
  JobConf hadoopConf = new JobConf(baseConf);
  for (String propName : workUnitState.getPropertyNames()) {
    String propValue = workUnitState.getProp(propName);
    hadoopConf.set(propName, propValue);
  }

  // Rebuild the split from its string form.
  String encodedSplit = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit split = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, encodedSplit);

  FileInputFormat<K, V> inputFormat = getFileInputFormat(workUnitState, hadoopConf);
  RecordReader<K, V> reader = inputFormat.getRecordReader(split, hadoopConf, Reporter.NULL);

  boolean readKeys = workUnitState.getPropAsBoolean(
      HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY,
      HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);

  return getExtractor(workUnitState, reader, split, readKeys);
}
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx) throws IOException { FileSplit fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.getRecordReader(fileSplit, (JobConf)conf, reporter); }
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx) throws IOException { FileSplit fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.getRecordReader(fileSplit, (JobConf)conf, reporter); }
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx) throws IOException { FileSplit fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.getRecordReader(fileSplit, (JobConf)conf, reporter); }
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, Configuration conf, Reporter reporter, Integer idx) throws IOException { FileSplit fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.getRecordReader(fileSplit, (JobConf)conf, reporter); }