Refine search
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // Advance the wrapped line reader; when a line is available, validate that it
  // parses as a JSON object before reporting it to the caller.
  boolean next = lineRecordReader.nextKeyValue();
  if (next) {
    String line = lineRecordReader.getCurrentValue().toString();
    try {
      // Validate: the line must parse AND be a JSON object (a top-level array
      // still fails with ClassCastException, matching the original behavior).
      final JSONObject json = (JSONObject) parser.parse(line);
      // NOTE(review): the original also extracted "author" and "link" into
      // locals that were never used; that dead extraction was removed.
      // Confirm no field assignment was intended here.
    } catch (ParseException e) {
      if (skipBadLines) {
        // Best-effort mode: report the malformed line (by byte offset) and keep going.
        System.err.println("Bad line at offset: "
            + lineRecordReader.getCurrentKey().get() + ":\n" + e.getMessage());
        badLineCount++;
      } else {
        // Strict mode: fail the task, preserving the parse failure as the cause.
        throw new IOException(e);
      }
    }
  }
  return next;
}
@Override
public synchronized void close() throws IOException {
  // Release the wrapped line reader's underlying stream.
  lineRecordReader.close();
}
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Prepare the wrapped line reader for this split.
  this.lineRecordReader.initialize(genericSplit, context);
  this.hasEdges = context.getConfiguration().getBoolean(Constants.GREMLIN_HADOOP_GRAPH_READER_HAS_EDGES, true);
  // Resolve the GraphSON version from the job configuration (defaults to 3.0)
  // and build a reader registered with any user-supplied IoRegistries.
  final GraphSONVersion version = GraphSONVersion.valueOf(
      context.getConfiguration().get(Constants.GREMLIN_HADOOP_GRAPHSON_VERSION, "V3_0"));
  final GraphSONMapper mapper = GraphSONMapper.build()
      .version(version)
      .typeInfo(TypeInfo.PARTIAL_TYPES)
      .addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(context.getConfiguration())))
      .create();
  this.graphsonReader = GraphSONReader.build().mapper(mapper).create();
}
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  // Create a fresh line reader and point it at the given split.
  final LineRecordReader reader = new LineRecordReader();
  reader.initialize(inputSplit, taskAttemptContext);
  this.lrr = reader;
}
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  // Reusable key buffer and JSON parser used by nextKeyValue().
  currentKey = new ImmutableBytesWritable();
  parser = new JSONParser();
  // Bad lines are skipped (and counted) by default; configure the key to false to fail fast instead.
  skipBadLines = taskAttemptContext.getConfiguration().getBoolean(SKIP_LINES_CONF_KEY, true);
  // Delegate raw line reading to a standard LineRecordReader.
  lineRecordReader = new LineRecordReader();
  lineRecordReader.initialize(inputSplit, taskAttemptContext);
}
linerecord.close(); public LongWritable createKey() { return linerecord.createKey(); public Text createValue() { return linerecord.createValue(); public long getPos() throws IOException { return linerecord.getPos(); public float getProgress() throws IOException { return linerecord.getProgress(); return linerecord.next(key, value);
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException { // Set up context File testFile = new File(testFileUrl.getFile()); long testFileSize = testFile.length(); Path testFilePath = new Path(testFile.getAbsolutePath()); Configuration conf = new Configuration(); conf.setInt("io.file.buffer.size", 1); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); // Gather the records returned by the record reader ArrayList<String> records = new ArrayList<String>(); long offset = 0; while (offset < testFileSize) { FileSplit split = new FileSplit(testFilePath, offset, splitSize, null); LineRecordReader reader = new LineRecordReader(); reader.initialize(split, context); while (reader.nextKeyValue()) { records.add(reader.getCurrentValue().toString()); } offset += splitSize; } return records; }
/**
 * Wraps a LineRecordReader and records the key/value separator byte, taken
 * from the first character of the configured separator string (tab by default).
 */
public KeyValueLineRecordReader(Configuration conf) throws IOException {
  lineRecordReader = new LineRecordReader();
  final String configured = conf.get(KEY_VALUE_SEPERATOR, "\t");
  this.separator = (byte) configured.charAt(0);
}
@Test
public void testMultipleClose() throws IOException {
  // Verifies that calling close() twice on a LineRecordReader over compressed
  // (bz2) input does not return its Decompressor to the CodecPool twice:
  // afterwards, ten getDecompressor() calls must yield ten distinct instances.
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // Consume the whole file in one split, then close the reader twice;
  // the second close must be a harmless no-op.
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();
  // Had the decompressor been pooled twice, some of these ten would be the
  // same instance and the set would end up smaller than 10.
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
// NOTE(review): the closing braces of the three counting while-loops were lost
// in this span; restored so each close()/re-initialize happens after the
// corresponding loop, matching the final assertion.
LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsNoSplits = 0;
while (reader.nextKeyValue()) {
  ++numRecordsNoSplits;
}
reader.close();

reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsFirstSplit = 0;
while (reader.nextKeyValue()) {
  ++numRecordsFirstSplit;
}
reader.close();

reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsRemainingSplits = 0;
while (reader.nextKeyValue()) {
  ++numRecordsRemainingSplits;
}
reader.close();

// The two partial passes must together see exactly the records of the full pass.
assertEquals("Unexpected number of records in split ",
    numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
public ScriptRecordReader(final VertexQueryFilter vertexQuery, final TaskAttemptContext context) throws IOException { this.lineRecordReader = new LineRecordReader(); this.vertex = new FaunusVertex(); this.vertexQuery = vertexQuery; this.pathEnabled = context.getConfiguration().getBoolean(FaunusCompiler.PATH_ENABLED, false); final FileSystem fs = FileSystem.get(context.getConfiguration()); try { this.engine.eval(new InputStreamReader(fs.open(new Path(context.getConfiguration().get(ScriptInputFormat.FAUNUS_GRAPH_INPUT_SCRIPT_FILE))))); } catch (Exception e) { throw new IOException(e.getMessage()); } }
@Override
public AvroWrapper<Utf8> getCurrentKey() throws IOException, InterruptedException {
  // Expose the current line as the Avro key: wrap the Text value in a Utf8 datum.
  final Text current = lineRecordReader.getCurrentValue();
  currentKey.datum(new Utf8(current.toString()));
  return currentKey;
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Prepare the wrapped line reader, then compile the user's parse script once.
  this.lineRecordReader.initialize(genericSplit, context);
  final Configuration configuration = context.getConfiguration();
  // An optional serialized graph filter restricts which vertices/edges are loaded.
  if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
    this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
  this.engine = manager.getEngineByName(configuration.get(SCRIPT_ENGINE, "gremlin-groovy"));
  final FileSystem fs = FileSystem.get(configuration);
  try (final InputStream stream = fs.open(new Path(configuration.get(SCRIPT_FILE)));
       final InputStreamReader reader = new InputStreamReader(stream)) {
    // Append the read-call stub so each line can be parsed by invoking the compiled script.
    final String parse = String.join("\n", IOUtils.toString(reader), READ_CALL);
    script = ((Compilable) engine).compile(parse);
  } catch (ScriptException e) {
    // Fix: preserve the compile failure as the cause instead of flattening it
    // to its message (which loses the stack trace and error position info).
    throw new IOException(e);
  }
}
@Override
public boolean nextKeyValue() throws IOException {
  // Pull lines until the script yields a vertex that survives the graph filter,
  // or the input is exhausted.
  while (true) {
    if (!this.lineRecordReader.nextKeyValue())
      return false;
    try {
      final Bindings bindings = this.engine.createBindings();
      final StarGraph graph = StarGraph.open();
      bindings.put(GRAPH, graph);
      bindings.put(LINE, this.lineRecordReader.getCurrentValue().toString());
      // The compiled script returns null for lines that produce no vertex.
      final StarGraph.StarVertex sv = (StarGraph.StarVertex) script.eval(bindings);
      if (sv != null) {
        final Optional<StarGraph.StarVertex> vertex = sv.applyGraphFilter(this.graphFilter);
        if (vertex.isPresent()) {
          this.vertexWritable.set(vertex.get());
          return true;
        }
      }
    } catch (final ScriptException e) {
      // Fix: preserve the script failure as the cause instead of flattening it
      // to its message (which loses the stack trace and error position info).
      throw new IOException(e);
    }
  }
}
public synchronized boolean nextKeyValue() throws IOException {
  // No more lines means no more records.
  if (!this.lineRecordReader.nextKeyValue()) {
    return false;
  }
  // The key is the (fixed) file name; the value is the current line.
  this.key = new Text(fileName);
  this.value = this.lineRecordReader.getCurrentValue();
  return true;
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Set up the wrapped line reader, then cache whether path tracking is enabled.
  this.lineRecordReader.initialize(genericSplit, context);
  final boolean pathsOn = context.getConfiguration().getBoolean(FaunusCompiler.PATH_ENABLED, false);
  this.pathEnabled = pathsOn;
}
@Override
public boolean nextKeyValue() throws IOException {
  // Copy the delegate's current pair into our reusable output writables.
  if (!reader.nextKeyValue()) {
    return false;
  }
  outValue.set(reader.getCurrentValue());
  outKey.set(reader.getCurrentKey().get());
  return true;
}
@Override
public boolean nextKeyValue() throws IOException {
  if (!this.lineRecordReader.nextKeyValue())
    return false;
  // Fix: Text.getBytes() returns the backing buffer, which may be longer than
  // the logical content (getLength()). Bound the stream so stale trailing
  // bytes from a previous, longer line are not fed to the GraphSON parser.
  try (InputStream in = new ByteArrayInputStream(
      this.lineRecordReader.getCurrentValue().getBytes(), 0,
      this.lineRecordReader.getCurrentValue().getLength())) {
    this.vertexWritable.set(this.hasEdges
        ? this.graphsonReader.readVertex(in, Attachable::get, Attachable::get, Direction.BOTH)
        : this.graphsonReader.readVertex(in, Attachable::get));
    return true;
  }
}
delegate = new LineRecordReader(); delegate.close(); public float getProgress() { try { return delegate.getProgress(); CombineFileSplit csplit=(CombineFileSplit)split; FileSplit fileSplit = new FileSplit(csplit.getPath(idx), csplit.getOffset(idx), csplit.getLength(idx), csplit.getLocations()); delegate.initialize(fileSplit, taskcontext); public LongWritable getCurrentKey() throws IOException, InterruptedException { return delegate.getCurrentKey(); return delegate.getCurrentValue(); return delegate.nextKeyValue();
@Override
public float getProgress() throws IOException, InterruptedException {
  // Progress is exactly the wrapped line reader's progress.
  return lrr.getProgress();
}