Refine search
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // Advance the wrapped line reader; when a line is available, validate that it
  // parses as a JSON object before reporting it to the caller.
  boolean next = lineRecordReader.nextKeyValue();
  if (next) {
    String line = lineRecordReader.getCurrentValue().toString();
    try {
      // Validate: the line must parse AND be a JSON object (a top-level array
      // still fails with ClassCastException, matching the original behavior).
      final JSONObject json = (JSONObject) parser.parse(line);
      // NOTE(review): the original also extracted "author" and "link" into
      // locals that were never used; that dead extraction was removed.
      // Confirm no field assignment was intended here.
    } catch (ParseException e) {
      if (skipBadLines) {
        // Best-effort mode: report the malformed line (by byte offset) and keep going.
        System.err.println("Bad line at offset: "
            + lineRecordReader.getCurrentKey().get() + ":\n" + e.getMessage());
        badLineCount++;
      } else {
        // Strict mode: fail the task, preserving the parse failure as the cause.
        throw new IOException(e);
      }
    }
  }
  return next;
}
@Override
public synchronized void close() throws IOException {
  // Release the wrapped line reader's underlying stream.
  lineRecordReader.close();
}
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Prepare the wrapped line reader for this split.
  this.lineRecordReader.initialize(genericSplit, context);
  this.hasEdges = context.getConfiguration().getBoolean(Constants.GREMLIN_HADOOP_GRAPH_READER_HAS_EDGES, true);
  // Resolve the GraphSON version from the job configuration (defaults to 3.0)
  // and build a reader registered with any user-supplied IoRegistries.
  final GraphSONVersion version = GraphSONVersion.valueOf(
      context.getConfiguration().get(Constants.GREMLIN_HADOOP_GRAPHSON_VERSION, "V3_0"));
  final GraphSONMapper mapper = GraphSONMapper.build()
      .version(version)
      .typeInfo(TypeInfo.PARTIAL_TYPES)
      .addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(context.getConfiguration())))
      .create();
  this.graphsonReader = GraphSONReader.build().mapper(mapper).create();
}
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  // Create a fresh line reader and point it at the given split.
  final LineRecordReader reader = new LineRecordReader();
  reader.initialize(inputSplit, taskAttemptContext);
  this.lrr = reader;
}
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  // Reusable key buffer and JSON parser used by nextKeyValue().
  currentKey = new ImmutableBytesWritable();
  parser = new JSONParser();
  // Bad lines are skipped (and counted) by default; configure the key to false to fail fast instead.
  skipBadLines = taskAttemptContext.getConfiguration().getBoolean(SKIP_LINES_CONF_KEY, true);
  // Delegate raw line reading to a standard LineRecordReader.
  lineRecordReader = new LineRecordReader();
  lineRecordReader.initialize(inputSplit, taskAttemptContext);
}
linerecord.close(); public LongWritable createKey() { return linerecord.createKey(); public Text createValue() { return linerecord.createValue(); public long getPos() throws IOException { return linerecord.getPos(); public float getProgress() throws IOException { return linerecord.getProgress(); return linerecord.next(key, value);
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException { // Set up context File testFile = new File(testFileUrl.getFile()); long testFileSize = testFile.length(); Path testFilePath = new Path(testFile.getAbsolutePath()); Configuration conf = new Configuration(); conf.setInt("io.file.buffer.size", 1); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); // Gather the records returned by the record reader ArrayList<String> records = new ArrayList<String>(); long offset = 0; while (offset < testFileSize) { FileSplit split = new FileSplit(testFilePath, offset, splitSize, null); LineRecordReader reader = new LineRecordReader(); reader.initialize(split, context); while (reader.nextKeyValue()) { records.add(reader.getCurrentValue().toString()); } offset += splitSize; } return records; }
/**
 * Wraps a LineRecordReader and records the key/value separator byte, taken
 * from the first character of the configured separator string (tab by default).
 */
public KeyValueLineRecordReader(Configuration conf) throws IOException {
  lineRecordReader = new LineRecordReader();
  final String configured = conf.get(KEY_VALUE_SEPERATOR, "\t");
  this.separator = (byte) configured.charAt(0);
}
@Test
public void testMultipleClose() throws IOException {
  // Verifies that calling close() twice on a LineRecordReader over compressed
  // (bz2) input does not return its Decompressor to the CodecPool twice:
  // afterwards, ten getDecompressor() calls must yield ten distinct instances.
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // Consume the whole file in one split, then close the reader twice;
  // the second close must be a harmless no-op.
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();
  // Had the decompressor been pooled twice, some of these ten would be the
  // same instance and the set would end up smaller than 10.
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
// NOTE(review): the closing braces of the three counting while-loops were lost
// in this span; restored so each close()/re-initialize happens after the
// corresponding loop, matching the final assertion.
LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsNoSplits = 0;
while (reader.nextKeyValue()) {
  ++numRecordsNoSplits;
}
reader.close();

reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsFirstSplit = 0;
while (reader.nextKeyValue()) {
  ++numRecordsFirstSplit;
}
reader.close();

reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
int numRecordsRemainingSplits = 0;
while (reader.nextKeyValue()) {
  ++numRecordsRemainingSplits;
}
reader.close();

// The two partial passes must together see exactly the records of the full pass.
assertEquals("Unexpected number of records in split ",
    numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
public ScriptRecordReader(final VertexQueryFilter vertexQuery, final TaskAttemptContext context) throws IOException { this.lineRecordReader = new LineRecordReader(); this.vertex = new FaunusVertex(); this.vertexQuery = vertexQuery; this.pathEnabled = context.getConfiguration().getBoolean(FaunusCompiler.PATH_ENABLED, false); final FileSystem fs = FileSystem.get(context.getConfiguration()); try { this.engine.eval(new InputStreamReader(fs.open(new Path(context.getConfiguration().get(ScriptInputFormat.FAUNUS_GRAPH_INPUT_SCRIPT_FILE))))); } catch (Exception e) { throw new IOException(e.getMessage()); } }
@Override
public AvroWrapper<Utf8> getCurrentKey() throws IOException, InterruptedException {
  // Expose the current line as the Avro key: wrap the Text value in a Utf8 datum.
  final Text current = lineRecordReader.getCurrentValue();
  currentKey.datum(new Utf8(current.toString()));
  return currentKey;
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Prepare the wrapped line reader, then compile the user's parse script once.
  this.lineRecordReader.initialize(genericSplit, context);
  final Configuration configuration = context.getConfiguration();
  // An optional serialized graph filter restricts which vertices/edges are loaded.
  if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
    this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
  this.engine = manager.getEngineByName(configuration.get(SCRIPT_ENGINE, "gremlin-groovy"));
  final FileSystem fs = FileSystem.get(configuration);
  try (final InputStream stream = fs.open(new Path(configuration.get(SCRIPT_FILE)));
       final InputStreamReader reader = new InputStreamReader(stream)) {
    // Append the read-call stub so each line can be parsed by invoking the compiled script.
    final String parse = String.join("\n", IOUtils.toString(reader), READ_CALL);
    script = ((Compilable) engine).compile(parse);
  } catch (ScriptException e) {
    // Fix: preserve the compile failure as the cause instead of flattening it
    // to its message (which loses the stack trace and error position info).
    throw new IOException(e);
  }
}
@Override
public boolean nextKeyValue() throws IOException {
  // Pull lines until the script yields a vertex that survives the graph filter,
  // or the input is exhausted.
  while (true) {
    if (!this.lineRecordReader.nextKeyValue())
      return false;
    try {
      final Bindings bindings = this.engine.createBindings();
      final StarGraph graph = StarGraph.open();
      bindings.put(GRAPH, graph);
      bindings.put(LINE, this.lineRecordReader.getCurrentValue().toString());
      // The compiled script returns null for lines that produce no vertex.
      final StarGraph.StarVertex sv = (StarGraph.StarVertex) script.eval(bindings);
      if (sv != null) {
        final Optional<StarGraph.StarVertex> vertex = sv.applyGraphFilter(this.graphFilter);
        if (vertex.isPresent()) {
          this.vertexWritable.set(vertex.get());
          return true;
        }
      }
    } catch (final ScriptException e) {
      // Fix: preserve the script failure as the cause instead of flattening it
      // to its message (which loses the stack trace and error position info).
      throw new IOException(e);
    }
  }
}
public synchronized boolean nextKeyValue() throws IOException {
  // No more lines means no more records.
  if (!this.lineRecordReader.nextKeyValue()) {
    return false;
  }
  // The key is the (fixed) file name; the value is the current line.
  this.key = new Text(fileName);
  this.value = this.lineRecordReader.getCurrentValue();
  return true;
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  // Set up the wrapped line reader, then cache whether path tracking is enabled.
  this.lineRecordReader.initialize(genericSplit, context);
  final boolean pathsOn = context.getConfiguration().getBoolean(FaunusCompiler.PATH_ENABLED, false);
  this.pathEnabled = pathsOn;
}
@Override
public boolean nextKeyValue() throws IOException {
  // Copy the delegate's current pair into our reusable output writables.
  if (!reader.nextKeyValue()) {
    return false;
  }
  outValue.set(reader.getCurrentValue());
  outKey.set(reader.getCurrentKey().get());
  return true;
}
@Override
public boolean nextKeyValue() throws IOException {
  if (!this.lineRecordReader.nextKeyValue())
    return false;
  // Fix: Text.getBytes() returns the backing buffer, which may be longer than
  // the logical content (getLength()). Bound the stream so stale trailing
  // bytes from a previous, longer line are not fed to the GraphSON parser.
  try (InputStream in = new ByteArrayInputStream(
      this.lineRecordReader.getCurrentValue().getBytes(), 0,
      this.lineRecordReader.getCurrentValue().getLength())) {
    this.vertexWritable.set(this.hasEdges
        ? this.graphsonReader.readVertex(in, Attachable::get, Attachable::get, Direction.BOTH)
        : this.graphsonReader.readVertex(in, Attachable::get));
    return true;
  }
}
delegate = new LineRecordReader(); delegate.close(); public float getProgress() { try { return delegate.getProgress(); CombineFileSplit csplit=(CombineFileSplit)split; FileSplit fileSplit = new FileSplit(csplit.getPath(idx), csplit.getOffset(idx), csplit.getLength(idx), csplit.getLocations()); delegate.initialize(fileSplit, taskcontext); public LongWritable getCurrentKey() throws IOException, InterruptedException { return delegate.getCurrentKey(); return delegate.getCurrentValue(); return delegate.nextKeyValue();
@Override
public float getProgress() throws IOException, InterruptedException {
  // Progress is exactly the wrapped line reader's progress.
  return lrr.getProgress();
}