/**
 * Unwraps the given {@link AvroWrapper} and returns the Avro datum it carries.
 *
 * @param input wrapper whose datum is extracted
 * @return the wrapped Avro datum (may be null if the wrapper is empty)
 */
@Override
public Object convert(AvroWrapper<?> input) {
  return input.datum();
}
/**
 * Reads the map-only Avro output file projected through a string schema and
 * verifies that every record's trailing count field equals one.
 *
 * @param inputDirectory directory containing the {@code myavro2-m-00000.avro} file
 * @throws Exception if the file cannot be opened or read
 */
public void testProjectionNoreducer(String inputDirectory) throws Exception {
  final long expectedCount = 1;
  JobConf conf = new JobConf();
  AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
  Path avroFile = new Path(inputDirectory + "/myavro2-m-00000.avro");
  FileStatus status = FileSystem.get(conf).getFileStatus(avroFile);
  FileSplit split = new FileSplit(avroFile, 0, status.getLen(), conf);
  AvroRecordReader<Utf8> reader = new AvroRecordReader<>(conf, split);
  AvroWrapper<Utf8> record = new AvroWrapper<>(null);
  NullWritable nothing = NullWritable.get();
  while (reader.next(record, nothing)) {
    // Each datum's string form ends in ":<count>}"; isolate and parse the count.
    String countText = record.datum().toString().split(":")[2].replace("}", "").trim();
    Assert.assertEquals(expectedCount, Long.parseLong(countText));
  }
}
}
/** Verifies that {@code AvroWrapper.toString()} reflects the wrapped datum's string form. */
@Test
public void testToString() {
  String text = "my string";
  AvroWrapper<CharSequence> wrapper = new AvroWrapper<>(text);
  assertEquals(text, wrapper.toString());
}
}
/**
 * Creates an empty key wrapper for the framework to populate on each read.
 *
 * @return a new {@link AvroWrapper} with no datum yet
 */
@Override
public AvroWrapper<Utf8> createKey() {
  return new AvroWrapper<Utf8>(null);
}
/**
 * Supplies a fresh, empty key wrapper to be filled in by the record reader.
 *
 * @return a new datum-less {@link AvroWrapper}
 */
public AvroWrapper<T> createKey() {
  AvroWrapper<T> key = new AvroWrapper<>(null);
  return key;
}
/** {@inheritDoc} */ @Override public T deserialize(T avroWrapperToReuse) throws IOException { // Create a new Avro wrapper if there isn't one to reuse. if (null == avroWrapperToReuse) { avroWrapperToReuse = createAvroWrapper(); } // Deserialize the Avro datum from the input stream. avroWrapperToReuse.datum(mAvroDatumReader.read(avroWrapperToReuse.datum(), mAvroDecoder)); return avroWrapperToReuse; }
@SuppressWarnings("deprecation") // Test for a different schema output public void testProjection1(String inputDirectory) throws Exception { JobConf job = new JobConf(); Schema readerSchema = Schema.create(Schema.Type.STRING); AvroJob.setInputSchema(job, readerSchema); Path inputPath = new Path(inputDirectory + "/myavro1-r-00000.avro"); FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath); FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job); AvroWrapper<Utf8> inputPair = new AvroWrapper<>(null); NullWritable ignore = NullWritable.get(); AvroRecordReader<Utf8> recordReader = new AvroRecordReader<>(job, fileSplit); long sumOfCounts = 0; long numOfCounts = 0; while (recordReader.next(inputPair, ignore)) { sumOfCounts += Long.parseLong(inputPair.datum().toString().split(":")[2].replace("}", "").trim()); numOfCounts++; } Assert.assertEquals(numOfCounts, WordCountUtil.COUNTS.size()); long actualSumOfCounts = 0; for (Long count : WordCountUtil.COUNTS.values()) { actualSumOfCounts += count; } Assert.assertEquals(sumOfCounts, actualSumOfCounts); }
/**
 * Builds the reusable key object handed to the framework.
 *
 * @return an {@link AvroWrapper} containing no datum
 */
public AvroWrapper<T> createKey() {
  return new AvroWrapper<T>(null);
}
/** {@inheritDoc} */ @Override public void serialize(AvroWrapper<T> avroWrapper) throws IOException { mAvroDatumWriter.write(avroWrapper.datum(), mAvroEncoder); // This would be a lot faster if the Serializer interface had a flush() method and the // Hadoop framework called it when needed. For now, we'll have to flush on every record. mAvroEncoder.flush(); }
@SuppressWarnings("deprecation") // Test for a different schema output public void testProjectionNewMethodsTwo(String inputDirectory) throws Exception { JobConf job = new JobConf(); Schema readerSchema = Schema.create(Schema.Type.STRING); AvroJob.setInputSchema(job, readerSchema); Path inputPath = new Path(inputDirectory + "/testavrofile1-r-00000.avro"); FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath); FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job); AvroWrapper<Utf8> inputPair = new AvroWrapper<>(null); NullWritable ignore = NullWritable.get(); AvroRecordReader<Utf8> recordReader = new AvroRecordReader<>(job, fileSplit); long sumOfCounts = 0; long numOfCounts = 0; while (recordReader.next(inputPair, ignore)) { sumOfCounts += Long.parseLong(inputPair.datum().toString().split(":")[2].replace("}", "").trim()); numOfCounts++; } Assert.assertEquals(numOfCounts, WordCountUtil.COUNTS.size()); long actualSumOfCounts = 0; for (Long count : WordCountUtil.COUNTS.values()) { actualSumOfCounts += count; } Assert.assertEquals(sumOfCounts, actualSumOfCounts); }
/**
 * Wraps {@code key} in an {@link AvroWrapper} and appends it to {@code writer}
 * with a {@link NullWritable} placeholder value.
 *
 * <p>The raw {@code AvroWrapper} type is used because the writer's generic
 * datum type is not known at this call site; the resulting unchecked warning
 * is suppressed deliberately.
 *
 * @param key the Avro datum to emit
 * @throws IOException if the underlying writer fails
 */
@SuppressWarnings({"unchecked"})
public void collect(Object key) throws IOException{
  AvroWrapper wrapper = new AvroWrapper(key);
  writer.write(wrapper, NullWritable.get());
}
// NOTE(review): fragment of a larger test method — the declarations of
// recordReader, sumOfCounts, and defaultRank, and the loop's closing braces,
// lie outside this view.
AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<>(null);
NullWritable ignore = NullWritable.get();
long numOfCounts = 0;
// Each record is a (rank, count) pair; every rank should equal the default.
while (recordReader.next(inputPair, ignore)) {
  Assert.assertEquals(inputPair.datum().get(0), defaultRank);
  sumOfCounts += (Long) inputPair.datum().get(1);
  numOfCounts++;
/**
 * Emits each input line as an Avro pair of (byte offset, line text).
 *
 * @param key byte offset of the line within the input file
 * @param value text content of the line
 * @param out collector receiving the Avro-wrapped pair with a null value
 * @param reporter unused progress reporter
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value,
    OutputCollector<AvroWrapper<Pair<Long,Utf8>>,NullWritable> out,
    Reporter reporter) throws IOException {
  Pair<Long,Utf8> pair = new Pair<>(key.get(), new Utf8(value.toString()));
  out.collect(new AvroWrapper<>(pair), NullWritable.get());
}
}
/**
 * Appends the wrapped Avro datum to the underlying file writer; the
 * {@link NullWritable} value is ignored.
 *
 * @param wrapper carries the datum to append
 * @param ignore unused placeholder value
 * @throws IOException if the append fails
 */
public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
  writer.append(wrapper.datum());
}

// Closes the underlying writer when the task finishes; the method body
// continues past this view.
public void close(Reporter reporter) throws IOException {
// NOTE(review): fragment of a larger test method — the declarations of
// recordReader, sumOfCounts, and defaultRank, and the loop's closing braces,
// lie outside this view.
AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<>(null);
NullWritable ignore = NullWritable.get();
long numOfCounts = 0;
// Each record is a (rank, count) pair; every rank should equal the default.
while (recordReader.next(inputPair, ignore)) {
  Assert.assertEquals(inputPair.datum().get(0), defaultRank);
  sumOfCounts += (Long) inputPair.datum().get(1);
  numOfCounts++;
/**
 * Builds a container record holding a one-element "Optional" array and emits
 * it paired with the line's byte offset.
 *
 * @param key byte offset of the line within the input file
 * @param value unused line text
 * @param out collector receiving the Avro-wrapped (offset, container) pair
 * @param reporter unused progress reporter
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value,
    OutputCollector<AvroWrapper<Pair<Long,GenericData.Record>>,NullWritable> out,
    Reporter reporter) throws IOException {
  // Inner record carrying the lone optional field, initialized to zero.
  GenericData.Record inner = new GenericData.Record(createInnerSchema("optional_field_1"));
  inner.put("optional_field_1", 0L);
  // Single-element array wrapping the inner record.
  GenericData.Array<GenericData.Record> elements = new GenericData.Array<>(1, createArraySchema());
  elements.add(inner);
  // Outer container exposing the array under its "Optional" field.
  GenericData.Record outer = new GenericData.Record(createSchema());
  outer.put("Optional", elements);
  out.collect(new AvroWrapper<>(new Pair<>(key.get(), outer)), NullWritable.get());
}
}
/**
 * Advances the underlying line reader and publishes the line as the Avro key;
 * the {@link NullWritable} value is never touched.
 *
 * @param key wrapper whose datum is set to the line text, or to null at end of input
 * @param value ignored
 * @return true if a line was read, false when the input is exhausted
 * @throws IOException if the underlying line reader fails
 */
public boolean next(AvroWrapper<Utf8> key, NullWritable value) throws IOException {
  if (!lineRecordReader.next(currentKeyHolder, currentValueHolder)) {
    key.datum(null);
    return false;
  }
  // Wrap the raw bytes, trimming to the Text's logical length.
  Utf8 line = new Utf8(currentValueHolder.getBytes()).setLength(currentValueHolder.getLength());
  key.datum(line);
  return true;
}
// NOTE(review): fragment of a larger test method — the declarations of
// recordReader, sumOfCounts, and defaultRank, and the loop's closing braces,
// lie outside this view.
AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<>(null);
NullWritable ignore = NullWritable.get();
long numOfCounts = 0;
// Each record is a (rank, count) pair; every rank should equal the default.
while (recordReader.next(inputPair, ignore)) {
  Assert.assertEquals(inputPair.datum().get(0), defaultRank);
  sumOfCounts += (Long) inputPair.datum().get(1);
  numOfCounts++;
/**
 * Returns the reusable key wrapper, creating it lazily on first use.
 *
 * @return the cached {@link AvroWrapper} instance
 */
private AvroWrapper<K> getWrapper() {
  if (wrapper != null) {
    return wrapper;
  }
  wrapper = new AvroWrapper<K>();
  return wrapper;
}