/**
 * Returns a sub-range of this object's {@link #toString()} rendering.
 *
 * @param start start index, forwarded unchanged to {@link String#subSequence(int, int)}
 * @param end end index, forwarded unchanged to {@link String#subSequence(int, int)}
 * @return the requested slice of the string form
 */
@Override
public CharSequence subSequence(int start, int end) {
  final String rendered = toString();
  return rendered.subSequence(start, end);
}
/**
 * Slices the string form of this object.
 *
 * <p>The receiver is first rendered with {@link #toString()}; range handling is left
 * entirely to {@link String#subSequence(int, int)}.
 *
 * @param start start index passed through to the string
 * @param end end index passed through to the string
 * @return the corresponding portion of the string form
 */
@Override
public CharSequence subSequence(int start, int end) {
  final String asString = toString();
  return asString.subSequence(start, end);
}
/** Returns the length of this object's {@link #toString()} rendering. */
@Override public int length() { return toString().length(); } @Override public CharSequence subSequence(int start, int end) {
/** Reports how many chars the {@link #toString()} form of this object contains. */
@Override public int length() { return toString().length(); } @Override public CharSequence subSequence(int start, int end) {
/**
 * Returns the char at {@code index} of this object's {@link #toString()} rendering.
 *
 * @param index position forwarded unchanged to {@link String#charAt(int)}
 * @return the char at that position of the string form
 */
@Override
public char charAt(int index) {
  final String rendered = toString();
  return rendered.charAt(index);
}

/**
 * Returns the length of this object's {@link #toString()} rendering.
 *
 * @return number of chars in the string form
 */
@Override
public int length() {
  final String rendered = toString();
  return rendered.length();
}
/**
 * Looks up a single char in the string form of this object.
 *
 * @param index position handed to {@link String#charAt(int)} as-is
 * @return the char found at {@code index} in the {@link #toString()} result
 */
@Override
public char charAt(int index) {
  final String asString = toString();
  return asString.charAt(index);
}

/**
 * Reports the size of the string form of this object.
 *
 * @return {@code toString().length()}
 */
@Override
public int length() {
  final String asString = toString();
  return asString.length();
}
/**
 * Reads the next string by delegating to the {@code readString} overload that takes
 * {@code stringDecoder}, then converting its result with {@code toString()}.
 *
 * @return the string form of whatever the delegated overload returned
 * @throws IOException declared by this method; presumably propagated from the delegated read
 */
@Override
public String readString() throws IOException {
  // Only toString() is needed from the result, so it is held as a plain Object.
  final Object decoded = readString(stringDecoder);
  return decoded.toString();
}
/**
 * Reads the next string by delegating to the {@code readString} overload that takes
 * {@code scratchUtf8}, then converting its result with {@code toString()}.
 *
 * @return the string form of whatever the delegated overload returned
 * @throws IOException declared by this method; presumably propagated from the delegated read
 */
@Override
public String readString() throws IOException {
  // Only toString() is needed from the result, so it is held as a plain Object.
  final Object decoded = readString(scratchUtf8);
  return decoded.toString();
}
/**
 * Writes a {@code Utf8} value by first converting it to a {@link String} and then
 * delegating to the {@code writeString} overload that accepts that string.
 *
 * @param utf8 value to write; its {@code toString()} form is what gets written
 * @throws IOException declared by this method; presumably propagated from the delegated write
 */
@Override
public void writeString(Utf8 utf8) throws IOException {
  final String text = utf8.toString();
  writeString(text);
}
/**
 * Returns the next string as a {@link String}.
 *
 * <p>The read itself is done by the {@code readString} overload that is handed
 * {@code scratchUtf8}; this method only converts that result via {@code toString()}.
 *
 * @return string form of the delegated read's result
 * @throws IOException declared by this method; presumably propagated from the delegated read
 */
@Override
public String readString() throws IOException {
  // The result is only stringified, so its declared type does not matter here.
  final Object value = readString(scratchUtf8);
  return value.toString();
}
/**
 * Writes the string form of a {@code Utf8} value.
 *
 * <p>The argument is converted with {@code toString()} and the result is passed to the
 * {@code writeString} overload that accepts it.
 *
 * @param utf8 value whose string form is written
 * @throws IOException declared by this method; presumably propagated from the delegated write
 */
@Override
public void writeString(Utf8 utf8) throws IOException {
  final String asString = utf8.toString();
  writeString(asString);
}
/**
 * Looks up a field of the wrapped {@code record} by name and adapts the raw value.
 *
 * <ul>
 *   <li>a {@code Utf8} value is returned as a {@link String};</li>
 *   <li>a nested Avro {@code GenericRecord} is wrapped in a new {@code GenericAvroRecord}
 *       whose field list is built from the nested record's own schema (name and position
 *       of each field);</li>
 *   <li>anything else, including whatever {@code record.get} returns for other cases,
 *       is returned unchanged.</li>
 * </ul>
 *
 * @param fieldName name of the field to read from {@code record}
 * @return the adapted value as described above
 */
@Override
public Object getField(String fieldName) {
  final Object value = record.get(fieldName);

  if (value instanceof Utf8) {
    return ((Utf8) value).toString();
  }
  if (!(value instanceof org.apache.avro.generic.GenericRecord)) {
    return value;
  }

  final org.apache.avro.generic.GenericRecord nested =
      (org.apache.avro.generic.GenericRecord) value;
  final List<Field> nestedFields = nested.getSchema()
      .getFields()
      .stream()
      .map(avroField -> new Field(avroField.name(), avroField.pos()))
      .collect(Collectors.toList());

  // NOTE(review): the wrapper receives the enclosing `schema`, not the nested record's
  // schema, while the field list comes from the nested schema — confirm this is intended.
  return new GenericAvroRecord(schema, nestedFields, nested);
}
/**
 * Emits one output record per incoming value.
 *
 * <p>For every value, the key's datum is wrapped in a fresh {@code LongWritable} and the
 * value's datum is converted with {@code toString()} and wrapped in a fresh {@code Text}.
 *
 * @param key key whose {@code Long} datum becomes the output key
 * @param values values to drain; each one produces exactly one {@code collect} call
 * @param out collector receiving the converted pairs
 * @param reporter not used by this method
 * @throws IOException declared by this method; presumably propagated from {@code out.collect}
 */
public void reduce(AvroKey<Long> key, Iterator<AvroValue<Utf8>> values,
    OutputCollector<LongWritable, Text> out, Reporter reporter) throws IOException {
  while (values.hasNext()) {
    final AvroValue<Utf8> current = values.next();
    // New writables per record: the collector is handed distinct objects every time.
    final LongWritable outKey = new LongWritable(key.datum());
    final Text outValue = new Text(current.datum().toString());
    out.collect(outKey, outValue);
  }
}
}
/**
 * Tokenizes the input text and emits a {@code (token, 1L)} pair for every token.
 *
 * <p>Tokens are produced by a {@link StringTokenizer} with its default delimiters.
 *
 * @param text input whose {@code toString()} form is tokenized
 * @param collector receives one pair per token
 * @throws IOException declared by this method; presumably propagated from {@code collector.collect}
 */
@Override
public void map(Utf8 text, Collector<Pair<Utf8,Long>> collector) throws IOException {
  for (StringTokenizer words = new StringTokenizer(text.toString()); words.hasMoreTokens(); ) {
    final Utf8 word = new Utf8(words.nextToken());
    collector.collect(new Pair<>(word, 1L));
  }
}
/**
 * Tokenizes the input text and emits a {@code (token, 1L)} pair for every token.
 *
 * <p>Tokens are produced by a {@link StringTokenizer} with its default delimiters.
 *
 * @param text input whose {@code toString()} form is tokenized
 * @param collector receives one pair per token
 * @param reporter not used by this method
 * @throws IOException declared by this method; presumably propagated from {@code collector.collect}
 */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  for (StringTokenizer words = new StringTokenizer(text.toString()); words.hasMoreTokens(); ) {
    final Utf8 word = new Utf8(words.nextToken());
    collector.collect(new Pair<>(word, 1L));
  }
}
}
/**
 * Copies the elements of an Avro array into a plain {@code Object[]}, unwrapping as it goes.
 *
 * <p>Per element: a {@code Record} is replaced by its value at position 0, and then a
 * {@code Utf8} (either the element itself or the unwrapped record value) is replaced by
 * its {@link String} form. Other values are stored unchanged.
 *
 * @param arr source array; may be {@code null}
 * @return a new array of {@code arr.size()} converted elements, or an empty array when
 *     {@code arr} is {@code null}
 */
private static Object[] transformAvroArrayToObjectArray(Array arr) {
  if (arr == null) {
    return new Object[0];
  }

  final Object[] result = new Object[arr.size()];
  int next = 0;
  for (final Iterator elements = arr.iterator(); elements.hasNext(); next++) {
    Object element = elements.next();
    if (element instanceof Record) {
      // Only the first field of a record element is kept.
      element = ((Record) element).get(0);
    }
    if (element instanceof Utf8) {
      element = ((Utf8) element).toString();
    }
    result[next] = element;
  }
  return result;
}
}
/**
 * Converts an Avro array into a plain {@code Object[]}.
 *
 * <p>Each element is processed in two steps: if it is a {@code Record}, it is replaced by
 * the record's value at position 0; if the (possibly replaced) value is a {@code Utf8},
 * it is replaced by its {@link String} form. Anything else is copied as-is.
 *
 * @param arr source array; may be {@code null}
 * @return a new array sized by {@code arr.size()}, or an empty array for {@code null} input
 */
private static Object[] transformAvroArrayToObjectArray(Array arr) {
  if (arr == null) {
    return new Object[0];
  }

  final Object[] converted = new Object[arr.size()];
  int slot = 0;
  for (final Iterator cursor = arr.iterator(); cursor.hasNext(); slot++) {
    Object item = cursor.next();
    if (item instanceof Record) {
      // A record element contributes only its first field.
      item = ((Record) item).get(0);
    }
    if (item instanceof Utf8) {
      item = ((Utf8) item).toString();
    }
    converted[slot] = item;
  }
  return converted;
}
}
/**
 * Reads a file of {@code Pair<Utf8, Long>} records and checks it against {@code COUNTS}.
 *
 * <p>Every record's value must equal the entry stored in {@code COUNTS} under the record
 * key's string form (the key string doubles as the assertion message). After the last
 * record the still-open stream is handed to {@code checkMeta}, and finally the number of
 * records read must equal {@code COUNTS.size()}.
 *
 * @param file file to validate
 * @throws Exception declared by this method; covers I/O failures and whatever
 *     {@code checkMeta} may throw
 */
public static void validateCountsFile(File file) throws Exception {
  final DatumReader<Pair<Utf8, Long>> reader = new SpecificDatumReader<>();
  int numWords = 0;

  // Both resources are closed in reverse order of declaration, as with nested try blocks.
  try (InputStream in = new BufferedInputStream(new FileInputStream(file));
      DataFileStream<Pair<Utf8, Long>> counts = new DataFileStream<>(in, reader)) {
    for (Pair<Utf8, Long> wc : counts) {
      final String word = wc.key().toString();
      assertEquals(word, COUNTS.get(word), wc.value());
      numWords++;
    }
    checkMeta(counts);
  }

  assertEquals(COUNTS.size(), numWords);
}
/**
 * Tokenizes the input text and, for every token, emits to two destinations.
 *
 * <p>A {@code (token, 1L)} pair goes to {@code collector}; the {@code toString()} form of
 * a second, separately constructed {@code (token, 1L)} pair goes to the collector that
 * {@code amos} returns for the name {@code "myavro2"}. Tokens come from a
 * {@link StringTokenizer} with its default delimiters.
 *
 * @param text input whose {@code toString()} form is tokenized
 * @param collector receives one pair per token
 * @param reporter forwarded to {@code amos.getCollector}
 * @throws IOException declared by this method; presumably propagated from the collectors
 */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  for (StringTokenizer words = new StringTokenizer(text.toString()); words.hasMoreTokens(); ) {
    final String word = words.nextToken();
    collector.collect(new Pair<>(new Utf8(word), 1L));
    // The named output gets the pair's string form, built from its own Pair instance.
    amos.getCollector("myavro2", reporter)
        .collect(new Pair<Utf8, Long>(new Utf8(word), 1L).toString());
  }
}