/**
 * Looks up the value stored in a map under a key supplied as a plain Java string.
 *
 * <p>Avro persists strings as {@code Utf8}, so the key is wrapped in a {@code Utf8}
 * before the lookup is performed.
 *
 * @param map passed from {@link #getFieldHelper(Map, Object, List, int)}
 * @param key passed from {@link #getFieldHelper(Map, Object, List, int)}
 * @return whatever {@code map.get} yields for the wrapped key; this could again be a GenericRecord
 */
private static Object getObjectFromMap(Map map, String key) {
  return map.get(new Utf8(key));
}
/**
 * Reads a length-prefixed string into a {@code Utf8}.
 *
 * <p>The length is obtained from {@code readInt()}; that many bytes are then read
 * directly into the backing array of the result.
 *
 * @param old an instance to reuse for the result, or {@code null} to allocate a new one
 * @return {@code old} when it was non-null, otherwise a new {@code Utf8}, holding the bytes read
 * @throws IOException if the length prefix is negative (malformed input) or the
 *         underlying read fails
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
  int length = readInt();
  // Reject a negative length up front, before it is handed to setByteLength
  // and the caller-supplied instance is touched.
  if (length < 0) {
    throw new IOException("Malformed data. Length is negative: " + length);
  }
  Utf8 result = (old != null ? old : new Utf8());
  result.setByteLength(length);
  if (length > 0) {
    in.readFully(result.getBytes(), 0, length);
  }
  return result;
}
/**
 * Reads a string, accepting data that the parser reports as either a string or bytes.
 *
 * <p>The parser is advanced with {@code Symbol.STRING}. When it reports
 * {@code Symbol.BYTES} instead, the value is read as bytes and wrapped in a new
 * {@code Utf8}; otherwise the read is delegated to the underlying decoder.
 *
 * @param old an instance handed to the underlying decoder's {@code readString} on the string path
 * @return the string that was read
 * @throws IOException if advancing the parser or reading from the underlying decoder fails
 */
@Override
public Utf8 readString(Utf8 old) throws IOException {
  final Symbol resolved = parser.advance(Symbol.STRING);
  if (resolved != Symbol.BYTES) {
    assert resolved == Symbol.STRING;
    return in.readString(old);
  }
  // Writer produced bytes: wrap the buffer's backing array in a fresh Utf8.
  return new Utf8(in.readBytes(null).array());
}
/**
 * Emits one Avro pair per input record: the long value of {@code key} paired with
 * the text of {@code value} as a {@code Utf8}. The output value is always the
 * {@code NullWritable} singleton.
 *
 * @param key      input key; its long value becomes the pair's key
 * @param value    input text; converted to a {@code Utf8} for the pair's value
 * @param out      collector receiving the wrapped pair
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value,
                OutputCollector<AvroWrapper<Pair<Long,Utf8>>,NullWritable> out,
                Reporter reporter) throws IOException {
  final Pair<Long, Utf8> entry = new Pair<>(key.get(), new Utf8(value.toString()));
  out.collect(new AvroWrapper<>(entry), NullWritable.get());
}
}
/**
 * Splits the input text into tokens with a default {@code StringTokenizer} and
 * emits a {@code (token, 1)} pair for each one.
 *
 * @param text      the text to tokenize
 * @param collector receives one pair per token
 * @param reporter  not used by this method
 * @throws IOException if the collector fails
 */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  for (StringTokenizer words = new StringTokenizer(text.toString()); words.hasMoreTokens(); ) {
    final Utf8 word = new Utf8(words.nextToken());
    collector.collect(new Pair<>(word, 1L));
  }
}
}
/**
 * Splits the input text into tokens with a default {@code StringTokenizer}. For
 * each token a {@code (token, 1)} pair is sent to the main collector, and the
 * string form of an equal pair is sent to the {@code "myavro2"} collector
 * obtained from {@code amos}.
 *
 * @param text      the text to tokenize
 * @param collector receives one pair per token
 * @param reporter  passed through when looking up the {@code "myavro2"} collector
 * @throws IOException if either collector fails
 */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  for (StringTokenizer words = new StringTokenizer(text.toString()); words.hasMoreTokens(); ) {
    final String word = words.nextToken();

    // Primary output: the pair itself.
    collector.collect(new Pair<>(new Utf8(word), 1L));

    // Secondary named output: the pair rendered via toString().
    amos.getCollector("myavro2", reporter)
        .collect(new Pair<Utf8, Long>(new Utf8(word), 1L).toString());
  }
}
/**
 * Serializes a sample record built from the given schema, using a versioned
 * serializer constructed from that schema's string form.
 *
 * <p>The record's {@code "original"} field is set to the {@code Utf8} value
 * {@code "Abhinay"}.
 *
 * @param s0 schema used both for the record and for the serializer
 * @return the bytes produced by the serializer for the sample record
 */
private static byte[] writeVersion0(Schema s0) {
  final GenericData.Record sample = new GenericData.Record(s0);
  sample.put("original", new Utf8("Abhinay"));
  return new AvroVersionedGenericSerializer(s0.toString()).toBytes(sample);
}
/**
 * Verifies that a {@code Utf8} value serialized with a plain string schema
 * (a) produces byte-wise equal output on repeated serialization and
 * (b) deserializes back to an equal {@code Utf8}.
 *
 * @throws Exception if serialization or deserialization fails unexpectedly
 */
public void testRoundtripAvroWithString() throws Exception {
  String jsonSchema = "{\"name\": \"Str\", \"type\": \"string\"}";
  AvroGenericSerializer serializer = new AvroGenericSerializer(jsonSchema);

  byte[] bytes = serializer.toBytes(new Utf8("BAR"));
  byte[] bytes2 = serializer.toBytes(new Utf8("BAR"));

  // Expected value goes first so a failure message reports the right way round.
  assertEquals(0, ByteUtils.compare(bytes, bytes2));
  // assertEquals reports both values on mismatch and fails cleanly on a null result.
  assertEquals(new Utf8("BAR"), serializer.toObject(bytes));
}
/**
 * Asserts that serializing and then deserializing {@code O} yields a value equal
 * to a {@code Utf8} built from {@code O.toString()}.
 *
 * @param serializer the serializer under test
 * @param O          the value to round-trip
 */
private void assertInverse(AvroVersionedGenericSerializer serializer, Object O) {
  final Utf8 expected = new Utf8(O.toString());
  final Object roundTripped = serializer.toObject(serializer.toBytes(O));
  assertEquals("Value after ser/de-ser is not the same", expected, roundTripped);
}
/**
 * Serializes a sample record built from {@code s0}, using a versioned serializer
 * constructed from the supplied version map.
 *
 * <p>The record's {@code "original"} field is set to the {@code Utf8} value
 * {@code "Abhinay"}.
 *
 * @param versions version map handed to the serializer's constructor
 * @param s0       schema used to build the record
 * @return the bytes produced by the serializer for the sample record
 */
private static byte[] writeVersion0with1Present(Map<Integer, String> versions, Schema s0) {
  final GenericData.Record sample = new GenericData.Record(s0);
  sample.put("original", new Utf8("Abhinay"));
  return new AvroVersionedGenericSerializer(versions).toBytes(sample);
}
/**
 * Runs the shared date/time conversion test with logical types disabled and
 * checks that the {@code "date"}, {@code "time"} and {@code "timestamp"} record
 * fields each equal a {@code Utf8} of the corresponding value's {@code toString()}.
 */
@Test
public void testConvertToAvroStreamForDateTimeAsString()
    throws SQLException, IOException, ParseException {
  final JdbcCommon.AvroConversionOptions stringTypedOptions =
      JdbcCommon.AvroConversionOptions.builder()
          .convertNames(true)
          .useLogicalTypes(false)
          .build();

  testConvertToAvroStreamForDateTime(
      stringTypedOptions,
      (rec, expectedDate) ->
          assertEquals(new Utf8(expectedDate.toString()), rec.get("date")),
      (rec, expectedTime) ->
          assertEquals(new Utf8(expectedTime.toString()), rec.get("time")),
      (rec, expectedTimestamp) ->
          assertEquals(new Utf8(expectedTimestamp.toString()), rec.get("timestamp"))
  );
}
@Test public void testGetStringArrayUtf8() throws IOException { // Expectation: Even though we read an Avro object with UTF8 underneath, the accessor converts it into a // Java String List<String> expectedQuotes = ImmutableList.of("abc", "defg"); GenericData.Array<Utf8> strings = new GenericData.Array<Utf8>(2, Schema.createArray(Schema.create(Schema.Type.STRING))); expectedQuotes.forEach(s -> strings.add(new Utf8(s))); record.put("favorite_quotes", strings); Assert.assertEquals(accessor.getGeneric("favorite_quotes"), expectedQuotes); }
public void testRoundtripAvroWithGenericRecord() throws Exception { String jsonSchema = "{\"name\": \"Compact Disk\", \"type\": \"record\", " + "\"fields\": [" + "{\"name\": \"name\", \"type\": \"string\", \"order\": \"ascending\"}" + "]}"; AvroGenericSerializer serializer = new AvroGenericSerializer(jsonSchema); Record record = new Record(Schema.parse(jsonSchema)); // we need to use a Utf8 instance to map to a String. record.put("name", new Utf8("Hello")); byte[] bytes = serializer.toBytes(record); assertTrue(serializer.toObject(bytes).equals(record)); }
public void testRecordStringSchema() { String stringSchema = "\"string\""; AvroVersionedGenericSerializer stringSerializer = new AvroVersionedGenericSerializer(stringSchema); // string and a record with string serializes to the same bytes. String recordSchema = "{\"type\": \"record\", \"name\": \"myrec\",\"fields\": [{ \"name\": \"original\", \"type\": \"string\" }]}"; AvroVersionedGenericSerializer recordSerializer = new AvroVersionedGenericSerializer(recordSchema); Utf8 sample = new Utf8("abc"); byte[] byte1 = stringSerializer.toBytes(sample); Object obj = recordSerializer.toObject(byte1); assertEquals(" should serialize to same value", Record.class, obj.getClass()); }
/**
 * Checks that, under a plain string schema, a {@code Utf8} and a
 * {@code java.lang.String} with the same content serialize to identical bytes.
 *
 * @throws Exception if serialization fails unexpectedly
 */
public void testSimpleStringSchema() throws Exception {
  final AvroGenericSerializer serializer = new AvroGenericSerializer("\"string\"");

  final byte[] fromUtf8 = serializer.toBytes(new Utf8("abc"));
  final byte[] fromString = serializer.toBytes("abc");

  assertArrayEquals(" should serialize to same value", fromUtf8, fromString);
}
/**
 * Delegates to {@code shouldSerializeTypeCorrectly} for the optional-string
 * schema: the value {@code "foobar"} is paired with an Avro string schema and the
 * {@code Utf8} value {@code "foobar"}.
 */
@Test
public void shouldSerializeString() {
  final org.apache.avro.Schema avroStringSchema =
      org.apache.avro.SchemaBuilder.builder().stringType();
  final Utf8 avroValue = new Utf8("foobar");

  shouldSerializeTypeCorrectly(
      Schema.OPTIONAL_STRING_SCHEMA,
      "foobar",
      avroStringSchema,
      avroValue);
}
/**
 * Exercises the string serializer for each listed schema-info spelling
 * (including {@code null}): {@code "abc"} and {@code null} must round-trip via
 * {@code assertInverse}, while a {@code Utf8}, a char, integers and a byte
 * array must be rejected via {@code assertSerializationFails}.
 */
@Test
public void testStringSerializer() {
  String[] schemaInfos = { "utf8", "utf-8", "UTF8", "UTF-8", null };
  for (String schemaInfo : schemaInfos) {
    Serializer<Object> ser = getStringSerializer(schemaInfo);

    // Values the serializer must accept.
    assertInverse(ser, "abc");
    assertInverse(ser, null);

    // Values the serializer must reject.
    assertSerializationFails(ser, new Utf8("foobar"));
    assertSerializationFails(ser, 'C');
    assertSerializationFails(ser, 123);
    // Integer.valueOf replaces the deprecated-for-removal Integer(int) constructor.
    assertSerializationFails(ser, Integer.valueOf(123));
    assertSerializationFails(ser, new byte[5]);
  }
}
/**
 * Reads strings from the decoder four at a time for {@code count / 4} iterations,
 * reusing a single {@code Utf8} instance and converting every value to a
 * {@code String}. The converted strings are discarded.
 *
 * <p>Because of the integer division, any remainder of {@code count} beyond a
 * multiple of four is not read.
 *
 * @param d the decoder to read from
 * @throws IOException if the decoder fails
 */
@Override
void readInternal(Decoder d) throws IOException {
  final Utf8 reused = new Utf8();
  final int batches = count / 4;
  int batch = 0;
  while (batch < batches) {
    // Four reads per iteration, as in the original unrolled loop.
    d.readString(reused).toString();
    d.readString(reused).toString();
    d.readString(reused).toString();
    d.readString(reused).toString();
    batch++;
  }
}