/**
 * Builds a decoder for {@code columns} using {@code dataParams} and decodes one message with it.
 *
 * @param avroData the raw message bytes handed to the decoder
 * @param columns the column handles the decoder is created for
 * @param dataParams decoder parameters forwarded unchanged to the factory
 * @return the decoded value providers keyed by column handle
 * @throws AssertionError if the decoder produces no row for the message
 */
private static Map<DecoderColumnHandle, FieldValueProvider> decodeRow(byte[] avroData, Set<DecoderColumnHandle> columns, Map<String, String> dataParams)
{
    return DECODER_FACTORY.create(dataParams, columns)
            .decodeRow(avroData, null)
            .orElseThrow(AssertionError::new);
}
/**
 * Decodes {@code csv} with a single-column decoder and returns the value provider for that column.
 *
 * @param type the type assigned to the single test column (mapped to "0")
 * @param csv the message text; it is encoded as UTF-8 before decoding
 * @return the provider decoded for the test column
 * @throws AssertionError if the decoder produces no row
 */
private FieldValueProvider fieldValueDecoderFor(BigintType type, String csv)
{
    DecoderTestColumnHandle handle = new DecoderTestColumnHandle(0, "column", type, "0", null, null, false, false, false);
    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), ImmutableSet.of(handle));

    byte[] payload = csv.getBytes(StandardCharsets.UTF_8);
    return decoder.decodeRow(payload, null)
            .orElseThrow(AssertionError::new)
            .get(handle);
}
/**
 * Decoding a zero-length message must still yield a row, with the single "BYTE" column reported as null.
 */
@Test
public void testEmptyRecord()
{
    DecoderTestColumnHandle handle = new DecoderTestColumnHandle(0, "row1", createUnboundedVarcharType(), null, "BYTE", null, false, false, false);
    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), ImmutableSet.of(handle));

    // An empty byte array stands in for a message without any payload.
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(new byte[0], null)
            .orElseThrow(AssertionError::new);

    checkIsNull(decoded, handle);
}
/**
 * Decodes a one-field JSON document and returns the provider for that field.
 *
 * <p>When {@code jsonValue} is present the document is {@code {"value":<jsonValue>}};
 * otherwise it is the empty object {@code {}}.
 *
 * @param jsonValue raw JSON text to place under the "value" key, or empty for a missing field
 * @param type the type assigned to the test column
 * @return the provider decoded for the test column
 * @throws AssertionError if no row is produced or the column is absent from the decoded row
 */
private FieldValueProvider decode(Optional<String> jsonValue, Type type)
{
    String fieldName = "value";

    String document;
    if (jsonValue.isPresent()) {
        document = format("{\"%s\":%s}", fieldName, jsonValue.get());
    }
    else {
        document = "{}";
    }

    DecoderTestColumnHandle handle = new DecoderTestColumnHandle(
            0,
            "some_column",
            type,
            fieldName,
            dataFormat.orElse(null),
            formatHint.orElse(null),
            false,
            false,
            false);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), ImmutableSet.of(handle));
    byte[] payload = document.getBytes(UTF_8);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(payload, null)
            .orElseThrow(AssertionError::new);

    assertTrue(decoded.containsKey(handle), format("column '%s' not found in decoded row", handle.getName()));
    return decoded.get(handle);
}
}
/**
 * Writes a double, a float and another double into a buffer and checks that a "DOUBLE" column
 * and a "FLOAT" column (mapping "8") decode to pi and e respectively.
 */
@SuppressWarnings("NumericCastThatLosesPrecision")
@Test
public void testFloatStuff()
{
    ByteBuffer buffer = ByteBuffer.allocate(100);
    buffer.putDouble(Math.PI); // bytes 0 - 7
    buffer.putFloat((float) Math.E); // bytes 8 - 11
    buffer.putDouble(Math.E); // bytes 12 - 19, written but not mapped by any column below

    // Trim the backing array down to the bytes actually written.
    int written = buffer.position();
    byte[] message = new byte[written];
    System.arraycopy(buffer.array(), 0, message, 0, written);

    DecoderTestColumnHandle doubleColumn = new DecoderTestColumnHandle(0, "row1", DOUBLE, null, "DOUBLE", null, false, false, false);
    DecoderTestColumnHandle floatColumn = new DecoderTestColumnHandle(1, "row2", DOUBLE, "8", "FLOAT", null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(doubleColumn, floatColumn);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(message, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decoded.size(), columns.size());
    checkValue(decoded, doubleColumn, Math.PI);
    checkValue(decoded, floatColumn, Math.E);
}
/**
 * Reads a JSON number and a JSON string, each through both a VARCHAR column and a BIGINT column,
 * and expects the same logical value regardless of how the source field was encoded.
 */
@Test
public void testStringNumber()
{
    byte[] json = "{\"a_number\":481516,\"a_string\":\"2342\"}".getBytes(StandardCharsets.UTF_8);

    DecoderTestColumnHandle numberAsVarchar = new DecoderTestColumnHandle(0, "column1", createVarcharType(100), "a_number", null, null, false, false, false);
    DecoderTestColumnHandle numberAsBigint = new DecoderTestColumnHandle(1, "column2", BIGINT, "a_number", null, null, false, false, false);
    DecoderTestColumnHandle stringAsVarchar = new DecoderTestColumnHandle(2, "column3", createVarcharType(100), "a_string", null, null, false, false, false);
    DecoderTestColumnHandle stringAsBigint = new DecoderTestColumnHandle(3, "column4", BIGINT, "a_string", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(numberAsVarchar, numberAsBigint, stringAsVarchar, stringAsBigint);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    Optional<Map<DecoderColumnHandle, FieldValueProvider>> result = decoder.decodeRow(json, null);
    assertTrue(result.isPresent());

    // Presence was asserted just above, so unwrapping once here is safe.
    Map<DecoderColumnHandle, FieldValueProvider> decoded = result.get();
    assertEquals(decoded.size(), columns.size());

    checkValue(decoded, numberAsVarchar, "481516");
    checkValue(decoded, numberAsBigint, 481516);
    checkValue(decoded, stringAsVarchar, "2342");
    checkValue(decoded, stringAsBigint, 2342);
}
@Test public void testSimple() { ByteBuffer buf = ByteBuffer.allocate(100); buf.putLong(4815162342L); // 0 - 7 buf.putInt(12345678); // 8 - 11 buf.putShort((short) 4567); // 12 - 13 buf.put((byte) 123); // 14 buf.put("Ich bin zwei Oeltanks".getBytes(StandardCharsets.UTF_8)); // 15+ byte[] row = new byte[buf.position()]; System.arraycopy(buf.array(), 0, row, 0, buf.position()); DecoderTestColumnHandle row1 = new DecoderTestColumnHandle(0, "row1", BigintType.BIGINT, "0", "LONG", null, false, false, false); DecoderTestColumnHandle row2 = new DecoderTestColumnHandle(1, "row2", BigintType.BIGINT, "8", "INT", null, false, false, false); DecoderTestColumnHandle row3 = new DecoderTestColumnHandle(2, "row3", BigintType.BIGINT, "12", "SHORT", null, false, false, false); DecoderTestColumnHandle row4 = new DecoderTestColumnHandle(3, "row4", BigintType.BIGINT, "14", "BYTE", null, false, false, false); DecoderTestColumnHandle row5 = new DecoderTestColumnHandle(4, "row5", createVarcharType(10), "15", null, null, false, false, false); Set<DecoderColumnHandle> columns = ImmutableSet.of(row1, row2, row3, row4, row5); RowDecoder rowDecoder = DECODER_FACTORY.create(emptyMap(), columns); Map<DecoderColumnHandle, FieldValueProvider> decodedRow = rowDecoder.decodeRow(row, null) .orElseThrow(AssertionError::new); assertEquals(decodedRow.size(), columns.size()); checkValue(decodedRow, row1, 4815162342L); checkValue(decodedRow, row2, 12345678); checkValue(decodedRow, row3, 4567); checkValue(decodedRow, row4, 123); checkValue(decodedRow, row5, "Ich bin zw"); }
/**
 * Decodes a CSV line made only of separators and expects every column, whatever its type
 * (VARCHAR, BIGINT, DOUBLE, BOOLEAN), to be reported as null.
 */
@Test
public void testNulls()
{
    String csv = ",,,";

    DecoderTestColumnHandle varcharColumn = new DecoderTestColumnHandle(0, "row1", createVarcharType(10), "0", null, null, false, false, false);
    DecoderTestColumnHandle bigintColumn = new DecoderTestColumnHandle(1, "row2", BigintType.BIGINT, "1", null, null, false, false, false);
    DecoderTestColumnHandle doubleColumn = new DecoderTestColumnHandle(2, "row3", DoubleType.DOUBLE, "2", null, null, false, false, false);
    DecoderTestColumnHandle booleanColumn = new DecoderTestColumnHandle(3, "row4", BooleanType.BOOLEAN, "3", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(varcharColumn, bigintColumn, doubleColumn, booleanColumn);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    byte[] payload = csv.getBytes(StandardCharsets.UTF_8);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(payload, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decoded.size(), columns.size());
    checkIsNull(decoded, varcharColumn);
    checkIsNull(decoded, bigintColumn);
    checkIsNull(decoded, doubleColumn);
    checkIsNull(decoded, booleanColumn);
}
/**
 * Decodes an empty JSON object against columns whose mappings point at fields that are not there
 * (including nested paths) and expects each column to come back as null.
 */
@Test
public void testNonExistent()
{
    byte[] json = "{}".getBytes(StandardCharsets.UTF_8);

    DecoderTestColumnHandle varcharColumn = new DecoderTestColumnHandle(0, "column1", createVarcharType(100), "very/deep/varchar", null, null, false, false, false);
    DecoderTestColumnHandle bigintColumn = new DecoderTestColumnHandle(1, "column2", BIGINT, "no_bigint", null, null, false, false, false);
    DecoderTestColumnHandle doubleColumn = new DecoderTestColumnHandle(2, "column3", DOUBLE, "double/is_missing", null, null, false, false, false);
    DecoderTestColumnHandle booleanColumn = new DecoderTestColumnHandle(3, "column4", BOOLEAN, "hello", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(varcharColumn, bigintColumn, doubleColumn, booleanColumn);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(json, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decoded.size(), columns.size());
    checkIsNull(decoded, varcharColumn);
    checkIsNull(decoded, bigintColumn);
    checkIsNull(decoded, doubleColumn);
    checkIsNull(decoded, booleanColumn);
}
@Test public void testFixedWithString() { String str = "Ich bin zwei Oeltanks"; byte[] row = str.getBytes(StandardCharsets.UTF_8); DecoderTestColumnHandle row1 = new DecoderTestColumnHandle(0, "row1", createVarcharType(100), null, null, null, false, false, false); DecoderTestColumnHandle row2 = new DecoderTestColumnHandle(1, "row2", createVarcharType(100), "0", null, null, false, false, false); DecoderTestColumnHandle row3 = new DecoderTestColumnHandle(2, "row3", createVarcharType(100), "0:4", null, null, false, false, false); DecoderTestColumnHandle row4 = new DecoderTestColumnHandle(3, "row4", createVarcharType(100), "5:8", null, null, false, false, false); Set<DecoderColumnHandle> columns = ImmutableSet.of(row1, row2, row3, row4); RowDecoder rowDecoder = DECODER_FACTORY.create(emptyMap(), columns); Map<DecoderColumnHandle, FieldValueProvider> decodedRow = rowDecoder.decodeRow(row, null) .orElseThrow(AssertionError::new); assertEquals(decodedRow.size(), columns.size()); checkValue(decodedRow, row1, str); checkValue(decodedRow, row2, str); // these only work for single byte encodings... checkValue(decodedRow, row3, str.substring(0, 4)); checkValue(decodedRow, row4, str.substring(5, 8)); }
/**
 * Decodes the bundled sample message {@code /decoder/json/message.json} and checks top-level and
 * nested ("user/...") fields across VARCHAR, BIGINT and BOOLEAN columns. The VARCHAR(10) column
 * is expected to hold a value of exactly ten characters.
 *
 * @throws Exception if the sample resource cannot be read
 */
@Test
public void testSimple()
        throws Exception
{
    byte[] json;
    // ByteStreams.toByteArray does not close its argument, so the resource stream is closed here.
    // The type is fully qualified so this block does not need an extra import.
    try (java.io.InputStream input = TestJsonDecoder.class.getResourceAsStream("/decoder/json/message.json")) {
        json = ByteStreams.toByteArray(input);
    }

    DecoderTestColumnHandle column1 = new DecoderTestColumnHandle(0, "column1", createVarcharType(100), "source", null, null, false, false, false);
    DecoderTestColumnHandle column2 = new DecoderTestColumnHandle(1, "column2", createVarcharType(10), "user/screen_name", null, null, false, false, false);
    DecoderTestColumnHandle column3 = new DecoderTestColumnHandle(2, "column3", BIGINT, "id", null, null, false, false, false);
    DecoderTestColumnHandle column4 = new DecoderTestColumnHandle(3, "column4", BIGINT, "user/statuses_count", null, null, false, false, false);
    DecoderTestColumnHandle column5 = new DecoderTestColumnHandle(4, "column5", BOOLEAN, "user/geo_enabled", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(column1, column2, column3, column4, column5);

    RowDecoder rowDecoder = DECODER_FACTORY.create(emptyMap(), columns);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = rowDecoder.decodeRow(json, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decodedRow.size(), columns.size());
    checkValue(decodedRow, column1, "<a href=\"http://twitterfeed.com\" rel=\"nofollow\">twitterfeed</a>");
    checkValue(decodedRow, column2, "EKentuckyN");
    checkValue(decodedRow, column3, 493857959588286460L);
    checkValue(decodedRow, column4, 7630);
    checkValue(decodedRow, column5, true);
}
/**
 * Decodes a CSV line with two tokens against six columns: the first two columns must carry the
 * tokens, and every column mapped past the end of the line must be reported as null.
 */
@Test
public void testLessTokensThanColumns()
{
    String csv = "ala,10";

    // The first constructor argument looks like the column's position (0, 1, 2, ...) — TODO confirm.
    // column4..column6 previously all repeated 0; they are numbered sequentially here.
    DecoderTestColumnHandle column1 = new DecoderTestColumnHandle(0, "column1", createVarcharType(10), "0", null, null, false, false, false);
    DecoderTestColumnHandle column2 = new DecoderTestColumnHandle(1, "column2", BigintType.BIGINT, "1", null, null, false, false, false);
    DecoderTestColumnHandle column3 = new DecoderTestColumnHandle(2, "column3", createVarcharType(10), "2", null, null, false, false, false);
    DecoderTestColumnHandle column4 = new DecoderTestColumnHandle(3, "column4", BigintType.BIGINT, "3", null, null, false, false, false);
    DecoderTestColumnHandle column5 = new DecoderTestColumnHandle(4, "column5", DoubleType.DOUBLE, "4", null, null, false, false, false);
    DecoderTestColumnHandle column6 = new DecoderTestColumnHandle(5, "column6", BooleanType.BOOLEAN, "5", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(column1, column2, column3, column4, column5, column6);

    RowDecoder rowDecoder = DECODER_FACTORY.create(emptyMap(), columns);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = rowDecoder.decodeRow(csv.getBytes(StandardCharsets.UTF_8), null)
            .orElseThrow(AssertionError::new);

    assertEquals(decodedRow.size(), columns.size());

    // Columns backed by actual tokens.
    checkValue(decodedRow, column1, "ala");
    checkValue(decodedRow, column2, 10);

    // Columns mapped beyond the last token.
    checkIsNull(decodedRow, column3);
    checkIsNull(decodedRow, column4);
    checkIsNull(decodedRow, column5);
    checkIsNull(decodedRow, column6);
}
/**
 * Decodes a CSV line mixing quoted and unquoted tokens into VARCHAR, BIGINT and DOUBLE columns.
 * Quoted tokens are expected without their quotes, and the VARCHAR(2) column is expected to hold
 * only the first two characters of its token.
 */
@Test
public void testSimple()
{
    String csv = "\"row 1\",row2,\"row3\",100,\"200\",300,4.5";

    DecoderTestColumnHandle shortText = new DecoderTestColumnHandle(0, "row1", createVarcharType(2), "0", null, null, false, false, false);
    DecoderTestColumnHandle plainText = new DecoderTestColumnHandle(1, "row2", createVarcharType(10), "1", null, null, false, false, false);
    DecoderTestColumnHandle quotedText = new DecoderTestColumnHandle(2, "row3", createVarcharType(10), "2", null, null, false, false, false);
    DecoderTestColumnHandle firstNumber = new DecoderTestColumnHandle(3, "row4", BigintType.BIGINT, "3", null, null, false, false, false);
    DecoderTestColumnHandle quotedNumber = new DecoderTestColumnHandle(4, "row5", BigintType.BIGINT, "4", null, null, false, false, false);
    DecoderTestColumnHandle lastNumber = new DecoderTestColumnHandle(5, "row6", BigintType.BIGINT, "5", null, null, false, false, false);
    DecoderTestColumnHandle decimal = new DecoderTestColumnHandle(6, "row7", DoubleType.DOUBLE, "6", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(shortText, plainText, quotedText, firstNumber, quotedNumber, lastNumber, decimal);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    byte[] payload = csv.getBytes(StandardCharsets.UTF_8);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(payload, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decoded.size(), columns.size());
    checkValue(decoded, shortText, "ro");
    checkValue(decoded, plainText, "row2");
    checkValue(decoded, quotedText, "row3");
    checkValue(decoded, firstNumber, 100);
    checkValue(decoded, quotedNumber, 200);
    checkValue(decoded, lastNumber, 300);
    checkValue(decoded, decimal, 4.5d);
}
/**
 * Decodes eight CSV tokens into BOOLEAN columns. Per the assertions below, only the tokens
 * {@code True} and quoted {@code "true"} are expected to decode to true; the numeric tokens
 * {@code 0} and {@code 1}, quoted or not, are expected to decode to false.
 */
@Test
public void testBoolean()
{
    String csv = "True,False,0,1,\"0\",\"1\",\"true\",\"false\"";

    DecoderTestColumnHandle capitalTrue = new DecoderTestColumnHandle(0, "row1", BooleanType.BOOLEAN, "0", null, null, false, false, false);
    DecoderTestColumnHandle capitalFalse = new DecoderTestColumnHandle(1, "row2", BooleanType.BOOLEAN, "1", null, null, false, false, false);
    DecoderTestColumnHandle zero = new DecoderTestColumnHandle(2, "row3", BooleanType.BOOLEAN, "2", null, null, false, false, false);
    DecoderTestColumnHandle one = new DecoderTestColumnHandle(3, "row4", BooleanType.BOOLEAN, "3", null, null, false, false, false);
    DecoderTestColumnHandle quotedZero = new DecoderTestColumnHandle(4, "row5", BooleanType.BOOLEAN, "4", null, null, false, false, false);
    DecoderTestColumnHandle quotedOne = new DecoderTestColumnHandle(5, "row6", BooleanType.BOOLEAN, "5", null, null, false, false, false);
    DecoderTestColumnHandle quotedTrue = new DecoderTestColumnHandle(6, "row7", BooleanType.BOOLEAN, "6", null, null, false, false, false);
    DecoderTestColumnHandle quotedFalse = new DecoderTestColumnHandle(7, "row8", BooleanType.BOOLEAN, "7", null, null, false, false, false);
    Set<DecoderColumnHandle> columns = ImmutableSet.of(capitalTrue, capitalFalse, zero, one, quotedZero, quotedOne, quotedTrue, quotedFalse);

    RowDecoder decoder = DECODER_FACTORY.create(emptyMap(), columns);
    byte[] payload = csv.getBytes(StandardCharsets.UTF_8);
    Map<DecoderColumnHandle, FieldValueProvider> decoded = decoder.decodeRow(payload, null)
            .orElseThrow(AssertionError::new);

    assertEquals(decoded.size(), columns.size());
    checkValue(decoded, capitalTrue, true);
    checkValue(decoded, capitalFalse, false);
    checkValue(decoded, zero, false);
    checkValue(decoded, one, false);
    checkValue(decoded, quotedZero, false);
    checkValue(decoded, quotedOne, false);
    checkValue(decoded, quotedTrue, true);
    checkValue(decoded, quotedFalse, false);
}
RowDecoder rowDecoder = DECODER_FACTORY.create(emptyMap(), columns); Map<DecoderColumnHandle, FieldValueProvider> decodedRow = rowDecoder.decodeRow(row, null) .orElseThrow(AssertionError::new);