/**
 * Looks up the schema text stored under {@code PARQUET_HIVE_SCHEMA} in the given
 * configuration and parses it into a {@link MessageType}.
 *
 * @param configuration configuration holding the schema string
 * @return the message type produced by {@code MessageTypeParser.parseMessageType}
 */
public static MessageType getSchema(final Configuration configuration) {
  final String schemaText = configuration.get(PARQUET_HIVE_SCHEMA);
  return MessageTypeParser.parseMessageType(schemaText);
}
public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata, TypeInfo hiveTypeInfo) { this.root = new HiveStructConverter(requestedSchema, MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata, hiveTypeInfo); }
/**
 * Parses {@code schema} into a message type and writes {@code record} through a
 * {@code DataWritableWriter} bound to {@code mockRecordConsumer}.
 *
 * @param schema textual Parquet schema to parse
 * @param record record handed to the writer
 * @throws SerDeException declared by this helper; propagated from the write path
 */
private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException {
  final MessageType parsedSchema = MessageTypeParser.parseMessageType(schema);
  new DataWritableWriter(mockRecordConsumer, parsedSchema).write(record);
}
/**
 * Requests both nested fields of {@code structCol} ({@code a} and {@code b}) and hands the
 * projected schema's string form to {@code testConversion} together with the Hive type
 * {@code struct<a:int,b:double>}.
 */
@Test
public void testGetProjectedSchema2() throws Exception {
  final String fileSchemaText =
      "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " }\n"
      + "}\n";
  final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

  final String projectedSchemaText = DataWritableReadSupport
      .getProjectedSchema(
          fileSchema,
          Arrays.asList("structCol"),
          Arrays.asList(0),
          Sets.newHashSet("structCol.a", "structCol.b"))
      .toString();

  testConversion("structCol", "struct<a:int,b:double>", projectedSchemaText);
}
/**
 * Requests nested field {@code structCol.b} plus top-level column {@code c} and hands the
 * projected schema's string form to {@code testConversion} together with the Hive types
 * {@code struct<b:double>,boolean}.
 */
@Test
public void testGetProjectedSchema3() throws Exception {
  final String fileSchemaText =
      "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " }\n"
      + " optional boolean c;\n"
      + "}\n";
  final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

  final String projectedSchemaText = DataWritableReadSupport
      .getProjectedSchema(
          fileSchema,
          Arrays.asList("structCol", "c"),
          Arrays.asList(0, 1),
          Sets.newHashSet("structCol.b", "c"))
      .toString();

  testConversion("structCol,c", "struct<b:double>,boolean", projectedSchemaText);
}
/**
 * Requests only nested field {@code structCol.a} from a four-field struct and hands the
 * projected schema's string form to {@code testConversion} together with the Hive type
 * {@code struct<a:int>}.
 */
@Test
public void testGetProjectedSchema1() throws Exception {
  final String fileSchemaText =
      "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " optional boolean c;\n"
      + " optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
      + " }\n"
      + "}\n";
  final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

  final String projectedSchemaText = DataWritableReadSupport
      .getProjectedSchema(
          fileSchema,
          Arrays.asList("structCol"),
          Arrays.asList(0),
          Sets.newHashSet("structCol.a"))
      .toString();

  testConversion("structCol", "struct<a:int>", projectedSchemaText);
}
/**
 * Requests the doubly nested field {@code structCol.subStructCol.b} and hands the projected
 * schema's string form to {@code testConversion} together with the Hive type
 * {@code struct<subStructCol:struct<b:bigint>>}.
 */
@Test
public void testGetProjectedSchema4() throws Exception {
  final String fileSchemaText =
      "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional group subStructCol {\n"
      + " optional int64 b;\n"
      + " optional boolean c;\n"
      + " }\n"
      + " }\n"
      + " optional boolean d;\n"
      + "}\n";
  final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

  final String projectedSchemaText = DataWritableReadSupport
      .getProjectedSchema(
          fileSchema,
          Arrays.asList("structCol"),
          Arrays.asList(0),
          Sets.newHashSet("structCol.subStructCol.b"))
      .toString();

  testConversion("structCol", "struct<subStructCol:struct<b:bigint>>", projectedSchemaText);
}
/**
 * Requests {@code structCol.subStructCol} as a whole as well as its two children and hands
 * the projected schema's string form to {@code testConversion} together with the Hive type
 * {@code struct<subStructCol:struct<b:bigint,c:boolean>>}.
 */
@Test
public void testGetProjectedSchema5() throws Exception {
  final String fileSchemaText =
      "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional group subStructCol {\n"
      + " optional int64 b;\n"
      + " optional boolean c;\n"
      + " }\n"
      + " }\n"
      + " optional boolean d;\n"
      + "}\n";
  final MessageType fileSchema = MessageTypeParser.parseMessageType(fileSchemaText);

  final String projectedSchemaText = DataWritableReadSupport
      .getProjectedSchema(
          fileSchema,
          Arrays.asList("structCol"),
          Arrays.asList(0),
          Sets.newHashSet(
              "structCol.subStructCol",
              "structCol.subStructCol.b",
              "structCol.subStructCol.c"))
      .toString();

  testConversion(
      "structCol",
      "struct<subStructCol:struct<b:bigint,c:boolean>>",
      projectedSchemaText);
}
}
public static void testConversion( final String columnNamesStr, final String columnsTypeStr, final String actualSchema) throws Exception { final List<String> columnNames = createHiveColumnsFrom(columnNamesStr); final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr); final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes); final MessageType expectedMT = MessageTypeParser.parseMessageType(actualSchema); assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + actualSchema, expectedMT, messageTypeFound); // Required to check the original types manually as PrimitiveType.equals does not care about it List<Type> expectedFields = expectedMT.getFields(); List<Type> actualFields = messageTypeFound.getFields(); for (int i = 0, n = expectedFields.size(); i < n; ++i) { OriginalType exp = expectedFields.get(i).getOriginalType(); OriginalType act = actualFields.get(i).getOriginalType(); assertEquals("Original types of the field do not match", exp, act); } } }
/**
 * Converts three BETWEEN search arguments on {@code bCol} (bounds 1..5, 5..1 and 1..1) and
 * checks the string form of each resulting filter predicate. In every expected string the
 * second bound appears in the {@code lteq} part and the first bound in the {@code lt} part.
 */
@Test
public void testFilterBetween() {
  final MessageType schema =
      MessageTypeParser.parseMessageType("message test { required int32 bCol; }");

  // Bounds given as (1, 5).
  final SearchArgument oneToFive = SearchArgumentFactory.newBuilder()
      .between("bCol", PredicateLeaf.Type.LONG, 1L, 5L)
      .build();
  final FilterPredicate oneToFivePredicate =
      ParquetFilterPredicateConverter.toFilterPredicate(oneToFive, schema);
  assertEquals("and(lteq(bCol, 5), not(lt(bCol, 1)))", oneToFivePredicate.toString());

  // Bounds given as (5, 1).
  final SearchArgument fiveToOne = SearchArgumentFactory.newBuilder()
      .between("bCol", PredicateLeaf.Type.LONG, 5L, 1L)
      .build();
  final FilterPredicate fiveToOnePredicate =
      ParquetFilterPredicateConverter.toFilterPredicate(fiveToOne, schema);
  assertEquals("and(lteq(bCol, 1), not(lt(bCol, 5)))", fiveToOnePredicate.toString());

  // Both bounds equal to 1.
  final SearchArgument oneToOne = SearchArgumentFactory.newBuilder()
      .between("bCol", PredicateLeaf.Type.LONG, 1L, 1L)
      .build();
  final FilterPredicate oneToOnePredicate =
      ParquetFilterPredicateConverter.toFilterPredicate(oneToOne, schema);
  assertEquals("and(lteq(bCol, 1), not(lt(bCol, 1)))", oneToOnePredicate.toString());
}
}
MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }"); FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
/**
 * Converts a negated OR over a null check and a BETWEEN on float column {@code a} plus an
 * IN on int column {@code b}, and checks the string form of the resulting filter predicate.
 */
@Test
public void testFilterFloatColumns() {
  final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .startNot()
      .startOr()
      .isNull("a", PredicateLeaf.Type.FLOAT)
      .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
      .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
      .end()
      .end()
      .build();
  final MessageType schema = MessageTypeParser.parseMessageType(
      "message test { required float a; required int32 b; }");

  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, schema);

  assertEquals(
      "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))",
      predicate.toString());
}
@Test public void testFilterColumnsThatDoNoExistOnSchema() { MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }"); SearchArgument sarg = SearchArgumentFactory.newBuilder() .startNot() .startOr() .isNull("a", PredicateLeaf.Type.LONG) .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger") .end() .end() .build(); FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))"; assertEquals(expected, p.toString()); }
columnTypes = "int"; StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes); MessageType fileSchema = MessageTypeParser.parseMessageType( "message hive_schema {\n" + " optional int32 intCol;\n"
/**
 * Converts an AND of two {@code lessThan} leaves on int columns, one {@code lessThanEquals}
 * leaf on a binary column (value taken from a {@code HiveChar}) and two {@code equals}
 * leaves on float columns, then checks the string form of the resulting filter predicate.
 *
 * <p>The float literals are boxed with {@link Double#valueOf(double)} rather than the
 * deprecated {@code new Double(double)} constructor; the boxed value is the same.
 */
@Test
public void testBuilderFloat() throws Exception {
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("x", PredicateLeaf.Type.LONG, 22L)
      .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
      .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
      .equals("z", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
      .equals("z1", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
      .end()
      .build();
  MessageType schema = MessageTypeParser.parseMessageType("message test {"
      + " required int32 x; required int32 x1;"
      + " required binary y; required float z; required float z1;}");

  FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);

  // NOTE(review): the Binary{...} literal has to match whatever HiveChar("hi", 10).toString()
  // yields, including any padding - confirm the spacing against the original test.
  String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)),"
      + " lteq(y, Binary{\"hi \"})), eq(z, "
      + "0.22)), eq(z1, 0.22))";
  assertEquals(expected, p.toString());
}
}
/**
 * Builds an OR of two AND groups, each pairing a leaf on {@code a} with a leaf on
 * {@code none}, a column the schema does not declare. The expected filter predicate keeps
 * only the two leaves on {@code a}, joined by OR.
 */
@Test
public void testFilterColumnsThatDoNoExistOnSchemaHighOrder1() {
  final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .startOr()
      .startAnd()
      .equals("a", PredicateLeaf.Type.LONG, 1L)
      .equals("none", PredicateLeaf.Type.LONG, 1L)
      .end()
      .startAnd()
      .equals("a", PredicateLeaf.Type.LONG, 999L)
      .equals("none", PredicateLeaf.Type.LONG, 999L)
      .end()
      .end()
      .build();
  final MessageType schema = MessageTypeParser.parseMessageType(
      "message test { required int32 a; required int32 b; }");

  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, schema);

  assertEquals("or(eq(a, 1), eq(a, 999))", predicate.toString());
}
/**
 * Builds an AND of two OR groups; the second group contains a leaf on {@code none}, a
 * column the schema does not declare. The expected filter predicate consists of the first
 * OR group only.
 */
@Test
public void testFilterColumnsThatDoNoExistOnSchemaHighOrder2() {
  final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .startAnd()
      .startOr()
      .equals("a", PredicateLeaf.Type.LONG, 1L)
      .equals("b", PredicateLeaf.Type.LONG, 1L)
      .end()
      .startOr()
      .equals("a", PredicateLeaf.Type.LONG, 999L)
      .equals("none", PredicateLeaf.Type.LONG, 999L)
      .end()
      .end()
      .build();
  final MessageType schema = MessageTypeParser.parseMessageType(
      "message test { required int32 a; required int32 b; }");

  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, schema);

  assertEquals("or(eq(a, 1), eq(b, 1))", predicate.toString());
}
/**
 * Converts a negated OR made of a null check on {@code x}, a BETWEEN on {@code y}, an IN on
 * {@code z} and a null-safe equality on {@code a}, then checks the string form of the
 * resulting filter predicate.
 */
@Test
public void testBuilder() throws Exception {
  final MessageType schema = MessageTypeParser.parseMessageType("message test {"
      + " optional int32 x; required int32 y; required int32 z;"
      + " optional binary a;}");
  final SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
      .startNot()
      .startOr()
      .isNull("x", PredicateLeaf.Type.LONG)
      .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
      .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
      .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
      .end()
      .end()
      .build();

  final FilterPredicate predicate =
      ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, schema);

  assertEquals(
      "and(and(and(not(eq(x, null)), not(and(lteq(y, 20), not(lt(y, 10))))), not(or(or(eq(z, 1), "
          + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))",
      predicate.toString());
}
.end() .build(); MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required binary y; required binary z;}"); assertEquals(null, .end() .build(); schema = MessageTypeParser.parseMessageType("message test {" + " optional int32 x; required binary y; required int32 z;" + " optional binary a;}");
.end() .build(); MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required binary y; required binary z;}"); assertEquals(null, .end() .build(); schema = MessageTypeParser.parseMessageType("message test {" + " optional int32 x; required binary y; required int32 z;" + " optional binary a;}");