/**
 * Builds a projected Parquet message type for the requested columns.
 *
 * <p>The field lookup is delegated to {@code getProjectedGroupFields}; the fields it
 * returns are assembled into a new message type that reuses the name of {@code schema}.
 *
 * @param schema Message type schema in which the columns are searched.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object holding the projected columns, named after {@code schema}.
 */
public static MessageType getSchemaByName(MessageType schema, List<String> colNames,
    List<TypeInfo> colTypes) {
  List<Type> fields = getProjectedGroupFields(schema, colNames, colTypes);
  return Types.buildMessage()
      .addFields(fields.toArray(new Type[0]))
      .named(schema.getName());
}
/**
 * Writes one message whose repeated INT32 field {@code list_of_ints} (declared directly on
 * the message, with no LIST annotation) holds 34, 35 and 36, then checks that exactly one
 * record is read back and that it equals {@code list(34, 35, 36)}.
 */
@Test
public void testUnannotatedListOfPrimitives() throws Exception {
  MessageType schema = Types.buildMessage()
      .repeated(INT32).named("list_of_ints")
      .named("UnannotatedListOfPrimitives");

  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer consumer) {
      consumer.startMessage();
      consumer.startField("list_of_ints", 0);
      // Emit the three repeated values in order.
      for (int value : new int[] {34, 35, 36}) {
        consumer.addInteger(value);
      }
      consumer.endField("list_of_ints", 0);
      consumer.endMessage();
    }
  };

  Path file = writeDirect("UnannotatedListOfPrimitives", schema, writer);

  ArrayWritable expected = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actual = read(file);
  Assert.assertEquals("Should have only one record", 1, actual.size());
  assertEquals("Should match expected record", expected, actual.get(0));
}
/**
 * Writes one message whose required LIST-annotated group {@code list_of_ints} contains a
 * repeated INT32 field named {@code array} holding 34, 35 and 36, then checks that exactly
 * one record is read back and that it equals {@code list(34, 35, 36)}.
 */
@Test
public void testAvroPrimitiveInList() throws Exception {
  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer consumer) {
      consumer.startMessage();
      consumer.startField("list_of_ints", 0);

      consumer.startGroup();
      consumer.startField("array", 0);
      // Emit the three repeated values in order.
      for (int value : new int[] {34, 35, 36}) {
        consumer.addInteger(value);
      }
      consumer.endField("array", 0);
      consumer.endGroup();

      consumer.endField("list_of_ints", 0);
      consumer.endMessage();
    }
  };

  Path file = writeDirect("AvroPrimitiveInList",
      Types.buildMessage()
          .requiredGroup().as(LIST)
              .repeated(INT32).named("array")
              .named("list_of_ints")
          .named("AvroPrimitiveInList"),
      writer);

  ArrayWritable expected = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actual = read(file);
  Assert.assertEquals("Should have only one record", 1, actual.size());
  assertEquals("Should match expected record", expected, actual.get(0));
}
/**
 * Writes one message whose required LIST-annotated group {@code list_of_ints} contains a
 * repeated INT32 field named {@code list_of_ints_tuple} holding 34, 35 and 36, then checks
 * that exactly one record is read back and that it equals {@code list(34, 35, 36)}.
 */
@Test
public void testThriftPrimitiveInList() throws Exception {
  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer consumer) {
      consumer.startMessage();
      consumer.startField("list_of_ints", 0);

      consumer.startGroup();
      consumer.startField("list_of_ints_tuple", 0);
      // Emit the three repeated values in order.
      for (int value : new int[] {34, 35, 36}) {
        consumer.addInteger(value);
      }
      consumer.endField("list_of_ints_tuple", 0);
      consumer.endGroup();

      consumer.endField("list_of_ints", 0);
      consumer.endMessage();
    }
  };

  Path file = writeDirect("ThriftPrimitiveInList",
      Types.buildMessage()
          .requiredGroup().as(LIST)
              .repeated(INT32).named("list_of_ints_tuple")
              .named("list_of_ints")
          .named("ThriftPrimitiveInList"),
      writer);

  ArrayWritable expected = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actual = read(file);
  Assert.assertEquals("Should have only one record", 1, actual.size());
  assertEquals("Should match expected record", expected, actual.get(0));
}
@Test public void testStringMapRequiredPrimitive() throws Exception { Path test = writeDirect("StringMapRequiredPrimitive", Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
@Test public void testStringMapOptionalPrimitive() throws Exception { Path test = writeDirect("StringMapOptionalPrimitive", Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
@Test public void testDoubleMapWithStructValue() throws Exception { Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
@Test public void testMapWithComplexKey() throws Exception { Path test = writeDirect("MapWithComplexKey", Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
@Test public void testNestedMap() throws Exception { Path test = writeDirect("DoubleMapWithStructValue", Types.buildMessage() .optionalGroup().as(MAP) .repeatedGroup()
@Test public void testUnannotatedListOfGroups() throws Exception { Path test = writeDirect("UnannotatedListOfGroups", Types.buildMessage() .repeatedGroup() .required(FLOAT).named("x")
Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
@Test public void testNewRequiredGroupInList() throws Exception { Path test = writeDirect("NewRequiredGroupInList", Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
@Test public void testNewOptionalGroupInList() throws Exception { Path test = writeDirect("NewOptionalGroupInList", Types.buildMessage() .optionalGroup().as(LIST) .repeatedGroup()
} else { if (columns.size() > 0) { Types.MessageTypeBuilder builder = Types.buildMessage(); for (String s: columns) { if (!fileSchema.containsField(s)) {