/**
 * Builds a projected Parquet message schema from the given column names and types.
 *
 * <p>The projected fields are obtained from
 * {@code getProjectedGroupFields(schema, colNames, colTypes)} and are wrapped in a new
 * message type that carries the same name as {@code schema}.
 *
 * @param schema Message type schema where to search for column names.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object of projected columns.
 */
public static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
  final List<Type> matchedFields = getProjectedGroupFields(schema, colNames, colTypes);
  final Type[] fieldArray = matchedFields.toArray(new Type[matchedFields.size()]);

  // Keep the original message name so the projection is still identified as the same schema.
  final String messageName = schema.getName();
  return Types.buildMessage().addFields(fieldArray).named(messageName);
}
/**
 * Writes one record into a file whose schema is a single repeated INT32 field
 * ({@code list_of_ints}) with no enclosing LIST-annotated group, then checks that exactly one
 * record is read back and that it equals {@code list(34, 35, 36)}.
 */
@Test
public void testUnannotatedListOfPrimitives() throws Exception {
  MessageType schema = Types.buildMessage()
      .repeated(INT32).named("list_of_ints")
      .named("UnannotatedListOfPrimitives");

  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer rc) {
      rc.startMessage();
      // The three values are written directly under the repeated field.
      rc.startField("list_of_ints", 0);
      rc.addInteger(34);
      rc.addInteger(35);
      rc.addInteger(36);
      rc.endField("list_of_ints", 0);
      rc.endMessage();
    }
  };

  Path written = writeDirect("UnannotatedListOfPrimitives", schema, writer);

  ArrayWritable expectedRecord = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actualRecords = read(written);

  Assert.assertEquals("Should have only one record", 1, actualRecords.size());
  assertEquals("Should match expected record", expectedRecord, actualRecords.get(0));
}
/**
 * Writes one record into a file whose schema is a required LIST-annotated group
 * ({@code list_of_ints}) containing a repeated INT32 field named {@code array}, then checks
 * that exactly one record is read back and that it equals {@code list(34, 35, 36)}.
 */
@Test
public void testAvroPrimitiveInList() throws Exception {
  MessageType schema = Types.buildMessage()
      .requiredGroup().as(LIST)
          .repeated(INT32).named("array")
          .named("list_of_ints")
      .named("AvroPrimitiveInList");

  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer rc) {
      rc.startMessage();
      rc.startField("list_of_ints", 0);

      // Outer LIST group wrapping the repeated primitive field.
      rc.startGroup();
      rc.startField("array", 0);
      rc.addInteger(34);
      rc.addInteger(35);
      rc.addInteger(36);
      rc.endField("array", 0);
      rc.endGroup();

      rc.endField("list_of_ints", 0);
      rc.endMessage();
    }
  };

  Path written = writeDirect("AvroPrimitiveInList", schema, writer);

  ArrayWritable expectedRecord = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actualRecords = read(written);

  Assert.assertEquals("Should have only one record", 1, actualRecords.size());
  assertEquals("Should match expected record", expectedRecord, actualRecords.get(0));
}
/**
 * Writes one record into a file whose schema is a required LIST-annotated group
 * ({@code list_of_ints}) containing a repeated INT32 field named {@code list_of_ints_tuple},
 * then checks that exactly one record is read back and that it equals
 * {@code list(34, 35, 36)}.
 */
@Test
public void testThriftPrimitiveInList() throws Exception {
  MessageType schema = Types.buildMessage()
      .requiredGroup().as(LIST)
          .repeated(INT32).named("list_of_ints_tuple")
          .named("list_of_ints")
      .named("ThriftPrimitiveInList");

  DirectWriter writer = new DirectWriter() {
    @Override
    public void write(RecordConsumer rc) {
      rc.startMessage();
      rc.startField("list_of_ints", 0);

      // Outer LIST group wrapping the repeated primitive field.
      rc.startGroup();
      rc.startField("list_of_ints_tuple", 0);
      rc.addInteger(34);
      rc.addInteger(35);
      rc.addInteger(36);
      rc.endField("list_of_ints_tuple", 0);
      rc.endGroup();

      rc.endField("list_of_ints", 0);
      rc.endMessage();
    }
  };

  Path written = writeDirect("ThriftPrimitiveInList", schema, writer);

  ArrayWritable expectedRecord = list(
      new IntWritable(34),
      new IntWritable(35),
      new IntWritable(36));

  List<ArrayWritable> actualRecords = read(written);

  Assert.assertEquals("Should have only one record", 1, actualRecords.size());
  assertEquals("Should match expected record", expectedRecord, actualRecords.get(0));
}