Code-search results: usages of MessageTypeParser.parseMessageType across Hive/Parquet sources (each line below is one snippet from a different file).
/**
 * Parses the Parquet schema serialized into the given Hadoop configuration.
 *
 * @param configuration configuration holding the schema string under {@code PARQUET_HIVE_SCHEMA}
 * @return the parsed {@link MessageType}
 * @throws IllegalStateException if the configuration has no schema under that key; the original
 *         code let {@code parseMessageType(null)} fail with an opaque NullPointerException
 */
public static MessageType getSchema(final Configuration configuration) {
  final String serializedSchema = configuration.get(PARQUET_HIVE_SCHEMA);
  if (serializedSchema == null) {
    // Fail fast with context instead of an NPE deep inside the parser.
    throw new IllegalStateException("No Parquet schema found in configuration");
  }
  return MessageTypeParser.parseMessageType(serializedSchema);
}
/** Projects one leaf of a struct column plus a sibling top-level column. */
@Test
public void testGetProjectedSchema3() throws Exception {
  final String fileSchema = "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " }\n"
      + " optional boolean c;\n"
      + "}\n";
  final MessageType originalMsg = MessageTypeParser.parseMessageType(fileSchema);
  final String projected = DataWritableReadSupport
      .getProjectedSchema(originalMsg, Arrays.asList("structCol", "c"), Arrays.asList(0, 1),
          Sets.newHashSet("structCol.b", "c"))
      .toString();
  testConversion("structCol,c", "struct<b:double>,boolean", projected);
}
public static void testConversion( final String columnNamesStr, final String columnsTypeStr, final String actualSchema) throws Exception { final List<String> columnNames = createHiveColumnsFrom(columnNamesStr); final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr); final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes); final MessageType expectedMT = MessageTypeParser.parseMessageType(actualSchema); assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + actualSchema, expectedMT, messageTypeFound); // Required to check the original types manually as PrimitiveType.equals does not care about it List<Type> expectedFields = expectedMT.getFields(); List<Type> actualFields = messageTypeFound.getFields(); for (int i = 0, n = expectedFields.size(); i < n; ++i) { OriginalType exp = expectedFields.get(i).getOriginalType(); OriginalType act = actualFields.get(i).getOriginalType(); assertEquals("Original types of the field do not match", exp, act); } } }
// NOTE(review): fragment of a larger split-generation method; its beginning and end are outside
// this view, so the statements below are not a complete unit. It parses the table schema stored
// in the read-support metadata (HIVE_TABLE_AS_PARQUET_SCHEMA) to record the column count,
// prepares an empty block list for the split, and reads the split's start offset from the
// FileSplit. The trailing comma-separated list (locations, filtedBlocks, schemas, metadata)
// is the tail of a constructor/method call whose opening is not visible here — presumably a
// ParquetInputSplit; verify against the enclosing method.
schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata() .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount(); final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>(); final long splitStart = ((FileSplit) oldSplit).getStart(); oldSplit.getLocations(), filtedBlocks, readContext.getRequestedSchema().toString(), fileMetaData.getSchema().toString(), fileMetaData.getKeyValueMetaData(), readContext.getReadSupportMetadata());
/**
 * Sums the on-disk (compressed) size of every column chunk, across all blocks, whose column
 * path appears in the requested schema.
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  final MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long total = 0;
  for (final BlockMetaData blockMeta : blocks) {
    for (final ColumnChunkMetaData chunk : blockMeta.getColumns()) {
      if (requested.containsPath(chunk.getPath().toArray())) {
        total += chunk.getTotalSize();
      }
    }
  }
  return total;
}
// NOTE(review): fragment — parses a schema string and resolves one column descriptor;
// both `messageSchema` and `path1` are defined outside this view, so this cannot stand alone.
MessageType schema = MessageTypeParser.parseMessageType(messageSchema); ColumnDescriptor c1 = schema.getColumnDescription(path1);
private static void addType(String t, Tokenizer st, Types.GroupBuilder builder) { Repetition repetition = asRepetition(t, st); // Read type. String type = st.nextToken(); if ("group".equalsIgnoreCase(type)) { addGroupType(st, repetition, builder); } else { addPrimitiveType(st, asPrimitive(type, st), repetition, builder); } }
/**
 * Parses a textual Parquet schema ("message NAME { fields }") into a {@link MessageType}.
 */
private static MessageType parse(String schemaString) {
  final Tokenizer st = new Tokenizer(schemaString, " ;{}()\n\t");
  final Types.MessageTypeBuilder builder = Types.buildMessage();
  // The schema must open with the 'message' keyword.
  check(st.nextToken(), "message", "start with 'message'", st);
  final String messageName = st.nextToken();
  addGroupTypeFields(st.nextToken(), st, builder);
  return builder.named(messageName);
}
/** Projects every leaf of a struct column; the full struct should be kept. */
@Test
public void testGetProjectedSchema2() throws Exception {
  final String fileSchema = "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " }\n"
      + "}\n";
  final MessageType originalMsg = MessageTypeParser.parseMessageType(fileSchema);
  final String projected = DataWritableReadSupport
      .getProjectedSchema(originalMsg, Arrays.asList("structCol"), Arrays.asList(0),
          Sets.newHashSet("structCol.a", "structCol.b"))
      .toString();
  testConversion("structCol", "struct<a:int,b:double>", projected);
}
/**
 * Computes the total compressed footprint of the requested columns over all row-group blocks.
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);
  long bytes = 0L;
  for (final BlockMetaData rowGroup : blocks) {
    // Per-block subtotal of the chunks that survive the projection.
    long blockBytes = 0L;
    for (final ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      if (projection.containsPath(chunk.getPath().toArray())) {
        blockBytes += chunk.getTotalSize();
      }
    }
    bytes += blockBytes;
  }
  return bytes;
}
private static void addType(String t, Tokenizer st, Types.GroupBuilder builder) { Repetition repetition = asRepetition(t, st); // Read type. String type = st.nextToken(); if ("group".equalsIgnoreCase(type)) { addGroupType(st, repetition, builder); } else { addPrimitiveType(st, asPrimitive(type, st), repetition, builder); } }
/** Turns a schema string of the form "message NAME { ... }" into a MessageType. */
private static MessageType parse(String schemaString) {
  Tokenizer tokens = new Tokenizer(schemaString, " ;{}()\n\t");
  Types.MessageTypeBuilder message = Types.buildMessage();
  String first = tokens.nextToken();
  check(first, "message", "start with 'message'", tokens);
  String schemaName = tokens.nextToken();
  // The next token opens the field list; the helper consumes through the closing brace.
  addGroupTypeFields(tokens.nextToken(), tokens, message);
  return message.named(schemaName);
}
public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata, TypeInfo hiveTypeInfo) { this.root = new HiveStructConverter(requestedSchema, MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata, hiveTypeInfo); }
/** Projects a single leaf out of a four-field struct column. */
@Test
public void testGetProjectedSchema1() throws Exception {
  final String fileSchema = "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + " optional boolean c;\n"
      + " optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
      + " }\n"
      + "}\n";
  final MessageType originalMsg = MessageTypeParser.parseMessageType(fileSchema);
  final String projected = DataWritableReadSupport
      .getProjectedSchema(originalMsg, Arrays.asList("structCol"), Arrays.asList(0),
          Sets.newHashSet("structCol.a"))
      .toString();
  testConversion("structCol", "struct<a:int>", projected);
}
/**
 * Builds a {@link ParquetInputSplit} covering this reader's row groups: sums the compressed
 * size of the column chunks present in the requested schema, computes the end offset from the
 * last row group, and records each row group's starting position.
 *
 * @param fileStatus the file the split points at
 * @param requestedSchema textual Parquet schema of the projected columns
 * @param readSupportMetadata read-support metadata (currently unused here)
 * @throws IOException declared for interface compatibility
 */
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema,
    Map<String, String> readSupportMetadata) throws IOException {
  final MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  // Hoisted: the original called this.getRowGroups() on every access, including inside loops.
  final List<BlockMetaData> rowGroups = this.getRowGroups();
  final int rowGroupCount = this.getRowGroupCount();
  long length = 0;
  for (BlockMetaData block : rowGroups) {
    for (ColumnChunkMetaData column : block.getColumns()) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  // NOTE(review): assumes at least one row group exists — verify callers never reach here
  // for an empty file, otherwise this throws IndexOutOfBoundsException.
  final BlockMetaData lastRowGroup = rowGroups.get(rowGroupCount - 1);
  final long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
  final long[] rowGroupOffsets = new long[rowGroupCount];
  for (int i = 0; i < rowGroupOffsets.length; i++) {
    rowGroupOffsets[i] = rowGroups.get(i).getStartingPos();
  }
  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      length,
      hdfsBlock.getHosts(),
      rowGroupOffsets);
}
}
// Parses one group field from the token stream. Grammar handled here:
//   group <name> [ '(' <ORIGINAL_TYPE> ')' ] [ '=' <field id> ] '{' <fields> '}'
// (the repetition was consumed by the caller and arrives as `r`). The tokenizer is consumed
// strictly left-to-right — every nextToken() call below advances shared state, so the
// statement order is load-bearing; do not reorder. Failures while reading the nested fields
// are rethrown wrapped with the group's name and annotation for context, and the group is
// finalized on the builder via named(name) at the end.
private static void addGroupType(Tokenizer st, Repetition r, GroupBuilder<?> builder) { GroupBuilder<?> childBuilder = builder.group(r); String t; String name = st.nextToken(); // Read annotation, if any. t = st.nextToken(); OriginalType originalType = null; if (t.equalsIgnoreCase("(")) { originalType = OriginalType.valueOf(st.nextToken()); childBuilder.as(originalType); check(st.nextToken(), ")", "original type ended by )", st); t = st.nextToken(); } if (t.equals("=")) { childBuilder.id(Integer.parseInt(st.nextToken())); t = st.nextToken(); } try { addGroupTypeFields(t, st, childBuilder); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("problem reading type: type = group, name = " + name + ", original type = " + originalType, e); } childBuilder.named(name); }
/**
 * Serializes the given Hive record through a DataWritableWriter backed by the mocked
 * record consumer, using the supplied textual Parquet schema.
 */
private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException {
  final MessageType parsedSchema = MessageTypeParser.parseMessageType(schema);
  new DataWritableWriter(mockRecordConsumer, parsedSchema).write(record);
}
/** Projects a leaf of a nested struct; only the surviving nested branch should remain. */
@Test
public void testGetProjectedSchema4() throws Exception {
  final String fileSchema = "message hive_schema {\n"
      + " optional group structCol {\n"
      + " optional int32 a;\n"
      + " optional group subStructCol {\n"
      + " optional int64 b;\n"
      + " optional boolean c;\n"
      + " }\n"
      + " }\n"
      + " optional boolean d;\n"
      + "}\n";
  final MessageType originalMsg = MessageTypeParser.parseMessageType(fileSchema);
  final String projected = DataWritableReadSupport
      .getProjectedSchema(originalMsg, Arrays.asList("structCol"), Arrays.asList(0),
          Sets.newHashSet("structCol.subStructCol.b"))
      .toString();
  testConversion("structCol", "struct<subStructCol:struct<b:bigint>>", projected);
}