StructTypeInfo prune() {
  // Rebuild this struct's TypeInfo, keeping only the fields flagged in
  // selected[] and recursing into child pruners for nested struct fields.
  List<String> newNames = new ArrayList<>();
  List<TypeInfo> newTypes = new ArrayList<>();
  List<String> oldNames = typeInfo.getAllStructFieldNames();
  List<TypeInfo> oldTypes = typeInfo.getAllStructFieldTypeInfos();
  for (int i = 0; i < oldNames.size(); ++i) {
    String fn = oldNames.get(i);
    if (selected[i]) {
      newNames.add(fn);
      // Compute the lower-cased lookup key once (previously toLowerCase()
      // was evaluated twice per selected field).
      // NOTE(review): this assumes 'children' is keyed by lower-cased field
      // names — only evidenced by this lookup; verify at the insertion site.
      String key = fn.toLowerCase();
      if (children.containsKey(key)) {
        // Nested struct with its own selection: prune recursively.
        newTypes.add(children.get(key).prune());
      } else {
        newTypes.add(oldTypes.get(i));
      }
    }
  }
  return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(newNames, newTypes);
}
}
/**
 * Converts an Avro {@code RECORD} schema into a Hive struct {@code TypeInfo},
 * recursing into each field's schema.
 *
 * @param schema      the record schema to convert; must be of type RECORD
 * @param seenSchemas identity-based set of record schemas already seen on the
 *                    current conversion; {@code null} on the top-level call
 * @return the struct TypeInfo with one entry per Avro field, in field order
 * @throws AvroSerdeException if the schema has already been seen (recursion)
 */
private static TypeInfo generateRecordTypeInfo(Schema schema, Set<Schema> seenSchemas)
    throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.RECORD);

  // Identity semantics: two structurally equal but distinct Schema objects
  // count as different entries.
  if (seenSchemas == null) {
    seenSchemas = Collections.newSetFromMap(new IdentityHashMap<Schema, Boolean>());
  } else if (seenSchemas.contains(schema)) {
    throw new AvroSerdeException(
        "Recursive schemas are not supported. Recursive schema was " + schema
        .getFullName());
  }
  // NOTE(review): entries are never removed after a record is processed, so a
  // non-recursive schema reachable through two different fields is also
  // rejected as "recursive" — confirm this is the intended behavior.
  seenSchemas.add(schema);

  List<Schema.Field> fields = schema.getFields();
  List<String> fieldNames = new ArrayList<String>(fields.size());
  List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(fields.size());
  for(int i = 0; i < fields.size(); i++) {
    fieldNames.add(i, fields.get(i).name());
    typeInfos.add(i, generateTypeInfo(fields.get(i).schema(), seenSchemas));
  }

  return TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos);
}
/**
 * Converts a Calcite struct {@code RelDataType} into a Hive struct TypeInfo.
 *
 * @param rType the Calcite row/struct type to convert
 * @return the equivalent Hive struct TypeInfo
 */
public static TypeInfo convertStructType(RelDataType rType) {
  // Materialize names/types eagerly. The previous Lists.transform-based code
  // produced lazy views, so convert() was re-invoked on every list access and
  // lazy views escaped into the TypeInfo factory.
  List<RelDataTypeField> fieldList = rType.getFieldList();
  List<String> fNames = new ArrayList<>(fieldList.size());
  List<TypeInfo> fTypes = new ArrayList<>(fieldList.size());
  for (RelDataTypeField f : fieldList) {
    fNames.add(f.getName());
    fTypes.add(convert(f.getType()));
  }
  return TypeInfoFactory.getStructTypeInfo(fNames, fTypes);
}
@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { LOG.debug("Initializing HCatRecordSerDe"); LOG.debug("props to serde: {}", tbl.entrySet()); // Get column names and types String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA); // all table column names if (columnNameProperty.length() == 0) { columnNames = new ArrayList<String>(); } else { columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); } // all column types if (columnTypeProperty.length() == 0) { columnTypes = new ArrayList<TypeInfo>(); } else { columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); } LOG.debug("columns: {} {}", columnNameProperty, columnNames); LOG.debug("types: {} {}", columnTypeProperty, columnTypes); assert (columnNames.size() == columnTypes.size()); rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); }
StructTypeInfo prune() {
  // Produce a pruned copy of this struct's TypeInfo containing only the
  // fields marked in selected[]; nested structs are pruned recursively.
  List<String> newNames = new ArrayList<>();
  List<TypeInfo> newTypes = new ArrayList<>();
  List<String> oldNames = typeInfo.getAllStructFieldNames();
  List<TypeInfo> oldTypes = typeInfo.getAllStructFieldTypeInfos();
  for (int i = 0; i < oldNames.size(); ++i) {
    String fn = oldNames.get(i);
    if (selected[i]) {
      newNames.add(fn);
      // Lower-case the key once (previously computed on both containsKey and get).
      // NOTE(review): assumes 'children' is keyed by lower-cased field names;
      // verify against where children is populated.
      String key = fn.toLowerCase();
      if (children.containsKey(key)) {
        newTypes.add(children.get(key).prune());
      } else {
        newTypes.add(oldTypes.get(i));
      }
    }
  }
  return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(newNames, newTypes);
}
}
/**
 * Configures this SerDe from the table properties: parses the column
 * name/type lists and builds the row TypeInfo and ObjectInspector.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  this.conf = conf;
  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  // Delimiter for the column-name list; defaults to a comma when not set.
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl
      .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);

  // Create an object inspector
  final List<String> columnNames;
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  final List<TypeInfo> columnTypes;
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  rowObjectInspector =
      (StructObjectInspector) getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);

  // NOTE(review): 'fields' is a local that is filled but never read before the
  // method returns — dead code unless toField(...) has side effects. Confirm
  // whether it was meant to build a schema stored on the instance.
  final List<Field> fields = new ArrayList<>();
  final int size = columnNames.size();
  for (int i = 0; i < size; i++) {
    fields.add(toField(columnNames.get(i), columnTypes.get(i)));
  }
}
/**
 * Maps a RecordSchema onto an ORC struct TypeInfo.
 *
 * @param recordSchema   the record schema to convert
 * @param hiveFieldNames when true, field names are lower-cased to match Hive
 * @return the struct TypeInfo, or null when the schema exposes no field list
 */
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames)
        throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields == null) {
        return null;
    }
    List<String> orcFieldNames = new ArrayList<>(recordFields.size());
    List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
    for (RecordField recordField : recordFields) {
        String fieldName = recordField.getFieldName();
        if (hiveFieldNames) {
            fieldName = fieldName.toLowerCase();
        }
        orcFieldNames.add(fieldName);
        orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
    }
    return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
}
// Build the row's struct TypeInfo from the parallel column name/type lists.
rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
// Parse the declared column types, then derive the row struct TypeInfo and
// its standard writable ObjectInspector (the last statement continues past
// this chunk of the file).
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
rowObjectInspector = (StructObjectInspector) TypeInfoUtils
@BeforeClass
public static void setup() {
  // col1Type: struct<a:boolean, b:double>
  List<String> innerNames = new ArrayList<>();
  innerNames.add("a");
  innerNames.add("b");
  List<TypeInfo> innerTypes = new ArrayList<>();
  innerTypes.add(TypeInfoFactory.booleanTypeInfo);
  innerTypes.add(TypeInfoFactory.doubleTypeInfo);
  col1Type = TypeInfoFactory.getStructTypeInfo(innerNames, innerTypes);

  // col2Type: plain double
  col2Type = TypeInfoFactory.doubleTypeInfo;

  // col3Type: struct<col1:struct<a:boolean,b:double>, col2:double>
  List<String> outerNames = new ArrayList<>();
  outerNames.add("col1");
  outerNames.add("col2");
  List<TypeInfo> outerTypes = new ArrayList<>();
  outerTypes.add(col1Type);
  outerTypes.add(col2Type);
  col3Type = TypeInfoFactory.getStructTypeInfo(outerNames, outerTypes);
}
// Extracting a nested field ("foo.bar") must fail with
// EsHadoopIllegalArgumentException (checked via the 'expected' attribute);
// the assertEquals is never reached on the passing path.
@Test(expected = EsHadoopIllegalArgumentException.class)
public void testHiveFieldExtractorNested() throws Exception {
  List<String> nested = Arrays.asList(new String[] { "bar", "bor" });
  List<TypeInfo> types = Arrays.asList(new TypeInfo[] { stringTypeInfo, intTypeInfo });
  // Inner struct: {bar: "found", bor: 2}
  MyHiveType struct = new MyHiveType(
      Arrays.asList(new Object[] { new Text("found"), new IntWritable(2) }),
      getStructTypeInfo(nested, types));
  // Top-level struct: {foo: <inner struct>, far: 1}
  List<String> topNames = Arrays.asList(new String[] { "foo", "far" });
  List<TypeInfo> topTypes =
      Arrays.asList(new TypeInfo[] { getStructTypeInfo(nested, types), intTypeInfo });
  MyHiveType topStruct = new MyHiveType(
      Arrays.asList(new Object[] { struct, new IntWritable(1) }),
      getStructTypeInfo(topNames, topTypes));
  // Removed the unused local 'colDesc' ("bar,bor") — it was never referenced.
  assertEquals(new Text("found"), extract("foo.bar", topStruct));
}
// Builds struct<an_int:int, a_long:bigint, a_double:double, a_string:string>
// from a name/type table instead of four parallel add() calls.
private TypeInfo getTypeInfo() throws Exception {
  String[][] fields = {
      { "an_int", "int" },
      { "a_long", "bigint" },
      { "a_double", "double" },
      { "a_string", "string" },
  };
  List<String> names = new ArrayList<String>(fields.length);
  List<TypeInfo> tis = new ArrayList<TypeInfo>(fields.length);
  for (String[] field : fields) {
    names.add(field[0]);
    tis.add(TypeInfoFactory.getPrimitiveTypeInfo(field[1]));
  }
  return TypeInfoFactory.getStructTypeInfo(names, tis);
}
/**
 * Creates the Parquet read context for the requested schema during the init
 * phase.
 *
 * @param context init context carrying the job configuration and file schema
 * @return the Parquet ReadContext (pruned to the requested columns when
 *         column metadata is present in the configuration)
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  Configuration configuration = context.getConfiguration();
  MessageType fileSchema = context.getFileSchema();
  String columnNames = configuration.get(IOConstants.COLUMNS);
  Map<String, String> contextMetadata = new HashMap<String, String>();
  boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);

  if (columnNames == null) {
    // No Hive column metadata: expose the file schema unchanged.
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }

  List<String> columnNamesList = getColumnNames(columnNames);
  String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
  List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);

  // Resolve the table schema either by column index or by name.
  MessageType tableSchema =
      getRequestedSchemaForIndexAccess(indexAccess, columnNamesList, columnTypesList, fileSchema);
  contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
  contextMetadata.put(PARQUET_COLUMN_INDEX_ACCESS, String.valueOf(indexAccess));
  this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);

  return new ReadContext(
      getRequestedPrunedSchema(columnNamesList, tableSchema, configuration), contextMetadata);
}
// struct<one:string, two:int> with values ("first", 2) must serialize to a
// JSON object keyed by field name.
@Test
public void testStruct() {
  List<String> names = Arrays.asList(new String[] { "one", "two" });
  List<TypeInfo> types = Arrays.asList(new TypeInfo[] { stringTypeInfo, intTypeInfo });
  List<Object> values = Arrays.asList(new Object[] { new Text("first"), new IntWritable(2) });
  MyHiveType struct = new MyHiveType(values, getStructTypeInfo(names, types));
  assertEquals("{\"one\":\"first\",\"two\":2}", hiveTypeToJson(struct));
}
// Extracting a top-level struct field by name returns its value.
@Test
public void testHiveFieldExtractorTopLevel() throws Exception {
  List<String> names = Arrays.asList(new String[] { "one", "two" });
  List<TypeInfo> types = Arrays.asList(new TypeInfo[] { stringTypeInfo, intTypeInfo });
  // struct: {one: "first", two: 2}
  MyHiveType struct = new MyHiveType(
      Arrays.asList(new Object[] { new Text("first"), new IntWritable(2) }),
      getStructTypeInfo(names, types));
  // Removed the unused local 'colDesc' ("one,two") — it was never referenced.
  assertEquals("first", extract("one", struct));
}
}
private static StructObjectInspector createStructObjectInspector(Configuration conf) { // Create row related objects String columnNames = conf.get(IOConstants.COLUMNS); List<String> columnNamesList = DataWritableReadSupport.getColumnNames(columnNames); String columnTypes = conf.get(IOConstants.COLUMNS_TYPES); List<TypeInfo> columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes); TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList); return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); }
/**
 * Converts a Calcite struct {@code RelDataType} into a Hive struct TypeInfo.
 *
 * @param rType the Calcite row/struct type to convert
 * @return the equivalent Hive struct TypeInfo
 */
public static TypeInfo convertStructType(RelDataType rType) {
  // Collect names/types into concrete lists. The former Lists.transform
  // implementation returned lazy views, so convert() ran again on each list
  // access and the views leaked into the TypeInfo factory.
  List<RelDataTypeField> fieldList = rType.getFieldList();
  List<String> fNames = new ArrayList<>(fieldList.size());
  List<TypeInfo> fTypes = new ArrayList<>(fieldList.size());
  for (RelDataTypeField field : fieldList) {
    fNames.add(field.getName());
    fTypes.add(convert(field.getType()));
  }
  return TypeInfoFactory.getStructTypeInfo(fNames, fTypes);
}
// Initializes the given SerDe from a [name, type] schema table and returns
// the standard writable struct inspector matching that schema.
private StructObjectInspector initSerDe(AbstractSerDe serDe, String[][] schema)
    throws SerDeException {
  List<String> fieldNameList = newArrayList();
  List<String> fieldTypeList = newArrayList();
  List<TypeInfo> typeInfoList = newArrayList();
  for (String[] nameAndType : schema) {
    fieldNameList.add(nameAndType[0]);
    fieldTypeList.add(nameAndType[1]);
    typeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(nameAndType[1]));
  }

  // The SerDe is configured via comma-joined property strings.
  Properties schemaProperties = new Properties();
  schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(fieldNameList));
  schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(fieldTypeList));
  SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null);

  TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(fieldNameList, typeInfoList);
  return (StructObjectInspector)
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowType);
}
// Derives the Hive struct type for the given column metadata strings and
// wraps it in an ArrayWritable-backed ObjectInspector.
private ArrayWritableObjectInspector getObjectInspector(final String columnNames,
    final String columnTypes) {
  final List<String> columnNameList = createHiveColumnsFrom(columnNames);
  final List<TypeInfo> columnTypeList = createHiveTypeInfoFrom(columnTypes);
  final StructTypeInfo rowTypeInfo =
      (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);
  return new ArrayWritableObjectInspector(rowTypeInfo);
}
// Builds an ArrayWritable-backed inspector for the struct described by the
// column name/type metadata strings.
private ArrayWritableObjectInspector getObjectInspector(final String columnNames,
    final String columnTypes) {
  final List<String> nameList = createHiveColumnsFrom(columnNames);
  final List<TypeInfo> typeList = createHiveTypeInfoFrom(columnTypes);
  return new ArrayWritableObjectInspector(
      (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(nameList, typeList));
}