public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns) { int avgRowSize = 0; for (String neededCol : neededColumns) { ColumnInfo ci = getColumnInfoForColumn(neededCol, schema); if (ci == null) { // No need to collect statistics of index columns continue; } ObjectInspector oi = ci.getObjectInspector(); String colTypeLowerCase = ci.getTypeName().toLowerCase(); if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) { avgRowSize += getAvgColLenOf(conf, oi, colTypeLowerCase); } else { avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase); } } return avgRowSize; }
public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns) { int avgRowSize = 0; for (String neededCol : neededColumns) { ColumnInfo ci = getColumnInfoForColumn(neededCol, schema); if (ci == null) { // No need to collect statistics of index columns continue; } ObjectInspector oi = ci.getObjectInspector(); String colTypeLowerCase = ci.getTypeName().toLowerCase(); if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) { avgRowSize += getAvgColLenOf(conf, oi, colTypeLowerCase); } else { avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase); } } return avgRowSize; }
private static ColStatistics estimateColStats(long numRows, String colName, HiveConf conf, List<ColumnInfo> schema) { ColumnInfo cinfo = getColumnInfoForColumn(colName, schema); ColStatistics cs = new ColStatistics(colName, cinfo.getTypeName()); cs.setIsEstimated(true);
public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns) { int avgRowSize = 0; for (String neededCol : neededColumns) { ColumnInfo ci = getColumnInfoForColumn(neededCol, schema); if (ci == null) { // No need to collect statistics of index columns continue; } ObjectInspector oi = ci.getObjectInspector(); String colType = ci.getTypeName(); if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME) || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || colType.startsWith(serdeConstants.CHAR_TYPE_NAME) || colType.startsWith(serdeConstants.LIST_TYPE_NAME) || colType.startsWith(serdeConstants.MAP_TYPE_NAME) || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME) || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) { avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colType); } else { avgRowSize += getAvgColLenOfFixedLengthTypes(colType); } } return avgRowSize; }