/**
 * Converts a column into its Glue model counterpart.
 *
 * <p>The name is copied as-is, the type is recorded through its {@code toString()} form,
 * and the comment is passed as {@code null} when the source column has none.
 *
 * @param prestoColumn source column to convert
 * @return the Glue column produced by the {@code withName}/{@code withType}/{@code withComment} chain
 */
private static com.amazonaws.services.glue.model.Column convertColumn(Column prestoColumn) {
    com.amazonaws.services.glue.model.Column glueColumn = new com.amazonaws.services.glue.model.Column();
    glueColumn = glueColumn.withName(prestoColumn.getName());
    glueColumn = glueColumn.withType(prestoColumn.getType().toString());
    // An absent comment is forwarded as null rather than being skipped.
    glueColumn = glueColumn.withComment(prestoColumn.getComment().orElse(null));
    return glueColumn;
}
}
/**
 * Builds the metastore statistics entry for a boolean column.
 *
 * <p>The nulls count and the false/true counts are copied onto a fresh
 * {@code BooleanColumnStatsData} only when the corresponding value is present in
 * {@code statistics}; absent values are never handed to a setter.
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @return a new {@code ColumnStatisticsObj} wrapping the populated boolean data
 */
private static ColumnStatisticsObj createBooleanStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    BooleanColumnStatsData booleanData = new BooleanColumnStatsData();

    statistics.getNullsCount().ifPresent(nulls -> booleanData.setNumNulls(nulls));
    statistics.getBooleanStatistics().ifPresent(counts -> {
        counts.getFalseCount().ifPresent(falses -> booleanData.setNumFalses(falses));
        counts.getTrueCount().ifPresent(trues -> booleanData.setNumTrues(trues));
    });

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, booleanStats(booleanData));
}
/**
 * Builds the metastore statistics entry for a decimal column.
 *
 * <p>Min and max, when present, are converted with {@code toMetastoreDecimal} and stored as
 * the low and high values. The nulls count is copied when present, and the distinct-values
 * count is whatever {@code toMetastoreDistinctValuesCount} yields for the source distinct
 * count and nulls count (nothing is set when it yields no value).
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @return a new {@code ColumnStatisticsObj} wrapping the populated decimal data
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();

    statistics.getDecimalStatistics().ifPresent(range -> {
        range.getMin().ifPresent(min -> {
            decimalData.setLowValue(toMetastoreDecimal(min));
        });
        range.getMax().ifPresent(max -> {
            decimalData.setHighValue(toMetastoreDecimal(max));
        });
    });

    statistics.getNullsCount().ifPresent(nulls -> decimalData.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> decimalData.setNumDVs(distinct));

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, decimalStats(decimalData));
}
/**
 * Builds the metastore statistics entry for a date column.
 *
 * <p>Min and max, when present, are converted with {@code toMetastoreDate} and stored as the
 * low and high values. The nulls count is copied when present, and the distinct-values count
 * is whatever {@code toMetastoreDistinctValuesCount} yields for the source distinct count and
 * nulls count (nothing is set when it yields no value).
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @return a new {@code ColumnStatisticsObj} wrapping the populated date data
 */
private static ColumnStatisticsObj createDateStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    DateColumnStatsData dateData = new DateColumnStatsData();

    statistics.getDateStatistics().ifPresent(range -> {
        range.getMin().ifPresent(earliest -> {
            dateData.setLowValue(toMetastoreDate(earliest));
        });
        range.getMax().ifPresent(latest -> {
            dateData.setHighValue(toMetastoreDate(latest));
        });
    });

    statistics.getNullsCount().ifPresent(nulls -> dateData.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> dateData.setNumDVs(distinct));

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, dateStats(dateData));
}
/**
 * Builds the metastore statistics entry for a double column.
 *
 * <p>Min and max, when present, are stored directly as the low and high values. The nulls
 * count is copied when present, and the distinct-values count is whatever
 * {@code toMetastoreDistinctValuesCount} yields for the source distinct count and nulls
 * count (nothing is set when it yields no value).
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @return a new {@code ColumnStatisticsObj} wrapping the populated double data
 */
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    DoubleColumnStatsData doubleData = new DoubleColumnStatsData();

    statistics.getDoubleStatistics().ifPresent(range -> {
        range.getMin().ifPresent(min -> doubleData.setLowValue(min));
        range.getMax().ifPresent(max -> doubleData.setHighValue(max));
    });

    statistics.getNullsCount().ifPresent(nulls -> doubleData.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> doubleData.setNumDVs(distinct));

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, doubleStats(doubleData));
}
/**
 * Builds the metastore statistics entry for a long (integer) column.
 *
 * <p>Min and max from the integer statistics, when present, are stored directly as the low
 * and high values. The nulls count is copied when present, and the distinct-values count is
 * whatever {@code toMetastoreDistinctValuesCount} yields for the source distinct count and
 * nulls count (nothing is set when it yields no value).
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @return a new {@code ColumnStatisticsObj} wrapping the populated long data
 */
private static ColumnStatisticsObj createLongStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    LongColumnStatsData longData = new LongColumnStatsData();

    statistics.getIntegerStatistics().ifPresent(range -> {
        range.getMin().ifPresent(min -> longData.setLowValue(min));
        range.getMax().ifPresent(max -> longData.setHighValue(max));
    });

    statistics.getNullsCount().ifPresent(nulls -> longData.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> longData.setNumDVs(distinct));

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, longStats(longData));
}
/**
 * Builds the metastore statistics entry for a binary column.
 *
 * <p>The nulls count is copied only when present. The maximum and average column lengths are
 * always set: the maximum falls back to 0 when the source has no max value size, and the
 * average falls back to 0 when {@code getAverageColumnLength} yields no value.
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @param rowCount row count forwarded to {@code getAverageColumnLength}
 * @return a new {@code ColumnStatisticsObj} wrapping the populated binary data
 */
private static ColumnStatisticsObj createBinaryStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) {
    BinaryColumnStatsData binaryData = new BinaryColumnStatsData();

    statistics.getNullsCount().ifPresent(nulls -> binaryData.setNumNulls(nulls));

    long maxLength = statistics.getMaxValueSizeInBytes().orElse(0);
    binaryData.setMaxColLen(maxLength);

    double averageLength = getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount())
            .orElse(0);
    binaryData.setAvgColLen(averageLength);

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, binaryStats(binaryData));
}
/**
 * Builds the metastore statistics entry for a string column.
 *
 * <p>The nulls count is copied only when present, and the distinct-values count is whatever
 * {@code toMetastoreDistinctValuesCount} yields for the source distinct count and nulls count
 * (nothing is set when it yields no value). The maximum and average column lengths are
 * always set: the maximum falls back to 0 when the source has no max value size, and the
 * average falls back to 0 when {@code getAverageColumnLength} yields no value.
 *
 * @param columnName name recorded on the returned object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to copy from
 * @param rowCount row count forwarded to {@code getAverageColumnLength}
 * @return a new {@code ColumnStatisticsObj} wrapping the populated string data
 */
private static ColumnStatisticsObj createStringStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) {
    StringColumnStatsData stringData = new StringColumnStatsData();

    statistics.getNullsCount().ifPresent(nulls -> stringData.setNumNulls(nulls));
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> stringData.setNumDVs(distinct));

    long maxLength = statistics.getMaxValueSizeInBytes().orElse(0);
    stringData.setMaxColLen(maxLength);

    double averageLength = getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount())
            .orElse(0);
    stringData.setAvgColLen(averageLength);

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, stringStats(stringData));
}