private void dropExtraColumnStatisticsAfterAlterPartition( String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) { List<String> dataColumns = partitionWithStatistics.getPartition().getColumns().stream() .map(Column::getName) .collect(toImmutableList()); Set<String> columnsWithMissingStatistics = new HashSet<>(dataColumns); columnsWithMissingStatistics.removeAll(partitionWithStatistics.getStatistics().getColumnStatistics().keySet()); // In case new partition had the statistics computed for all the columns, the storePartitionColumnStatistics // call in the alterPartition will just overwrite the old statistics. There is no need to explicitly remove anything. if (columnsWithMissingStatistics.isEmpty()) { return; } // check if statistics for the columnsWithMissingStatistics are actually stored in the metastore // when trying to remove any missing statistics the metastore throws NoSuchObjectException String partitionName = partitionWithStatistics.getPartitionName(); List<ColumnStatisticsObj> statisticsToBeRemoved = getMetastorePartitionColumnStatistics( databaseName, tableName, ImmutableSet.of(partitionName), ImmutableList.copyOf(columnsWithMissingStatistics)) .getOrDefault(partitionName, ImmutableList.of()); for (ColumnStatisticsObj statistics : statisticsToBeRemoved) { deletePartitionColumnStatistics(databaseName, tableName, partitionName, statistics.getColName()); } }
private void storePartitionColumnStatistics(String databaseName, String tableName, String partitionName, PartitionWithStatistics partitionWithStatistics) { PartitionStatistics statistics = partitionWithStatistics.getStatistics(); Map<String, HiveColumnStatistics> columnStatistics = statistics.getColumnStatistics(); if (columnStatistics.isEmpty()) { return; } Map<String, HiveType> columnTypes = partitionWithStatistics.getPartition().getColumns().stream() .collect(toImmutableMap(Column::getName, Column::getType)); setPartitionColumnStatistics(databaseName, tableName, partitionName, columnTypes, columnStatistics, statistics.getBasicStatistics().getRowCount()); }
public void addPartition(PartitionWithStatistics partition) { checkArgument(getPrestoQueryId(partition.getPartition()).isPresent()); partitions.add(partition); }
new PartitionWithStatistics(partition, partitionName, partitionAndMore.getStatisticsUpdate()), new PartitionWithStatistics(oldPartition.get(), partitionName, oldPartitionStatistics)));
@Override public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitionsWithStatistics) { List<Partition> partitions = partitionsWithStatistics.stream() .map(ThriftMetastoreUtil::toMetastoreApiPartition) .collect(toImmutableList()); addPartitionsWithoutStatistics(databaseName, tableName, partitions); for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) { storePartitionColumnStatistics(databaseName, tableName, partitionWithStatistics.getPartitionName(), partitionWithStatistics); } }
public static org.apache.hadoop.hive.metastore.api.Partition toMetastoreApiPartition(PartitionWithStatistics partitionWithStatistics) { org.apache.hadoop.hive.metastore.api.Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); partition.setParameters(updateStatisticsParameters(partition.getParameters(), partitionWithStatistics.getStatistics().getBasicStatistics())); return partition; }
public String getDescription() { return format( "alter partition %s.%s %s", newPartition.getPartition().getDatabaseName(), newPartition.getPartition().getTableName(), newPartition.getPartition().getValues()); }
private void prepareAddPartition(HdfsContext context, PartitionAndMore partitionAndMore) { deleteOnly = false; Partition partition = partitionAndMore.getPartition(); String targetLocation = partition.getStorage().getLocation(); Path currentPath = partitionAndMore.getCurrentLocation(); Path targetPath = new Path(targetLocation); SchemaTableName schemaTableName = new SchemaTableName(partition.getDatabaseName(), partition.getTableName()); PartitionAdder partitionAdder = partitionAdders.computeIfAbsent( schemaTableName, ignored -> new PartitionAdder(partition.getDatabaseName(), partition.getTableName(), delegate, PARTITION_COMMIT_BATCH_SIZE)); if (pathExists(context, hdfsEnvironment, currentPath)) { if (!targetPath.equals(currentPath)) { renameDirectory( context, hdfsEnvironment, currentPath, targetPath, () -> cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, true))); } } else { cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, true)); createDirectory(context, hdfsEnvironment, targetPath); } String partitionName = getPartitionName(partition.getDatabaseName(), partition.getTableName(), partition.getValues()); partitionAdder.addPartition(new PartitionWithStatistics(partition, partitionName, partitionAndMore.getStatisticsUpdate())); }
@Override public void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) { alterPartitionWithoutStatistics(databaseName, tableName, toMetastoreApiPartition(partitionWithStatistics)); storePartitionColumnStatistics(databaseName, tableName, partitionWithStatistics.getPartitionName(), partitionWithStatistics); dropExtraColumnStatisticsAfterAlterPartition(databaseName, tableName, partitionWithStatistics); }
@Override public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) { Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); if (partition.getParameters() == null) { partition.setParameters(ImmutableMap.of()); } PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName()); partitions.put(partitionKey, partition); partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics()); }
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) { PartitionInput input = convertPartition(partitionWithStatistics.getPartition()); PartitionStatistics statistics = partitionWithStatistics.getStatistics(); if (!statistics.getColumnStatistics().isEmpty()) { throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics"); } input.setParameters(updateStatisticsParameters(input.getParameters(), statistics.getBasicStatistics())); return input; }
public AlterPartitionOperation(PartitionWithStatistics newPartition, PartitionWithStatistics oldPartition) { this.newPartition = requireNonNull(newPartition, "newPartition is null"); this.oldPartition = requireNonNull(oldPartition, "oldPartition is null"); checkArgument(newPartition.getPartition().getDatabaseName().equals(oldPartition.getPartition().getDatabaseName())); checkArgument(newPartition.getPartition().getTableName().equals(oldPartition.getPartition().getTableName())); checkArgument(newPartition.getPartition().getValues().equals(oldPartition.getPartition().getValues())); }
protected void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns) throws Exception { doCreateEmptyTable(tableName, ORC, columns); ExtendedHiveMetastore metastoreClient = getMetastoreClient(); Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(tableName)); List<String> firstPartitionValues = ImmutableList.of("2016-01-01"); List<String> secondPartitionValues = ImmutableList.of("2016-01-02"); String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues); String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues); List<PartitionWithStatistics> partitions = ImmutableList.of(firstPartitionName, secondPartitionName) .stream() .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) .collect(toImmutableList()); metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS); metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS); }
@Override public synchronized void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitionsWithStatistics) { for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) { Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); if (partition.getParameters() == null) { partition.setParameters(ImmutableMap.of()); } PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName()); partitions.put(partitionKey, partition); partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics()); } }
public PartitionMetadata(Table table, PartitionWithStatistics partitionWithStatistics) { Partition partition = partitionWithStatistics.getPartition(); PartitionStatistics statistics = partitionWithStatistics.getStatistics(); this.columns = partition.getColumns(); this.parameters = updateStatisticsParameters(partition.getParameters(), statistics.getBasicStatistics()); StorageFormat tableFormat = partition.getStorage().getStorageFormat(); storageFormat = Arrays.stream(HiveStorageFormat.values()) .filter(format -> tableFormat.equals(StorageFormat.fromHiveStorageFormat(format))) .findFirst(); if (table.getTableType().equals(TableType.EXTERNAL_TABLE.name())) { externalLocation = Optional.of(partition.getStorage().getLocation()); } else { externalLocation = Optional.empty(); } bucketProperty = partition.getStorage().getBucketProperty(); serdeParameters = partition.getStorage().getSerdeParameters(); columnStatistics = ImmutableMap.copyOf(statistics.getColumnStatistics()); }
public void run(ExtendedHiveMetastore metastore) { undo = true; metastore.alterPartition(newPartition.getPartition().getDatabaseName(), newPartition.getPartition().getTableName(), newPartition); }
@Override public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List<PartitionUpdate> partitionUpdates) { // This method bypasses transaction interface because this method is inherently hacky and doesn't work well with the transaction abstraction. // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. ExtendedHiveMetastore metastoreClient = getMetastoreClient(); Optional<Partition> partition = metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), copyPartitionFrom); conflictPartition = Partition.builder(partition.get()) .setValues(toPartitionValues(partitionNameToConflict)) .build(); metastoreClient.addPartitions( tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(conflictPartition, partitionNameToConflict, PartitionStatistics.empty()))); }
@Override public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) { Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); if (partition.getParameters() == null) { partition.setParameters(ImmutableMap.of()); } PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName()); partitions.put(partitionKey, partition); partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics()); }
public void undo(ExtendedHiveMetastore metastore) { if (!undo) { return; } metastore.alterPartition(oldPartition.getPartition().getDatabaseName(), oldPartition.getPartition().getTableName(), oldPartition); } }
metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1))); assertEquals( metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), .setLocation(partitionTargetPath(tableName, partitionName))) .build(); metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2)); assertEquals( metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), .setLocation(partitionTargetPath(tableName, partitionName))) .build(); metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns)); assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) .isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns)); .setLocation(partitionTargetPath(tableName, partitionName))) .build(); metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics)); assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) .isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));