private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats, ColumnStatistics newStats) { List<ColumnStatisticsObj> newColList = newStats.getStatsObj(); if (newColList != null) { for (ColumnStatisticsObj colStat : newColList) { // This is admittedly a bit simple, StatsObjectConverter seems to allow // old stats attributes to be kept if the new values do not overwrite them. oldStats.put(colStat.getColName().toLowerCase(), colStat); } } }
public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) throws InvalidObjectException { List<ColumnStatisticsObj> list = new ArrayList<>(); if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) { csNew.getStatsObj().size(), csOld.getStatsObjSize()); for (ColumnStatisticsObj obj : csOld.getStatsObj()) { map.put(obj.getColName(), obj); for (int index = 0; index < csNew.getStatsObj().size(); index++) { ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName()); if (statsObjOld != null) {
public static void getMergableCols(ColumnStatistics csNew, Map<String, String> parameters) { List<ColumnStatisticsObj> list = new ArrayList<>(); for (int index = 0; index < csNew.getStatsObj().size(); index++) { ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); // canColumnStatsMerge guarantees that it is accurate before we do merge if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) { list.add(statsObjNew); } // in all the other cases, we can not merge } csNew.setStatsObj(list); }
if (csOld != null && csOld.getStatsObjSize() != 0 && !firstColStats.getStatsObj().isEmpty()) { MetaStoreServerUtils.mergeColStats(firstColStats, csOld); if (!firstColStats.getStatsObj().isEmpty()) { result = updateTableColumnStatsInternal(firstColStats, request.getValidWriteIdList(), request.getWriteId());
private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats, ColumnStatistics newStats) { List<ColumnStatisticsObj> newColList = newStats.getStatsObj(); if (newColList != null) { for (ColumnStatisticsObj colStat : newColList) { // This is admittedly a bit simple, StatsObjectConverter seems to allow // old stats attributes to be kept if the new values do not overwrite them. oldStats.put(colStat.getColName().toLowerCase(), colStat); } } }
/**
 * Returns the current value of the requested field.
 *
 * @param field identifier of the field to read
 * @return the stats descriptor, the stats object list, or the (boxed) compliance flag
 * @throws IllegalStateException if {@code field} is not one of the handled identifiers
 */
public Object getFieldValue(_Fields field) {
  final Object value;
  switch (field) {
    case STATS_DESC:
      value = getStatsDesc();
      break;
    case STATS_OBJ:
      value = getStatsObj();
      break;
    case IS_STATS_COMPLIANT:
      value = isIsStatsCompliant();
      break;
    default:
      throw new IllegalStateException();
  }
  return value;
}
/**
 * Collects the names of columns in {@code existingStats} whose statistics are not
 * considered up to date.
 *
 * <p>A column counts as up to date only when {@code isTxnValid} is true,
 * {@code StatsSetupConst.areBasicStatsUptoDate(params)} is true, and
 * {@code StatsSetupConst.areColumnStatsUptoDate(params, column)} is true.
 * Every other column is returned.
 *
 * @param existingStats statistics whose columns are examined
 * @param params        parameters consulted through {@code StatsSetupConst}
 * @param isTxnValid    when false, every column is reported
 * @return names of the columns that are not up to date, in iteration order
 */
private List<String> getExistingStatsToUpdate(
    ColumnStatistics existingStats, Map<String, String> params, boolean isTxnValid) {
  boolean mayTrustParams = isTxnValid && StatsSetupConst.areBasicStatsUptoDate(params);
  List<String> staleColumns = new ArrayList<>();
  for (ColumnStatisticsObj statsObj : existingStats.getStatsObj()) {
    String columnName = statsObj.getColName();
    // The per-column check is only consulted when the parameters can be trusted at all.
    boolean upToDate =
        mayTrustParams && StatsSetupConst.areColumnStatsUptoDate(params, columnName);
    if (!upToDate) {
      staleColumns.add(columnName);
    }
  }
  return staleColumns;
}
new HashMap<String, ColumnStatsAggregator>(); for (ColumnStatistics css : partStats) { List<ColumnStatisticsObj> objs = css.getStatsObj(); for (ColumnStatisticsObj obj : objs) { String partName = css.getStatsDesc().getPartName();
try { partVal = Warehouse.makeValsFromName(cs.getStatsDesc().getPartName(), null); List<ColumnStatisticsObj> colStatsObjs = cs.getStatsObj(); for (ColumnStatisticsObj colStatObj : colStatsObjs) { if (isPartitionColStatsCacheDirty.compareAndSet(true, false)) {
private void normalizeColStatsInput(ColumnStatistics colStats) throws MetaException { // TODO: is this really needed? this code is propagated from HIVE-1362 but most of it is useless. ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); statsDesc.setCatName(statsDesc.isSetCatName() ? statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf)); statsDesc.setDbName(statsDesc.getDbName().toLowerCase()); statsDesc.setTableName(statsDesc.getTableName().toLowerCase()); statsDesc.setPartName(lowerCaseConvertPartName(statsDesc.getPartName())); long time = System.currentTimeMillis() / 1000; statsDesc.setLastAnalyzed(time); for (ColumnStatisticsObj statsObj : colStats.getStatsObj()) { statsObj.setColName(statsObj.getColName().toLowerCase()); statsObj.setColType(statsObj.getColType().toLowerCase()); } colStats.setStatsDesc(statsDesc); colStats.setStatsObj(colStats.getStatsObj()); }
@Override public TableStatsResult get_table_statistics_req(TableStatsRequest request) throws TException { String catName = request.isSetCatName() ? request.getCatName().toLowerCase() : getDefaultCatalog(conf); String dbName = request.getDbName().toLowerCase(); String tblName = request.getTblName().toLowerCase(); startFunction("get_table_statistics_req", ": table=" + TableName.getQualified(catName, dbName, tblName)); TableStatsResult result = null; List<String> lowerCaseColNames = new ArrayList<>(request.getColNames().size()); for (String colName : request.getColNames()) { lowerCaseColNames.add(colName.toLowerCase()); } try { ColumnStatistics cs = getMS().getTableColumnStatistics( catName, dbName, tblName, lowerCaseColNames, request.getValidWriteIdList()); // Note: stats compliance is not propagated to the client; instead, we just return nothing // if stats are not compliant for now. This won't work for stats merging, but that // is currently only done on metastore size (see set_aggr...). // For some optimizations we might make use of incorrect stats that are "better than // nothing", so this may change in future. result = new TableStatsResult((cs == null || cs.getStatsObj() == null || (cs.isSetIsStatsCompliant() && !cs.isIsStatsCompliant())) ? Lists.newArrayList() : cs.getStatsObj()); } finally { endFunction("get_table_statistics_req", result == null, null, tblName); } return result; }
/**
 * Updates (or fetches) the column statistics of one partition through
 * {@code HiveAlterHandler.updateOrGetPartitionColumnStats} and mirrors the outcome in the
 * shared cache.
 *
 * <p>Columns reported as deleted are removed from the cached partition statistics; when
 * statistics are returned, the cached entry for the partition is updated with them.
 *
 * @param rawStore    store used for the update
 * @param before      table object passed through to the alter handler
 * @param catalogName catalog of the table
 * @param dbName      database of the table
 * @param tableName   name of the table
 * @param part        partition whose statistics are handled
 * @return the statistics returned by the alter handler, possibly {@code null}
 * @throws Exception propagated from the alter handler or the cache
 */
private static ColumnStatistics updateStatsForPart(RawStore rawStore, Table before,
    String catalogName, String dbName, String tableName, Partition part) throws Exception {
  List<String> droppedColumns = new ArrayList<>();
  // The handler fills droppedColumns with the columns whose stats were deleted.
  ColumnStatistics refreshed = HiveAlterHandler.updateOrGetPartitionColumnStats(
      rawStore, catalogName, dbName, tableName, part.getValues(),
      part.getSd().getCols(), before, part, null, droppedColumns);

  for (String droppedColumn : droppedColumns) {
    sharedCache.removePartitionColStatsFromCache(
        catalogName, dbName, tableName, part.getValues(), droppedColumn);
  }

  if (refreshed != null) {
    sharedCache.updatePartitionColStatsInCache(
        catalogName, dbName, tableName, part.getValues(), refreshed.getStatsObj());
  }
  return refreshed;
}
private boolean updateTempTableColumnStats(String dbName, String tableName, ColumnStatistics colStats) throws MetaException { SessionState ss = SessionState.get(); if (ss == null) { throw new MetaException("No current SessionState, cannot update temporary table stats for " + StatsUtils.getFullyQualifiedTableName(dbName, tableName)); } Map<String, ColumnStatisticsObj> ssTableColStats = getTempTableColumnStatsForTable(dbName, tableName); if (ssTableColStats == null) { // Add new entry for this table ssTableColStats = new HashMap<String, ColumnStatisticsObj>(); ss.getTempTableColStats().put( StatsUtils.getFullyQualifiedTableName(dbName, tableName), ssTableColStats); } mergeColumnStats(ssTableColStats, colStats); List<String> colNames = new ArrayList<>(); for (ColumnStatisticsObj obj : colStats.getStatsObj()) { colNames.add(obj.getColName()); } org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tableName); StatsSetupConst.setColumnStatsState(table.getParameters(), colNames); return true; }
map.put(stat.getStatsDesc().getPartName(), stat.getStatsObj());
Collections.singletonList(partName), true); Partition partition = partitions.get(0); compareStatsForOneTableOrPartition(partition.getColStats().getStatsObj(), i, colMap);
private void compareStatsForTable(String catName, String dbName, String tableName, Map<String, Column> colMap) throws TException { List<ColumnStatisticsObj> objs = catName.equals(NO_CAT) ? client.getTableColumnStatistics(dbName, tableName, new ArrayList<>(colMap.keySet())) : client.getTableColumnStatistics(catName, dbName, tableName, new ArrayList<>(colMap.keySet())); compareStatsForOneTableOrPartition(objs, 0, colMap); // Test the statistics obtained through getTable call. Table table = catName.equals(NO_CAT) ? client.getTable(dbName, tableName, true) : client.getTable(catName, dbName, tableName, null, true); Assert.assertTrue(table.isSetColStats()); compareStatsForOneTableOrPartition(table.getColStats().getStatsObj(), 0, colMap); // Test that getTable call doesn't get the statistics when not explicitly requested. table = catName.equals(NO_CAT) ? client.getTable(dbName, tableName, false) : client.getTable(catName, dbName, tableName, null, false); Assert.assertFalse(table.isSetColStats()); }
private void updateTableColStats(RawStore rawStore, String catName, String dbName, String tblName) { boolean committed = false; rawStore.openTransaction(); try { Table table = rawStore.getTable(catName, dbName, tblName); if (!table.isSetPartitionKeys()) { List<String> colNames = MetaStoreUtils.getColumnNamesForTable(table); Deadline.startTimer("getTableColumnStatistics"); ColumnStatistics tableColStats = rawStore.getTableColumnStatistics(catName, dbName, tblName, colNames); Deadline.stopTimer(); if (tableColStats != null) { sharedCache.refreshTableColStatsInCache(StringUtils.normalizeIdentifier(catName), StringUtils.normalizeIdentifier(dbName), StringUtils.normalizeIdentifier(tblName), tableColStats.getStatsObj()); // Update the table to get consistent stats state. sharedCache.alterTableInCache(catName, dbName, tblName, table); } } committed = rawStore.commitTransaction(); } catch (MetaException | NoSuchObjectException e) { LOG.info("Unable to refresh table column stats for table: " + tblName, e); } finally { if (!committed) { sharedCache.removeAllTableColStatsFromCache(catName, dbName, tblName); rawStore.rollbackTransaction(); } } }
@Override public Map<String, String> updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { Map<String, String> newParams = rawStore.updatePartitionColumnStatistics( colStats, partVals, validWriteIds, writeId); // in case of event based cache update, cache is updated during commit txn if (newParams != null && !canUseEvents) { String catName = colStats.getStatsDesc().isSetCatName() ? normalizeIdentifier(colStats.getStatsDesc().getCatName()) : DEFAULT_CATALOG_NAME; String dbName = normalizeIdentifier(colStats.getStatsDesc().getDbName()); String tblName = normalizeIdentifier(colStats.getStatsDesc().getTableName()); if (!shouldCacheTable(catName, dbName, tblName)) { return newParams; } Partition part = getPartition(catName, dbName, tblName, partVals); part.setParameters(newParams); sharedCache.alterPartitionInCache(catName, dbName, tblName, partVals, part); sharedCache.updatePartitionColStatsInCache(catName, dbName, tblName, partVals, colStats.getStatsObj()); } return newParams; }
table.setParameters(newParams); sharedCache.alterTableInCache(catName, dbName, tblName, table); sharedCache.updateTableColStatsInCache(catName, dbName, tblName, colStats.getStatsObj());
colStatsDesc.setTableName(tblDesc.getTableName()); colStatsDesc.setDbName(tblDesc.getDatabaseName()); partDesc.setColStats(new ColumnStatistics(colStatsDesc, colStats.getStatsObj()));