/**
 * Estimates the row size for a schema, treating every column in the schema as needed.
 *
 * <p>Collects the internal name of each {@link ColumnInfo} in iteration order and delegates
 * to the three-argument overload with that list as the needed columns.
 *
 * @param conf   configuration forwarded unchanged to the three-argument overload
 * @param schema columns whose internal names form the needed-column list
 * @return the value produced by the three-argument overload for the full column list
 */
public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema) {
  // Every column is "needed" here, so the name list ends up the same length as the schema.
  List<String> allColumnNames = new ArrayList<>(schema.size());
  for (ColumnInfo column : schema) {
    allColumnNames.add(column.getInternalName());
  }
  return estimateRowSizeFromSchema(conf, schema, allColumnNames);
}
private static long getNumRows(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns, Table table, long ds) { long nr = getNumRows(table); // number of rows -1 means that statistics from metastore is not reliable // and 0 means statistics gathering is disabled if (nr <= 0) { int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns); if (avgRowSize > 0) { if (LOG.isDebugEnabled()) { LOG.debug("Estimated average row size: " + avgRowSize); } nr = ds / avgRowSize; } } return nr == 0 ? 1 : nr; }
StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns); newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
basicStatsFactory.addEnhancer(new BasicStats.RowNumEstimator(estimateRowSizeFromSchema(conf, schema)));
int restColumnsDefaultSize = StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns); newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
basicStatsFactory.addEnhancer(new BasicStats.RowNumEstimator(estimateRowSizeFromSchema(conf, schema))); basicStatsFactory.addEnhancer(new BasicStats.SetMinRowNumber01()); basicStatsFactory.addEnhancer(new BasicStats.RowNumEstimator(estimateRowSizeFromSchema(conf, schema)));
ds = (long) (ds * deserFactor); int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns); if (avgRowSize > 0) { setUnknownRcDsToAverage(rowCounts, dataSizes, avgRowSize);
int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns); if (avgRowSize > 0) { if (LOG.isDebugEnabled()) { ds = (long) (ds * deserFactor); int avgRowSize = estimateRowSizeFromSchema(conf, schema, neededColumns); if (avgRowSize > 0) { setUnknownRcDsToAverage(rowCounts, dataSizes, avgRowSize);