/**
 * Scales every element of a list by the same factor.
 *
 * <p>Each element is passed, together with {@code b}, to the scalar
 * {@code safeMult} overload and the results are collected in input order.
 * The input list is not modified.
 *
 * @param l values to scale
 * @param b factor applied to every element
 * @return a new list holding one scaled value per input element
 */
public static List<Long> safeMult(List<Long> l, float b) {
  final List<Long> scaled = new ArrayList<>();
  for (Long element : l) {
    final long product = safeMult(element, b);
    scaled.add(product);
  }
  return scaled;
}
/**
 * Fills in missing row counts and data sizes using an average row size.
 *
 * <p>The two lists are processed position by position and updated in place:
 * <ul>
 *   <li>row count {@code <= 0} and data size {@code > 0}: the row count
 *       becomes {@code dataSize / avgRowSize};</li>
 *   <li>data size {@code <= 0} and row count {@code > 0}: the data size
 *       becomes {@code safeMult(rowCount, avgRowSize)}.</li>
 * </ul>
 * Entries where both values are known, or both are unknown, are left as is.
 *
 * <p>If {@code avgRowSize} is not positive nothing is changed: a zero value
 * would make the division throw {@link ArithmeticException} and a negative
 * value would produce negative estimates.
 *
 * @param rowCounts row counts, updated in place
 * @param dataSizes data sizes, updated in place; indexed in parallel with
 *        {@code rowCounts}
 * @param avgRowSize average row size used for the estimates
 */
private static void setUnknownRcDsToAverage(
    List<Long> rowCounts, List<Long> dataSizes, int avgRowSize) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Estimated average row size: " + avgRowSize);
  }
  if (avgRowSize <= 0) {
    // No usable average: dividing by it would throw or yield negative counts.
    return;
  }
  for (int i = 0; i < rowCounts.size(); i++) {
    long rc = rowCounts.get(i);
    long s = dataSizes.get(i);
    if (rc <= 0 && s > 0) {
      // Row count unknown: derive it from the data size.
      rc = s / avgRowSize;
      rowCounts.set(i, rc);
    }
    if (s <= 0 && rc > 0) {
      // Data size unknown: derive it from the row count.
      s = safeMult(rc, avgRowSize);
      dataSizes.set(i, s);
    }
  }
}
private long computeRowCountAssumingInnerJoin(List<Long> rowCountParents, long denom, CommonJoinOperator<? extends JoinDesc> join) { double factor = 0.0d; long result = 1; long max = rowCountParents.get(0); long maxIdx = 0; // To avoid long overflow, we will divide the max row count by denominator // and use that factor to multiply with other row counts for (int i = 1; i < rowCountParents.size(); i++) { if (rowCountParents.get(i) > max) { max = rowCountParents.get(i); maxIdx = i; } } denom = denom == 0 ? 1 : denom; factor = (double) max / (double) denom; for (int i = 0; i < rowCountParents.size(); i++) { if (i != maxIdx) { result = StatsUtils.safeMult(result, rowCountParents.get(i)); } } result = (long) (result * factor); return result; }
private long getDenominatorForUnmatchedRows(List<Long> distinctVals) { if (distinctVals.isEmpty()) { return 2; } // simple join from 2 relations: denom = min(v1, v2) if (distinctVals.size() <= 2) { return Collections.min(distinctVals); } else { // remember max value and ignore it from the denominator long maxNDV = distinctVals.get(0); int maxIdx = 0; for (int i = 1; i < distinctVals.size(); i++) { if (distinctVals.get(i) > maxNDV) { maxNDV = distinctVals.get(i); maxIdx = i; } } // join from multiple relations: // denom = Product of all NDVs except the greatest of all long denom = 1; for (int i = 0; i < distinctVals.size(); i++) { if (i != maxIdx) { denom = StatsUtils.safeMult(denom, distinctVals.get(i)); } } return denom; } }
// Folds every entry of distinctVals except the one at index minIdx into denom
// using StatsUtils.safeMult. (Fragment: the enclosing braces close outside this view.)
for (int i = 0; i < distinctVals.size(); i++) { if (i != minIdx) { denom = StatsUtils.safeMult(denom, distinctVals.get(i));
// Folds every entry of distinctVals except the one at index minIdx into denom
// using StatsUtils.safeMult. (Fragment: the enclosing braces close outside this view.)
for (int i = 0; i < distinctVals.size(); i++) { if (i != minIdx) { denom = StatsUtils.safeMult(denom, distinctVals.get(i));
public Statistics scaleToRowCount(long newRowCount, boolean downScaleOnly) { Statistics ret = clone(); if (numRows == 0) { return ret; } if (downScaleOnly && newRowCount >= numRows) { return ret; } // FIXME: using real scaling by new/old ration might yield better results? ret.numRows = newRowCount; ret.dataSize = StatsUtils.safeMult(getAvgRowSize(), newRowCount); return ret; }
private static long estimateNDV(long numRows, List<ColStatistics> columnStats) { // If there is a single column, return the number of distinct values if (columnStats.size() == 1) { return columnStats.get(0).getCountDistint(); } // The expected number of distinct values when choosing p values // with replacement from n integers is n . (1 - ((n - 1) / n) ^ p). // // If we have several uniformly distributed attributes A1 ... Am // with N1 ... Nm distinct values, they behave as one uniformly // distributed attribute with N1 * ... * Nm distinct values. long n = 1L; for (ColStatistics cs : columnStats) { final long ndv = cs.getCountDistint(); if (ndv > 1) { n = StatsUtils.safeMult(n, ndv); } } final double nn = n; final double a = (nn - 1d) / nn; if (a == 1d) { // A under-flows if nn is large. return numRows; } final double v = nn * (1d - Math.pow(a, numRows)); // Cap at fact-row-count, because numerical artifacts can cause it // to go a few % over. return Math.min(Math.round(v), numRows); }
/**
 * Ranks operators by accumulated size, largest first.
 *
 * <p>The accumulated size of an operator is its number of child operators
 * multiplied (via {@code StatsUtils.safeMult}) by the data size from its
 * statistics; an operator without statistics counts as size 0. Ties on size
 * are broken by the operators' {@code toString()} values, also in reverse
 * order.
 *
 * @param opsSet operators to rank
 * @return entries of (operator, accumulated size) sorted in descending order
 */
private static List<Entry<Operator<?>, Long>> rankOpsByAccumulatedSize(Set<Operator<?>> opsSet) {
  final Map<Operator<?>, Long> accumulated = new HashMap<>();
  for (Operator<?> op : opsSet) {
    long size = 0L;
    if (op.getStatistics() != null) {
      size = op.getStatistics().getDataSize();
    }
    final long total = StatsUtils.safeMult(op.getChildOperators().size(), size);
    accumulated.put(op, total);
  }

  // Ascending order: by size first, then by the operator's string form.
  final Comparator<Map.Entry<Operator<?>, Long>> bySizeThenName =
      new Comparator<Map.Entry<Operator<?>, Long>>() {
        @Override
        public int compare(Map.Entry<Operator<?>, Long> left,
            Map.Entry<Operator<?>, Long> right) {
          final int bySize = left.getValue().compareTo(right.getValue());
          if (bySize != 0) {
            return bySize;
          }
          return left.getKey().toString().compareTo(right.getKey().toString());
        }
      };

  final List<Entry<Operator<?>, Long>> ranked = new ArrayList<>(accumulated.entrySet());
  Collections.sort(ranked, Collections.reverseOrder(bySizeThenName));
  return ranked;
}
// Reads the column's distinct count and, only when it is greater than 1,
// multiplies it into n via StatsUtils.safeMult. (Fragment: the if-block closes outside this view.)
final long ndv = cs.getCountDistint(); if (ndv > 1) { n = StatsUtils.safeMult(n, ndv);
// Returns (inputRowCount / distinctVal) multiplied by distinctUnmatched via StatsUtils.safeMult.
// NOTE(review): the operand types are declared outside this fragment; if they are integral the
// division truncates and a zero distinctVal would throw - confirm the caller guards against that.
return StatsUtils.safeMult(inputRowCount / distinctVal, distinctUnmatched);
// Takes the value returned by JavaDataModel.get().object() as the overhead, then adds
// nonNullCount * overhead to the running result using the StatsUtils safe arithmetic helpers.
overhead = JavaDataModel.get().object(); result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, overhead), result);
// Multiplies the running result by the i-th entry of rowCountParents via StatsUtils.safeMult.
result = StatsUtils.safeMult(result, rowCountParents.get(i));
// Uses the column's average length (cs.getAvgColLen()) as sizeOf, then adds
// nonNullCount * sizeOf to the running result via safeAdd/safeMult.
sizeOf = cs.getAvgColLen(); result = safeAdd(result, safeMult(nonNullCount, sizeOf));
// Uses the column's average length (cs.getAvgColLen()) as sizeOf, then adds
// nonNullCount * sizeOf to the running result via safeAdd/safeMult.
sizeOf = cs.getAvgColLen(); result = safeAdd(result, safeMult(nonNullCount, sizeOf));
// Obtains a size from StatsUtils.estimateRowSizeFromSchema for the operator's schema signature
// and neededColumns, then adds restColumnsDefaultSize * newNumRows to newDataSize using the
// StatsUtils safe arithmetic helpers.
int restColumnsDefaultSize = StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns); newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
// Adds restColumnsDefaultSize * newNumRows to newDataSize using the StatsUtils safe arithmetic helpers.
newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
// Scales the parent's data size by udtfFactor via StatsUtils.safeMult, then stores numRows
// and the scaled data size on st.
long dataSize = StatsUtils.safeMult(parentStats.getDataSize(), udtfFactor); st.setNumRows(numRows); st.setDataSize(dataSize);
@Override public void apply(BasicStats stats) { // FIXME: there were different logic for part/table; merge these logics later if (stats.partish.getPartition() == null) { if (stats.getNumRows() < 0 && avgRowSize > 0) { stats.setNumRows(stats.getDataSize() / avgRowSize); } } else { if (avgRowSize > 0) { long rc = stats.getNumRows(); long s = stats.getDataSize(); if (rc <= 0 && s > 0) { rc = s / avgRowSize; stats.setNumRows(rc); } if (s <= 0 && rc > 0) { s = StatsUtils.safeMult(rc, avgRowSize); stats.setDataSize(s); } } } if (stats.getNumRows() > 0) { // FIXME: this promotion process should be removed later if (State.PARTIAL.morePreciseThan(stats.state)) { stats.state = State.PARTIAL; } } } }
// Uses limit as the row count and computes the data size as the parent's average row size
// multiplied by limit (via StatsUtils.safeMult), then stores both on wcStats.
long numRows = limit; long avgRowSize = parentStats.getAvgRowSize(); long dataSize = StatsUtils.safeMult(avgRowSize, limit); wcStats.setNumRows(numRows); wcStats.setDataSize(dataSize);