private GTScanRange mergeKeyRange(List<GTScanRange> ranges) { GTScanRange first = ranges.get(0); if (ranges.size() == 1) return first; GTRecord start = first.pkStart; GTRecord end = first.pkEnd; Set<GTRecord> newFuzzyKeys = Sets.newLinkedHashSet(); boolean hasNonFuzzyRange = false; for (GTScanRange range : ranges) { hasNonFuzzyRange = hasNonFuzzyRange || range.fuzzyKeys.isEmpty(); newFuzzyKeys.addAll(range.fuzzyKeys); end = rangeEndComparator.max(end, range.pkEnd); } // if any range is non-fuzzy, then all fuzzy keys must be cleared // too many fuzzy keys will slow down HBase scan if (hasNonFuzzyRange || newFuzzyKeys.size() > maxFuzzyKeys) { if (newFuzzyKeys.size() > maxFuzzyKeys) { logger.debug("too many FuzzyKeys, clean it!"); } newFuzzyKeys.clear(); } return new GTScanRange(start, end, Lists.newArrayList(newFuzzyKeys)); }
private List<GTScanRange> splitFuzzyKeys(List<GTScanRange> mergedRanges) { List<GTScanRange> result = Lists.newArrayList(); for (GTScanRange range : mergedRanges) { // if the fuzzy key is huge but still within in split range, then we split fuzzy keys to multiple ones. if (range.fuzzyKeys.size() > maxFuzzyKeysPerSplit && range.fuzzyKeys.size() <= maxFuzzyKeys) { List<GTRecord> fuzzyKeys = range.fuzzyKeys; Collections.sort(fuzzyKeys); int nSplit = (fuzzyKeys.size() - 1) / maxFuzzyKeysPerSplit + 1; int nFuzzyKeysPerSplit = fuzzyKeys.size() / nSplit; int startIndex = 0; for (int i = 1; i <= nSplit; i++) { int endIndex = i == nSplit ? fuzzyKeys.size() : i * nFuzzyKeysPerSplit; List<GTRecord> subFuzzyKeys = fuzzyKeys.subList(startIndex, endIndex); result.add(new GTScanRange(range.pkStart, range.pkEnd, subFuzzyKeys)); startIndex = endIndex; } logger.debug(String.format(Locale.ROOT, "large FuzzyKeys split size : %d", result.size())); } else { result.add(range); } } return result; }
public GTScanRange replaceGTInfo(final GTInfo gtInfo) { List<GTRecord> newFuzzyKeys = Lists.newArrayList(); for (GTRecord input : fuzzyKeys) { newFuzzyKeys.add(new GTRecord(gtInfo, input.cols)); } return new GTScanRange(new GTRecord(gtInfo, pkStart.cols), // new GTRecord(gtInfo, pkEnd.cols), // newFuzzyKeys); }
/**
 * Turns a conjunction of column ranges into one scan range.
 *
 * <p>For every column range whose column index is set in {@code gtInfo.getPrimaryKey()}, the
 * range's {@code begin} and {@code end} are written into the start and end records at that
 * index, and a non-empty {@code valueSet} is remembered for fuzzy-key generation via
 * {@code buildFuzzyKeys}. Column ranges on other columns are ignored.
 *
 * @param andDimRanges the column ranges to combine
 * @return a scan range with the populated start/end records and the generated fuzzy keys
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    GTRecord rangeStart = new GTRecord(gtInfo);
    GTRecord rangeEnd = new GTRecord(gtInfo);
    Map<Integer, Set<ByteArray>> valuesByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        int columnIndex = dimRange.column.getColumnDesc().getZeroBasedIndex();
        if (gtInfo.getPrimaryKey().get(columnIndex)) {
            rangeStart.set(columnIndex, dimRange.begin);
            rangeEnd.set(columnIndex, dimRange.end);

            boolean hasValues = dimRange.valueSet != null && !dimRange.valueSet.isEmpty();
            if (hasValues) {
                valuesByColumn.put(columnIndex, dimRange.valueSet);
            }
        }
    }

    List<GTRecord> generatedFuzzyKeys = buildFuzzyKeys(valuesByColumn);
    return new GTScanRange(rangeStart, rangeEnd, generatedFuzzyKeys);
}
this.info = info; if (ranges == null) { this.ranges = Lists.newArrayList(new GTScanRange(new GTRecord(info), new GTRecord(info))); } else { this.ranges = ranges;
sFuzzyKeys.add(deserializeGTRecord(in, sInfo)); GTScanRange sRange = new GTScanRange(sPkStart, sPkEnd, sFuzzyKeys); sRanges.add(sRange);
private List<GTScanRange> splitFuzzyKeys(List<GTScanRange> mergedRanges) { List<GTScanRange> result = Lists.newArrayList(); for (GTScanRange range : mergedRanges) { // if the fuzzy key is huge but still within in split range, then we split fuzzy keys to multiple ones. if (range.fuzzyKeys.size() > maxFuzzyKeysPerSplit && range.fuzzyKeys.size() <= maxFuzzyKeys) { List<GTRecord> fuzzyKeys = range.fuzzyKeys; Collections.sort(fuzzyKeys); int nSplit = (fuzzyKeys.size() - 1) / maxFuzzyKeysPerSplit + 1; int nFuzzyKeysPerSplit = fuzzyKeys.size() / nSplit; int startIndex = 0; for (int i = 1; i <= nSplit; i++) { int endIndex = i == nSplit ? fuzzyKeys.size() : i * nFuzzyKeysPerSplit; List<GTRecord> subFuzzyKeys = fuzzyKeys.subList(startIndex, endIndex); result.add(new GTScanRange(range.pkStart, range.pkEnd, subFuzzyKeys)); startIndex = endIndex; } logger.debug(String.format(Locale.ROOT, "large FuzzyKeys split size : %d", result.size())); } else { result.add(range); } } return result; }
private GTScanRange mergeKeyRange(List<GTScanRange> ranges) { GTScanRange first = ranges.get(0); if (ranges.size() == 1) return first; GTRecord start = first.pkStart; GTRecord end = first.pkEnd; Set<GTRecord> newFuzzyKeys = Sets.newLinkedHashSet(); boolean hasNonFuzzyRange = false; for (GTScanRange range : ranges) { hasNonFuzzyRange = hasNonFuzzyRange || range.fuzzyKeys.isEmpty(); newFuzzyKeys.addAll(range.fuzzyKeys); end = rangeEndComparator.max(end, range.pkEnd); } // if any range is non-fuzzy, then all fuzzy keys must be cleared // too many fuzzy keys will slow down HBase scan if (hasNonFuzzyRange || newFuzzyKeys.size() > maxFuzzyKeys) { if (newFuzzyKeys.size() > maxFuzzyKeys) { logger.debug("too many FuzzyKeys, clean it!"); } newFuzzyKeys.clear(); } return new GTScanRange(start, end, Lists.newArrayList(newFuzzyKeys)); }
public GTScanRange replaceGTInfo(final GTInfo gtInfo) { List<GTRecord> newFuzzyKeys = Lists.newArrayList(); for (GTRecord input : fuzzyKeys) { newFuzzyKeys.add(new GTRecord(gtInfo, input.cols)); } return new GTScanRange(new GTRecord(gtInfo, pkStart.cols), // new GTRecord(gtInfo, pkEnd.cols), // newFuzzyKeys); }
/**
 * Turns a conjunction of column ranges into one scan range.
 *
 * <p>For every column range whose column index is set in {@code gtInfo.getPrimaryKey()}, the
 * range's {@code begin} and {@code end} are written into the start and end records at that
 * index, and a non-empty {@code valueSet} is remembered for fuzzy-key generation via
 * {@code buildFuzzyKeys}. Column ranges on other columns are ignored.
 *
 * @param andDimRanges the column ranges to combine
 * @return a scan range with the populated start/end records and the generated fuzzy keys
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    GTRecord rangeStart = new GTRecord(gtInfo);
    GTRecord rangeEnd = new GTRecord(gtInfo);
    Map<Integer, Set<ByteArray>> valuesByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        int columnIndex = dimRange.column.getColumnDesc().getZeroBasedIndex();
        if (gtInfo.getPrimaryKey().get(columnIndex)) {
            rangeStart.set(columnIndex, dimRange.begin);
            rangeEnd.set(columnIndex, dimRange.end);

            boolean hasValues = dimRange.valueSet != null && !dimRange.valueSet.isEmpty();
            if (hasValues) {
                valuesByColumn.put(columnIndex, dimRange.valueSet);
            }
        }
    }

    List<GTRecord> generatedFuzzyKeys = buildFuzzyKeys(valuesByColumn);
    return new GTScanRange(rangeStart, rangeEnd, generatedFuzzyKeys);
}
this.info = info; if (ranges == null) { this.ranges = Lists.newArrayList(new GTScanRange(new GTRecord(info), new GTRecord(info))); } else { this.ranges = ranges;
sFuzzyKeys.add(deserializeGTRecord(in, sInfo)); GTScanRange sRange = new GTScanRange(sPkStart, sPkEnd, sFuzzyKeys); sRanges.add(sRange);