/**
 * Picks the row groups of a stripe whose statistics satisfy the predicate.
 *
 * @param stripe stripe whose row count determines how many row groups are examined
 * @param columnIndexes row group indexes used to look up per-row-group statistics
 * @return the zero-based indexes of the row groups accepted by the predicate
 */
private Set<Integer> selectRowGroups(StripeInformation stripe, Map<StreamId, List<RowGroupIndex>> columnIndexes)
{
    int stripeRowCount = toIntExact(stripe.getNumberOfRows());
    int rowGroupCount = ceil(stripeRowCount, rowsInRowGroup);

    ImmutableSet.Builder<Integer> matching = ImmutableSet.builder();
    int rowsLeft = stripeRowCount;
    for (int index = 0; index < rowGroupCount; index++) {
        // every row group holds rowsInRowGroup rows, except that the last one gets only what is left
        int rowGroupRows = Math.min(rowsLeft, rowsInRowGroup);
        rowsLeft -= rowGroupRows;

        Map<Integer, ColumnStatistics> rowGroupStatistics = getRowGroupStatistics(types.get(0), columnIndexes, index);
        if (!predicate.matches(rowGroupRows, rowGroupStatistics)) {
            continue;
        }
        matching.add(index);
    }
    return matching.build();
}
private static boolean isStripeIncluded( OrcType rootStructType, StripeInformation stripe, Optional<StripeStatistics> stripeStats, OrcPredicate predicate) { // if there are no stats, include the column if (!stripeStats.isPresent()) { return true; } return predicate.matches(stripe.getNumberOfRows(), getStatisticsByColumnOrdinal(rootStructType, stripeStats.get().getColumnStatistics())); }
// Adds the row count of the stripe at index (currentStripe - 1) to currentStripePosition,
// then passes the row count of stripeInformation to validateWriteStripe.
currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows(); validateWriteStripe(stripeInformation.getNumberOfRows());
// Registers the stripe and records fileRowCount as its file position before that counter is advanced;
// both totalRowCount and fileRowCount then grow by the stripe's row count.
stripes.add(stripe); stripeFilePositions.add(fileRowCount); totalRowCount += stripe.getNumberOfRows(); fileRowCount += stripe.getNumberOfRows();
.mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()) .sum();
// Bookkeeping for a closed stripe: keep it in closedStripes and add its retained size to the running total,
// hand its row count to recordValidation, and report the flush reason, stripe total length, row count
// and dictionary memory bytes to stats.
closedStripes.add(closedStripe); closedStripesRetainedBytes += closedStripe.getRetainedSizeInBytes(); recordValidation(validation -> validation.addStripe(stripeInformation.getNumberOfRows())); stats.recordStripeWritten(flushReason, stripeInformation.getTotalLength(), stripeInformation.getNumberOfRows(), dictionaryCompressionOptimizer.getDictionaryMemoryBytes());
if ((stripe.getNumberOfRows() > rowsInRowGroup) || hasRowGroupDictionary) { stripe.getNumberOfRows(), streams, valueStreams, columnEncodings); return new Stripe(stripe.getNumberOfRows(), columnEncodings, rowGroups, dictionaryStreamSources); builder.put(entry.getKey(), new ValueInputStreamSource<>(entry.getValue())); RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), minAverageRowBytes, new InputStreamSources(builder.build())); return new Stripe(stripe.getNumberOfRows(), columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources);
/**
 * Copies the row count, offset and section lengths of a stripe into an
 * {@code OrcProto.StripeInformation} message.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static OrcProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    OrcProto.StripeInformation.Builder message = OrcProto.StripeInformation.newBuilder();
    message.setNumberOfRows(stripe.getNumberOfRows());
    message.setOffset(stripe.getOffset());
    message.setIndexLength(stripe.getIndexLength());
    message.setDataLength(stripe.getDataLength());
    message.setFooterLength(stripe.getFooterLength());
    return message.build();
}
/**
 * Copies the row count, offset and section lengths of a stripe into a
 * {@code DwrfProto.StripeInformation} message.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static DwrfProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    DwrfProto.StripeInformation.Builder message = DwrfProto.StripeInformation.newBuilder();
    message.setNumberOfRows(stripe.getNumberOfRows());
    message.setOffset(stripe.getOffset());
    message.setIndexLength(stripe.getIndexLength());
    message.setDataLength(stripe.getDataLength());
    message.setFooterLength(stripe.getFooterLength());
    return message.build();
}
/**
 * Evaluates the predicate against the statistics of each row group in a stripe
 * and collects the row groups that match.
 *
 * @param stripe stripe whose row count determines how many row groups are examined
 * @param columnIndexes row group indexes used to look up per-row-group statistics
 * @return the zero-based indexes of the matching row groups
 */
private Set<Integer> selectRowGroups(StripeInformation stripe, Map<StreamId, List<RowGroupIndex>> columnIndexes)
{
    int totalRows = toIntExact(stripe.getNumberOfRows());
    int groupCount = ceil(totalRows, rowsInRowGroup);

    ImmutableSet.Builder<Integer> accepted = ImmutableSet.builder();
    int unassignedRows = totalRows;
    int groupIndex = 0;
    while (groupIndex < groupCount) {
        // the final row group may be shorter than rowsInRowGroup
        int groupRows = Math.min(unassignedRows, rowsInRowGroup);
        Map<Integer, ColumnStatistics> groupStatistics = getRowGroupStatistics(types.get(0), columnIndexes, groupIndex);
        boolean matches = predicate.matches(groupRows, groupStatistics);
        if (matches) {
            accepted.add(groupIndex);
        }
        unassignedRows -= groupRows;
        groupIndex++;
    }
    return accepted.build();
}
private static boolean isStripeIncluded( OrcType rootStructType, StripeInformation stripe, Optional<StripeStatistics> stripeStats, OrcPredicate predicate) { // if there are no stats, include the column if (!stripeStats.isPresent()) { return true; } return predicate.matches(stripe.getNumberOfRows(), getStatisticsByColumnOrdinal(rootStructType, stripeStats.get().getColumnStatistics())); }
/**
 * Picks the row groups of a stripe whose statistics satisfy the predicate.
 *
 * @param stripe stripe whose row count determines how many row groups are examined
 * @param columnIndexes row group indexes used to look up per-row-group statistics
 * @return the zero-based indexes of the row groups accepted by the predicate
 * @throws IOException declared by the method signature
 */
private Set<Integer> selectRowGroups(StripeInformation stripe, Map<Integer, List<RowGroupIndex>> columnIndexes)
        throws IOException
{
    int stripeRowCount = Ints.checkedCast(stripe.getNumberOfRows());
    int rowGroupCount = ceil(stripeRowCount, rowsInRowGroup);

    ImmutableSet.Builder<Integer> matching = ImmutableSet.builder();
    int rowsLeft = stripeRowCount;
    for (int index = 0; index < rowGroupCount; index++) {
        // every row group holds rowsInRowGroup rows, except that the last one gets only what is left
        int rowGroupRows = Math.min(rowsLeft, rowsInRowGroup);
        rowsLeft -= rowGroupRows;

        Map<Integer, ColumnStatistics> rowGroupStatistics = getRowGroupStatistics(types.get(0), columnIndexes, index);
        if (!predicate.matches(rowGroupRows, rowGroupStatistics)) {
            continue;
        }
        matching.add(index);
    }
    return matching.build();
}
private static boolean isStripeIncluded( OrcType rootStructType, StripeInformation stripe, Optional<StripeStatistics> stripeStats, OrcPredicate predicate) { // if there are no stats, include the column if (!stripeStats.isPresent()) { return true; } return predicate.matches(stripe.getNumberOfRows(), getStatisticsByColumnOrdinal(rootStructType, stripeStats.get().getColumnStatistics())); }
private void advanceToNextStripe() throws IOException { currentStripeSystemMemoryContext.close(); currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext(); rowGroups = ImmutableList.<RowGroup>of().iterator(); currentStripe++; if (currentStripe >= stripes.size()) { return; } if (currentStripe > 0) { currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows(); } StripeInformation stripeInformation = stripes.get(currentStripe); Stripe stripe = stripeReader.readStripe(stripeInformation, currentStripeSystemMemoryContext); if (stripe != null) { // Give readers access to dictionary streams StreamSources dictionaryStreamSources = stripe.getDictionaryStreamSources(); List<ColumnEncoding> columnEncodings = stripe.getColumnEncodings(); for (StreamReader column : streamReaders) { if (column != null) { column.startStripe(dictionaryStreamSources, columnEncodings); } } rowGroups = stripe.getRowGroups().iterator(); } }
.mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()) .sum();
// Adds the row count of the stripe at index (currentStripe - 1) to currentStripePosition,
// then passes the row count of stripeInformation to validateWriteStripe.
currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows(); validateWriteStripe(stripeInformation.getNumberOfRows());
// Bookkeeping for a closed stripe: keep it in closedStripes and add its retained size to the running total,
// hand its row count to recordValidation, and report the flush reason, stripe total length, row count
// and dictionary memory bytes to stats.
closedStripes.add(closedStripe); closedStripesRetainedBytes += closedStripe.getRetainedSizeInBytes(); recordValidation(validation -> validation.addStripe(stripeInformation.getNumberOfRows())); stats.recordStripeWritten(flushReason, stripeInformation.getTotalLength(), stripeInformation.getNumberOfRows(), dictionaryCompressionOptimizer.getDictionaryMemoryBytes());
if ((stripe.getNumberOfRows() > rowsInRowGroup) || hasRowGroupDictionary) { stripe.getNumberOfRows(), streams, valueStreams, columnEncodings); return new Stripe(stripe.getNumberOfRows(), columnEncodings, rowGroups, dictionaryStreamSources); builder.put(entry.getKey(), new ValueStreamSource<>(entry.getValue())); RowGroup rowGroup = new RowGroup(0, 0, stripe.getNumberOfRows(), new StreamSources(builder.build())); return new Stripe(stripe.getNumberOfRows(), columnEncodings, ImmutableList.of(rowGroup), dictionaryStreamSources);
/**
 * Builds a {@code DwrfProto.StripeInformation} message carrying the stripe's row count,
 * offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static DwrfProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    DwrfProto.StripeInformation.Builder builder = DwrfProto.StripeInformation.newBuilder();
    builder.setNumberOfRows(stripe.getNumberOfRows());
    builder.setOffset(stripe.getOffset());
    builder.setIndexLength(stripe.getIndexLength());
    builder.setDataLength(stripe.getDataLength());
    builder.setFooterLength(stripe.getFooterLength());
    return builder.build();
}
/**
 * Builds an {@code OrcProto.StripeInformation} message carrying the stripe's row count,
 * offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static OrcProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    OrcProto.StripeInformation.Builder builder = OrcProto.StripeInformation.newBuilder();
    builder.setNumberOfRows(stripe.getNumberOfRows());
    builder.setOffset(stripe.getOffset());
    builder.setIndexLength(stripe.getIndexLength());
    builder.setDataLength(stripe.getDataLength());
    builder.setFooterLength(stripe.getFooterLength());
    return builder.build();
}