/**
 * Creates a row-level reader over this ORC file.
 *
 * @param options read options (column projection, range, SARG, …)
 * @param conf    Hadoop configuration used by the underlying reader
 * @return a new {@link RecordReaderImpl} bound to this reader
 * @throws IOException if the reader cannot be opened
 */
@Override
public RecordReader rowsOptions(Options options, Configuration conf) throws IOException {
  final String message = "Reading ORC rows from " + path + " with " + options;
  LOG.info(message);
  return new RecordReaderImpl(this, options, conf);
}
/**
 * Dispatches a column copy to the type-specific copier, chosen by the exact
 * runtime class of {@code source} (subclasses deliberately do not match).
 * A vector class with no dedicated copier is silently ignored.
 *
 * @param destination  vector receiving the rows
 * @param source       vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  final Class<?> vectorClass = source.getClass();
  if (vectorClass == LongColumnVector.class) {
    copyLongColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == DoubleColumnVector.class) {
    copyDoubleColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == BytesColumnVector.class) {
    copyBytesColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == TimestampColumnVector.class) {
    copyTimestampColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == DecimalColumnVector.class) {
    copyDecimalColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == StructColumnVector.class) {
    copyStructColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == UnionColumnVector.class) {
    copyUnionColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == ListColumnVector.class) {
    copyListColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == MapColumnVector.class) {
    copyMapColumn(destination, source, sourceOffset, length);
  }
}
/**
 * Copies a struct column: mirrors the repeating/null flags and recursively
 * copies every child field vector.
 *
 * @param destination  struct vector receiving the rows
 * @param source       struct vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyStructColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  StructColumnVector castedSource = (StructColumnVector) source;
  StructColumnVector castedDestination = (StructColumnVector) destination;
  castedDestination.isRepeating = castedSource.isRepeating;
  castedDestination.noNulls = castedSource.noNulls;
  if (source.isRepeating) {
    castedDestination.isNull[0] = castedSource.isNull[0];
    // BUG FIX: loop condition was 'c > fields.length', so the repeated
    // field values were never copied.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], 0, 1);
    }
  } else {
    if (!castedSource.noNulls) {
      for (int r = 0; r < length; ++r) {
        castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
      }
    }
    // BUG FIX: the field copy was in an 'else' of the null-copy branch (so
    // fields were dropped whenever nulls existed) and the loop condition
    // used '>' instead of '<'. Fields must be copied unconditionally.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
    }
  }
}
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) { truthValues[pred] = TruthValue.YES_NO_NULL; } else { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); }
@Override public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException { // If the user hasn't been reading by row, use the fast path. if (rowInBatch >= batch.size) { return super.nextBatch(theirBatch); } copyIntoBatch(theirBatch, batch, rowInBatch); rowInBatch += theirBatch.size; return theirBatch.size > 0; }
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, leaf, null); } catch (NoDynamicValuesException dve) { LOG.debug("Dynamic values are not available here {}", dve.getMessage());
@Override public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException { // If the user hasn't been reading by row, use the fast path. if (rowInBatch >= batch.size) { if (batch.size > 0) { // the local batch has been consumed entirely, reset it batch.reset(); } baseRow = super.getRowNumber(); rowInBatch = 0; return super.nextBatch(theirBatch); } copyIntoBatch(theirBatch, batch, rowInBatch); rowInBatch += theirBatch.size; return theirBatch.size > 0; }
/**
 * Dispatches a column copy to the type-specific copier selected by the exact
 * runtime class of {@code source} (an exact-class match, so subclasses do not
 * dispatch). A vector class with no matching branch is silently ignored.
 *
 * @param destination  vector receiving the rows
 * @param source       vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  if (source.getClass() == LongColumnVector.class) {
    copyLongColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == DoubleColumnVector.class) {
    copyDoubleColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == BytesColumnVector.class) {
    copyBytesColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == TimestampColumnVector.class) {
    copyTimestampColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == DecimalColumnVector.class) {
    copyDecimalColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == StructColumnVector.class) {
    copyStructColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == UnionColumnVector.class) {
    copyUnionColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == ListColumnVector.class) {
    copyListColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == MapColumnVector.class) {
    copyMapColumn(destination, source, sourceOffset, length);
  }
}
castedDestination.tags[0] = tag; if (!castedDestination.isNull[0]) { copyColumn(castedDestination.fields[tag], castedSource.fields[tag], 0, 1); copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx( sarg.getLeaves(), evolution);
/**
 * Creates a row-level reader over this ORC file.
 *
 * @param options read options (column projection, range, SARG, …)
 * @return a new {@link RecordReaderImpl} bound to this reader
 * @throws IOException if the reader cannot be opened
 */
@Override
public RecordReader rowsOptions(Options options) throws IOException {
  final String message = "Reading ORC rows from " + path + " with " + options;
  LOG.info(message);
  return new RecordReaderImpl(this, options);
}
private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); } }
/**
 * Copies a struct column: mirrors the repeating/null flags and recursively
 * copies every child field vector.
 *
 * @param destination  struct vector receiving the rows
 * @param source       struct vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyStructColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  StructColumnVector castedSource = (StructColumnVector) source;
  StructColumnVector castedDestination = (StructColumnVector) destination;
  castedDestination.isRepeating = castedSource.isRepeating;
  castedDestination.noNulls = castedSource.noNulls;
  if (source.isRepeating) {
    castedDestination.isNull[0] = castedSource.isNull[0];
    // BUG FIX: loop condition was 'c > fields.length', so the repeated
    // field values were never copied.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], 0, 1);
    }
  } else {
    if (!castedSource.noNulls) {
      for (int r = 0; r < length; ++r) {
        castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
      }
    }
    // BUG FIX: the field copy was in an 'else' of the null-copy branch (so
    // fields were dropped whenever nulls existed) and the loop condition
    // used '>' instead of '<'. Fields must be copied unconditionally.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
    }
  }
}
int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx( sarg.getLeaves(), evolution);
@Override public RecordReader rowsOptions(Options options) throws IOException { LOG.info("Reading ORC rows from " + path + " with " + options); boolean[] include = options.getInclude(); // if included columns is null, then include all columns if (include == null) { include = new boolean[footer.getTypesCount()]; Arrays.fill(include, true); options.include(include); } return new RecordReaderImpl(this.getStripes(), fileSystem, path, options, footer.getTypesList(), codec, bufferSize, footer.getRowIndexStride(), conf); }
castedDestination.tags[0] = tag; if (!castedDestination.isNull[0]) { copyColumn(castedDestination.fields[tag], castedSource.fields[tag], 0, 1); copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
castedDestination.offsets[0] = 0; castedDestination.lengths[0] = castedSource.lengths[0]; copyColumn(castedDestination.keys, castedSource.keys, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); copyColumn(castedDestination.values, castedSource.values, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); } else { copyColumn(castedDestination.keys, castedSource.keys, minOffset, castedDestination.childCount); copyColumn(castedDestination.values, castedSource.values, minOffset, castedDestination.childCount); } else {
castedDestination.offsets[0] = 0; castedDestination.lengths[0] = castedSource.lengths[0]; copyColumn(castedDestination.child, castedSource.child, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); } else { copyColumn(castedDestination.child, castedSource.child, minOffset, castedDestination.childCount); } else {