/**
 * Creates a row-level reader over this ORC file.
 *
 * @param options read options (column projection, range, SARG, …)
 * @param conf    Hadoop configuration used by the underlying reader
 * @return a new {@link RecordReaderImpl} bound to this reader
 * @throws IOException if the reader cannot be opened
 */
@Override
public RecordReader rowsOptions(Options options, Configuration conf) throws IOException {
  final String message = "Reading ORC rows from " + path + " with " + options;
  LOG.info(message);
  return new RecordReaderImpl(this, options, conf);
}
/**
 * Dispatches a column copy to the type-specific copier, chosen by the exact
 * runtime class of {@code source} (subclasses deliberately do not match).
 * A vector class with no dedicated copier is silently ignored.
 *
 * @param destination  vector receiving the rows
 * @param source       vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  final Class<?> vectorClass = source.getClass();
  if (vectorClass == LongColumnVector.class) {
    copyLongColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == DoubleColumnVector.class) {
    copyDoubleColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == BytesColumnVector.class) {
    copyBytesColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == TimestampColumnVector.class) {
    copyTimestampColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == DecimalColumnVector.class) {
    copyDecimalColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == StructColumnVector.class) {
    copyStructColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == UnionColumnVector.class) {
    copyUnionColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == ListColumnVector.class) {
    copyListColumn(destination, source, sourceOffset, length);
  } else if (vectorClass == MapColumnVector.class) {
    copyMapColumn(destination, source, sourceOffset, length);
  }
}
/**
 * Copies a struct column: mirrors the repeating/null flags and recursively
 * copies every child field vector.
 *
 * @param destination  struct vector receiving the rows
 * @param source       struct vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyStructColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  StructColumnVector castedSource = (StructColumnVector) source;
  StructColumnVector castedDestination = (StructColumnVector) destination;
  castedDestination.isRepeating = castedSource.isRepeating;
  castedDestination.noNulls = castedSource.noNulls;
  if (source.isRepeating) {
    castedDestination.isNull[0] = castedSource.isNull[0];
    // BUG FIX: loop condition was 'c > fields.length', so the repeated
    // field values were never copied.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], 0, 1);
    }
  } else {
    if (!castedSource.noNulls) {
      for (int r = 0; r < length; ++r) {
        castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
      }
    }
    // BUG FIX: the field copy was in an 'else' of the null-copy branch (so
    // fields were dropped whenever nulls existed) and the loop condition
    // used '>' instead of '<'. Fields must be copied unconditionally.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
    }
  }
}
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) { truthValues[pred] = TruthValue.YES_NO_NULL; } else { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); }
@Override public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException { // If the user hasn't been reading by row, use the fast path. if (rowInBatch >= batch.size) { return super.nextBatch(theirBatch); } copyIntoBatch(theirBatch, batch, rowInBatch); rowInBatch += theirBatch.size; return theirBatch.size > 0; }
private static boolean[] pickStripes(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<StripeStatistics> stripeStats, int stripeCount, Path filePath, final SchemaEvolution evolution) { if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) { return null; // only do split pruning if HIVE-8732 has been fixed in the writer } // eliminate stripes that doesn't satisfy the predicate condition List<PredicateLeaf> sargLeaves = sarg.getLeaves(); int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution); return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution); }
truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, leaf, null); } catch (NoDynamicValuesException dve) { LOG.debug("Dynamic values are not available here {}", dve.getMessage());
@Override public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException { // If the user hasn't been reading by row, use the fast path. if (rowInBatch >= batch.size) { if (batch.size > 0) { // the local batch has been consumed entirely, reset it batch.reset(); } baseRow = super.getRowNumber(); rowInBatch = 0; return super.nextBatch(theirBatch); } copyIntoBatch(theirBatch, batch, rowInBatch); rowInBatch += theirBatch.size; return theirBatch.size > 0; }
/**
 * Dispatches a column copy to the type-specific copier selected by the exact
 * runtime class of {@code source} (an exact-class match, so subclasses do not
 * dispatch). A vector class with no matching branch is silently ignored.
 *
 * @param destination  vector receiving the rows
 * @param source       vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  if (source.getClass() == LongColumnVector.class) {
    copyLongColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == DoubleColumnVector.class) {
    copyDoubleColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == BytesColumnVector.class) {
    copyBytesColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == TimestampColumnVector.class) {
    copyTimestampColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == DecimalColumnVector.class) {
    copyDecimalColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == StructColumnVector.class) {
    copyStructColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == UnionColumnVector.class) {
    copyUnionColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == ListColumnVector.class) {
    copyListColumn(destination, source, sourceOffset, length);
  } else if (source.getClass() == MapColumnVector.class) {
    copyMapColumn(destination, source, sourceOffset, length);
  }
}
castedDestination.tags[0] = tag; if (!castedDestination.isNull[0]) { copyColumn(castedDestination.fields[tag], castedSource.fields[tag], 0, 1); copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx( sarg.getLeaves(), evolution);
/**
 * Creates a row-level reader over this ORC file.
 *
 * @param options read options (column projection, range, SARG, …)
 * @return a new {@link RecordReaderImpl} bound to this reader
 * @throws IOException if the reader cannot be opened
 */
@Override
public RecordReader rowsOptions(Options options) throws IOException {
  final String message = "Reading ORC rows from " + path + " with " + options;
  LOG.info(message);
  return new RecordReaderImpl(this, options);
}
private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); } }
/**
 * Copies a struct column: mirrors the repeating/null flags and recursively
 * copies every child field vector.
 *
 * @param destination  struct vector receiving the rows
 * @param source       struct vector supplying the rows
 * @param sourceOffset first row in {@code source} to copy
 * @param length       number of rows to copy
 */
void copyStructColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  StructColumnVector castedSource = (StructColumnVector) source;
  StructColumnVector castedDestination = (StructColumnVector) destination;
  castedDestination.isRepeating = castedSource.isRepeating;
  castedDestination.noNulls = castedSource.noNulls;
  if (source.isRepeating) {
    castedDestination.isNull[0] = castedSource.isNull[0];
    // BUG FIX: loop condition was 'c > fields.length', so the repeated
    // field values were never copied.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], 0, 1);
    }
  } else {
    if (!castedSource.noNulls) {
      for (int r = 0; r < length; ++r) {
        castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
      }
    }
    // BUG FIX: the field copy was in an 'else' of the null-copy branch (so
    // fields were dropped whenever nulls existed) and the loop condition
    // used '>' instead of '<'. Fields must be copied unconditionally.
    for (int c = 0; c < castedSource.fields.length; ++c) {
      copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
    }
  }
}
int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx( sarg.getLeaves(), evolution);
@Override public RecordReader rowsOptions(Options options) throws IOException { LOG.info("Reading ORC rows from " + path + " with " + options); boolean[] include = options.getInclude(); // if included columns is null, then include all columns if (include == null) { include = new boolean[footer.getTypesCount()]; Arrays.fill(include, true); options.include(include); } return new RecordReaderImpl(this.getStripes(), fileSystem, path, options, footer.getTypesList(), codec, bufferSize, footer.getRowIndexStride(), conf); }
castedDestination.tags[0] = tag; if (!castedDestination.isNull[0]) { copyColumn(castedDestination.fields[tag], castedSource.fields[tag], 0, 1); copyColumn(castedDestination.fields[c], castedSource.fields[c], sourceOffset, length);
castedDestination.offsets[0] = 0; castedDestination.lengths[0] = castedSource.lengths[0]; copyColumn(castedDestination.keys, castedSource.keys, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); copyColumn(castedDestination.values, castedSource.values, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); } else { copyColumn(castedDestination.keys, castedSource.keys, minOffset, castedDestination.childCount); copyColumn(castedDestination.values, castedSource.values, minOffset, castedDestination.childCount); } else {
castedDestination.offsets[0] = 0; castedDestination.lengths[0] = castedSource.lengths[0]; copyColumn(castedDestination.child, castedSource.child, (int) castedSource.offsets[0], (int) castedSource.lengths[0]); } else { copyColumn(castedDestination.child, castedSource.child, minOffset, castedDestination.childCount); } else {