/**
 * Closes the underlying ORC reader, if one is open, and drops the reference.
 *
 * @throws IOException if closing the underlying reader fails
 */
@Override
public void close() throws IOException {
    // Null the field in a finally block so that a close() failure cannot leave
    // a stale reader reference behind; a repeated close() is then a no-op.
    try {
        if (orcRowsReader != null) {
            orcRowsReader.close();
        }
    } finally {
        this.orcRowsReader = null;
    }
}
/**
 * Creates an OrcRowInputFormat with an explicit read batch size.
 *
 * @param path The path to read ORC files from.
 * @param schemaString The schema of the ORC files as String.
 * @param orcConfig The configuration to read the ORC files with.
 * @param batchSize The number of Row objects to read in a batch.
 */
public OrcRowInputFormat(String path, String schemaString, Configuration orcConfig, int batchSize) {
    // this(...) must be the first statement, so the schema string is parsed inline.
    this(path, TypeDescription.fromString(schemaString), orcConfig, batchSize);
}
/**
 * Captures the metadata needed to read one ORC stripe: the stripe key,
 * stream layout, column encodings, writer time zone, row count and row index.
 *
 * @param stripeKey identifies the stripe this metadata belongs to
 * @param footer the stripe footer supplying streams, encodings and time zone
 * @param orcIndex the row index for the stripe
 * @param stripe the stripe information supplying the row count
 * @throws IOException declared by the signature; not thrown by this body
 */
public OrcStripeMetadata(OrcBatchKey stripeKey, OrcProto.StripeFooter footer, OrcIndex orcIndex, StripeInformation stripe) throws IOException {
    this.stripeKey = stripeKey;
    this.rowIndex = orcIndex;
    this.rowCount = stripe.getNumberOfRows();
    this.streams = footer.getStreamsList();
    this.encodings = footer.getColumnsList();
    this.writerTimezone = footer.getWriterTimezone();
}
/**
 * Marks every column id in the range [child.getId(), child.getMaximumId()]
 * as included by setting the corresponding entries of {@code result} to true.
 *
 * @param child the type description whose id range should be included
 * @param result the include flags, indexed by column id; mutated in place
 */
private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
    final int firstId = child.getId();
    final int lastId = child.getMaximumId();
    int col = firstId;
    while (col <= lastId) {
        result[col] = true;
        ++col;
    }
}
/**
 * Set the schema for the file. This is a required parameter.
 * A {@code null} schema is silently ignored and leaves the options unchanged.
 *
 * @param schema the schema for the file.
 * @return this
 */
public WriterOptions setSchema(TypeDescription schema) {
    if (schema == null) {
        // Nothing to record; keep whatever schema (if any) was set before.
        return this;
    }
    this.explicitSchema = true;
    super.setSchema(schema);
    return this;
}
/**
 * Returns the number of rows covered by the given row-index entry,
 * taken from the entry's column statistics.
 *
 * @param rowIndexEntry the row-index entry to read the row count from
 * @return the number of values recorded in the entry's statistics
 */
private long getRowCount(OrcProto.RowIndexEntry rowIndexEntry) { return rowIndexEntry.getStatistics().getNumberOfValues(); }
/**
 * Checks whether the footer's user metadata contains an item named with the
 * ACID key-index name written by {@code OrcRecordUpdater}.
 *
 * @param footer the ORC file footer whose metadata list is scanned
 * @return true if a metadata item with the ACID key-index name is present
 */
public static boolean isOriginal(Footer footer) {
    for (OrcProto.UserMetadataItem item : footer.getMetadataList()) {
        // hasName() guards the lookup; constant-first equals is then equivalent.
        final boolean isAcidMarker = item.hasName()
            && OrcRecordUpdater.ACID_KEY_INDEX_NAME.equals(item.getName());
        if (isAcidMarker) {
            return true;
        }
    }
    return false;
}
/**
 * Sets the generic compression that is used to compress the data.
 *
 * @param value the compression kind to use
 * @return this
 */
public WriterOptions compress(org.apache.orc.CompressionKind value) { super.compress(value); return this; }
/**
 * Set the distance between entries in the row index. The minimum value is
 * 1000 to prevent the index from overwhelming the data. If the stride is
 * set to 0, no indexes will be included in the file.
 *
 * @param value the number of rows between row-index entries, or 0 to disable
 * @return this
 */
public WriterOptions rowIndexStride(int value) { super.rowIndexStride(value); return this; }
/**
 * The size of the memory buffers used for compressing and storing the
 * stripe in memory.
 *
 * @param value the buffer size in bytes
 * @return this
 */
public WriterOptions bufferSize(int value) { super.bufferSize(value); return this; }
/**
 * Set the stripe size for the file. The writer stores the contents of the
 * stripe in memory until this memory limit is reached and the stripe
 * is flushed to the HDFS file and the next stripe started.
 *
 * @param value the stripe size in bytes
 * @return this
 */
public WriterOptions stripeSize(long value) { super.stripeSize(value); return this; }
/**
 * Set the file system block size for the file. For optimal performance,
 * set the block size to be multiple factors of stripe size.
 *
 * @param value the block size in bytes
 * @return this
 */
public WriterOptions blockSize(long value) { super.blockSize(value); return this; }
/**
 * Sets the encoding strategy that is used to encode the data.
 *
 * @param strategy the encoding strategy to use
 * @return this
 */
public WriterOptions encodingStrategy(EncodingStrategy strategy) { super.encodingStrategy(strategy); return this; }
/**
 * Sets whether the HDFS blocks are padded to prevent stripes from
 * straddling blocks. Padding improves locality and thus the speed of
 * reading, but costs space.
 *
 * @param value true to pad blocks, false to allow stripes to straddle blocks
 * @return this
 */
public WriterOptions blockPadding(boolean value) { super.blockPadding(value); return this; }
/**
 * Comma separated values of column names for which bloom filter is to be created.
 *
 * @param columns comma-separated column names
 * @return this
 */
public WriterOptions bloomFilterColumns(String columns) { super.bloomFilterColumns(columns); return this; }
/**
 * Specify the false positive probability for bloom filter.
 *
 * @param fpp - false positive probability
 * @return this
 */
public WriterOptions bloomFilterFpp(double fpp) { super.bloomFilterFpp(fpp); return this; }
/**
 * Passes already-obtained file metadata through to the underlying reader options.
 *
 * @param metadata the file metadata to use
 * @return this
 */
public ReaderOptions fileMetadata(FileMetadata metadata) { super.fileMetadata(metadata); return this; }
/**
 * Creates an OrcRowInputFormat that reads with the default batch size
 * ({@code DEFAULT_BATCH_SIZE}).
 *
 * @param path The path to read ORC files from.
 * @param schemaString The schema of the ORC files as String.
 * @param orcConfig The configuration to read the ORC files with.
 */
public OrcRowInputFormat(String path, String schemaString, Configuration orcConfig) {
    // this(...) must be the first statement, so the schema string is parsed inline.
    this(path, TypeDescription.fromString(schemaString), orcConfig, DEFAULT_BATCH_SIZE);
}
/**
 * Sets the ORC schema of the files to read as a String.
 *
 * @param orcSchema The ORC schema of the files to read as a String.
 * @return The builder.
 */
public Builder forOrcSchema(String orcSchema) {
    Preconditions.checkNotNull(orcSchema, "ORC schema must not be null.");
    // Parse eagerly so an invalid schema string fails here rather than at read time.
    final TypeDescription parsedSchema = TypeDescription.fromString(orcSchema);
    this.schema = parsedSchema;
    return this;
}