/**
 * Closes the underlying ORC reader, if one is open, and drops the reference.
 *
 * @throws IOException if closing the underlying reader fails
 */
@Override
public void close() throws IOException {
    // Null the field in a finally block so that a close() failure cannot leave
    // a stale reader reference behind; a repeated close() is then a no-op.
    try {
        if (orcRowsReader != null) {
            orcRowsReader.close();
        }
    } finally {
        this.orcRowsReader = null;
    }
}
/**
 * Creates an OrcRowInputFormat with an explicit read batch size.
 *
 * @param path The path to read ORC files from.
 * @param schemaString The schema of the ORC files as String.
 * @param orcConfig The configuration to read the ORC files with.
 * @param batchSize The number of Row objects to read in a batch.
 */
public OrcRowInputFormat(String path, String schemaString, Configuration orcConfig, int batchSize) {
    // this(...) must be the first statement, so the schema string is parsed inline.
    this(path, TypeDescription.fromString(schemaString), orcConfig, batchSize);
}
/**
 * Captures the metadata needed to read one ORC stripe: the stripe key,
 * stream layout, column encodings, writer time zone, row count and row index.
 *
 * @param stripeKey identifies the stripe this metadata belongs to
 * @param footer the stripe footer supplying streams, encodings and time zone
 * @param orcIndex the row index for the stripe
 * @param stripe the stripe information supplying the row count
 * @throws IOException declared by the signature; not thrown by this body
 */
public OrcStripeMetadata(OrcBatchKey stripeKey, OrcProto.StripeFooter footer, OrcIndex orcIndex, StripeInformation stripe) throws IOException {
    this.stripeKey = stripeKey;
    this.rowIndex = orcIndex;
    this.rowCount = stripe.getNumberOfRows();
    this.streams = footer.getStreamsList();
    this.encodings = footer.getColumnsList();
    this.writerTimezone = footer.getWriterTimezone();
}
/**
 * Marks every column id in the range [child.getId(), child.getMaximumId()]
 * as included by setting the corresponding entries of {@code result} to true.
 *
 * @param child the type description whose id range should be included
 * @param result the include flags, indexed by column id; mutated in place
 */
private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
    final int firstId = child.getId();
    final int lastId = child.getMaximumId();
    int col = firstId;
    while (col <= lastId) {
        result[col] = true;
        ++col;
    }
}
/**
 * Set the schema for the file. This is a required parameter.
 * A {@code null} schema is silently ignored and leaves the options unchanged.
 *
 * @param schema the schema for the file.
 * @return this
 */
public WriterOptions setSchema(TypeDescription schema) {
    if (schema == null) {
        // Nothing to record; keep whatever schema (if any) was set before.
        return this;
    }
    this.explicitSchema = true;
    super.setSchema(schema);
    return this;
}
/**
 * Returns the number of rows covered by the given row-index entry,
 * taken from the entry's column statistics.
 *
 * @param rowIndexEntry the row-index entry to read the row count from
 * @return the number of values recorded in the entry's statistics
 */
private long getRowCount(OrcProto.RowIndexEntry rowIndexEntry) { return rowIndexEntry.getStatistics().getNumberOfValues(); }
/**
 * Checks whether the footer's user metadata contains an item named with the
 * ACID key-index name written by {@code OrcRecordUpdater}.
 *
 * @param footer the ORC file footer whose metadata list is scanned
 * @return true if a metadata item with the ACID key-index name is present
 */
public static boolean isOriginal(Footer footer) {
    for (OrcProto.UserMetadataItem item : footer.getMetadataList()) {
        // hasName() guards the lookup; constant-first equals is then equivalent.
        final boolean isAcidMarker = item.hasName()
            && OrcRecordUpdater.ACID_KEY_INDEX_NAME.equals(item.getName());
        if (isAcidMarker) {
            return true;
        }
    }
    return false;
}
/**
 * Sets the generic compression that is used to compress the data.
 *
 * @param value the compression kind to use
 * @return this
 */
public WriterOptions compress(org.apache.orc.CompressionKind value) { super.compress(value); return this; }
/**
 * Set the distance between entries in the row index. The minimum value is
 * 1000 to prevent the index from overwhelming the data. If the stride is
 * set to 0, no indexes will be included in the file.
 *
 * @param value the number of rows between row-index entries, or 0 to disable
 * @return this
 */
public WriterOptions rowIndexStride(int value) { super.rowIndexStride(value); return this; }
/**
 * The size of the memory buffers used for compressing and storing the
 * stripe in memory.
 *
 * @param value the buffer size in bytes
 * @return this
 */
public WriterOptions bufferSize(int value) { super.bufferSize(value); return this; }
/**
 * Set the stripe size for the file. The writer stores the contents of the
 * stripe in memory until this memory limit is reached and the stripe
 * is flushed to the HDFS file and the next stripe started.
 *
 * @param value the stripe size in bytes
 * @return this
 */
public WriterOptions stripeSize(long value) { super.stripeSize(value); return this; }
/**
 * Set the file system block size for the file. For optimal performance,
 * set the block size to be multiple factors of stripe size.
 *
 * @param value the block size in bytes
 * @return this
 */
public WriterOptions blockSize(long value) { super.blockSize(value); return this; }
/**
 * Sets the encoding strategy that is used to encode the data.
 *
 * @param strategy the encoding strategy to use
 * @return this
 */
public WriterOptions encodingStrategy(EncodingStrategy strategy) { super.encodingStrategy(strategy); return this; }
/**
 * Sets whether the HDFS blocks are padded to prevent stripes from
 * straddling blocks. Padding improves locality and thus the speed of
 * reading, but costs space.
 *
 * @param value true to pad blocks, false to allow stripes to straddle blocks
 * @return this
 */
public WriterOptions blockPadding(boolean value) { super.blockPadding(value); return this; }
/**
 * Comma separated values of column names for which bloom filter is to be created.
 *
 * @param columns comma-separated column names
 * @return this
 */
public WriterOptions bloomFilterColumns(String columns) { super.bloomFilterColumns(columns); return this; }
/**
 * Specify the false positive probability for bloom filter.
 *
 * @param fpp - false positive probability
 * @return this
 */
public WriterOptions bloomFilterFpp(double fpp) { super.bloomFilterFpp(fpp); return this; }
/**
 * Passes already-obtained file metadata through to the underlying reader options.
 *
 * @param metadata the file metadata to use
 * @return this
 */
public ReaderOptions fileMetadata(FileMetadata metadata) { super.fileMetadata(metadata); return this; }
/**
 * Creates an OrcRowInputFormat that reads with the default batch size
 * ({@code DEFAULT_BATCH_SIZE}).
 *
 * @param path The path to read ORC files from.
 * @param schemaString The schema of the ORC files as String.
 * @param orcConfig The configuration to read the ORC files with.
 */
public OrcRowInputFormat(String path, String schemaString, Configuration orcConfig) {
    // this(...) must be the first statement, so the schema string is parsed inline.
    this(path, TypeDescription.fromString(schemaString), orcConfig, DEFAULT_BATCH_SIZE);
}
/**
 * Sets the ORC schema of the files to read as a String.
 *
 * @param orcSchema The ORC schema of the files to read as a String.
 * @return The builder.
 */
public Builder forOrcSchema(String orcSchema) {
    Preconditions.checkNotNull(orcSchema, "ORC schema must not be null.");
    // Parse eagerly so an invalid schema string fails here rather than at read time.
    final TypeDescription parsedSchema = TypeDescription.fromString(orcSchema);
    this.schema = parsedSchema;
    return this;
}