/**
 * Ensure codec is created for the split, to decode values from cache. Can only be called
 * after initializing fileMetadata.
 */
private void ensureCodecFromFileMetadata() {
  if (codec != null) return;
  codec = WriterImpl.createCodec(fileMetadata.getCompressionKind());
}
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  flushInternalBatch();
  super.addRowBatch(batch);
}
@Override
public void close() throws IOException {
  flushInternalBatch();
  super.close();
}
}
private long writeFooter() throws IOException {
  writeMetadata();
  OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
  builder.setNumberOfRows(rowCount);
  builder.setRowIndexStride(rowIndexStride);
  rawDataSize = computeRawDataSize();
  // serialize the types
  writeTypes(builder, schema);
  // add the stripe information
  for (OrcProto.StripeInformation stripe : stripes) {
    builder.addStripes(stripe);
  }
  // add the column statistics
  writeFileStatistics(builder, treeWriter);
  // add all of the user metadata
  for (Map.Entry<String, ByteString> entry : userMetadata.entrySet()) {
    builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
        .setName(entry.getKey()).setValue(entry.getValue()));
  }
  builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
  physicalWriter.writeFileFooter(builder);
  return writePostScript();
}
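// For orientation: the tail written above follows the ORC file layout of
//   ...stripes... | metadata | footer | postscript | 1-byte postscript length
// which is why writePostScript() can return the final length of the file.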
@Override
public long writeIntermediateFooter() throws IOException {
  flushInternalBatch();
  return super.writeIntermediateFooter();
}
@Override
public long writeIntermediateFooter() throws IOException {
  // flush any buffered rows
  flushStripe();
  // write a footer, but only if new stripes were added since the last flush
  if (stripesAtLastFlush != stripes.size()) {
    if (callback != null) {
      callback.preFooterWrite(callbackContext);
    }
    lastFlushOffset = writeFooter();
    stripesAtLastFlush = stripes.size();
    physicalWriter.flush();
  }
  return lastFlushOffset;
}
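// A caller-side usage sketch for writeIntermediateFooter(): a long-lived
// writer (e.g. streaming ingest) can publish an intermediate footer so that
// concurrent readers can consume everything written so far; the returned
// offset is the length of the readable file prefix. The method name, the
// Iterator source, and the flush cadence below are illustrative assumptions.
// Assumed imports: java.util.Iterator, org.apache.orc.Writer,
// org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch.
static void ingest(Writer writer, Iterator<VectorizedRowBatch> batches)
    throws IOException {
  int written = 0;
  while (batches.hasNext()) {
    writer.addRowBatch(batches.next());
    if (++written % 100 == 0) {  // cadence chosen for illustration only
      long readableUpTo = writer.writeIntermediateFooter();
      // readers may now safely read bytes [0, readableUpTo)
    }
  }
  writer.close();
}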
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  if (buildIndex) {
    // Batch the writes up to the rowIndexStride so that we can get the
    // right size indexes.
    int posn = 0;
    while (posn < batch.size) {
      int chunkSize = Math.min(batch.size - posn,
          rowIndexStride - rowsInIndex);
      treeWriter.writeRootBatch(batch, posn, chunkSize);
      posn += chunkSize;
      rowsInIndex += chunkSize;
      rowsInStripe += chunkSize;
      if (rowsInIndex >= rowIndexStride) {
        createRowIndexEntry();
      }
    }
  } else {
    rowsInStripe += batch.size;
    treeWriter.writeRootBatch(batch, 0, batch.size);
  }
  memoryManager.addedRow(batch.size);
}
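// A standalone sketch of the chunking arithmetic above, with a hypothetical
// helper name and example numbers (not taken from this codebase): with
// rowIndexStride = 10000 and 9000 rows already in the current index entry,
// a 3000-row batch is written as a 1000-row chunk (completing the entry)
// followed by a 2000-row chunk.
// Assumed imports: java.util.ArrayList, java.util.List.
static List<Integer> chunkSizes(int batchSize, int rowIndexStride,
                                int rowsInIndex) {
  List<Integer> chunks = new ArrayList<>();
  int posn = 0;
  while (posn < batchSize) {
    int chunkSize = Math.min(batchSize - posn, rowIndexStride - rowsInIndex);
    chunks.add(chunkSize);
    posn += chunkSize;
    rowsInIndex = (rowsInIndex + chunkSize) % rowIndexStride;
  }
  return chunks;  // chunkSizes(3000, 10000, 9000) -> [1000, 2000]
}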
@Override
public boolean checkMemory(double newScale) throws IOException {
  long limit = Math.round(adjustedStripeSize * newScale);
  long size = treeWriter.estimateMemory();
  if (LOG.isDebugEnabled()) {
    LOG.debug("ORC writer " + physicalWriter + " size = " + size +
        " limit = " + limit);
  }
  if (size > limit) {
    flushStripe();
    return true;
  }
  return false;
}
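// The limit checked above derives from the stripe size configured at
// creation time; a minimal configuration sketch (conf and schema are assumed
// to exist, and the 64MB / 10000 values are illustrative):
OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)
    .setSchema(schema)
    .stripeSize(64L * 1024 * 1024)  // bytes buffered before a stripe flush
    .rowIndexStride(10000);         // rows between row-index entries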
@Override
public void appendStripe(byte[] stripe, int offset, int length,
                         StripeInformation stripeInfo,
                         OrcProto.StripeStatistics stripeStatistics
                         ) throws IOException {
  checkArgument(stripe != null, "Stripe must not be null");
  checkArgument(length <= stripe.length,
      "Specified length must not be greater than the specified array length");
  checkArgument(stripeInfo != null, "Stripe information must not be null");
  checkArgument(stripeStatistics != null,
      "Stripe statistics must not be null");

  rowsInStripe = stripeInfo.getNumberOfRows();
  // update stripe information
  OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation
      .newBuilder()
      .setNumberOfRows(rowsInStripe)
      .setIndexLength(stripeInfo.getIndexLength())
      .setDataLength(stripeInfo.getDataLength())
      .setFooterLength(stripeInfo.getFooterLength());

  physicalWriter.appendRawStripe(ByteBuffer.wrap(stripe, offset, length),
      dirEntry);

  // since we have already written the stripe, just update stripe statistics
  treeWriter.updateFileStatistics(stripeStatistics);
  fileMetadata.addStripeStats(stripeStatistics);

  stripes.add(dirEntry.build());

  // reset it after writing the stripe
  rowCount += rowsInStripe;
  rowsInStripe = 0;
}
/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the file cannot be created
 */
public static Writer createWriter(Path path,
                                  WriterOptions opts
                                  ) throws IOException {
  FileSystem fs = opts.getFileSystem() == null ?
      path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
  switch (opts.getVersion()) {
    case V_0_11:
    case V_0_12:
      return new WriterImpl(fs, path, opts);
    case UNSTABLE_PRE_2_0:
      return new WriterImplV2(fs, path, opts);
    default:
      throw new IllegalArgumentException("Unknown version " +
          opts.getVersion());
  }
}
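// End-to-end usage sketch for the factory above; a minimal example, in which
// the path, schema, row count, and method name are illustrative assumptions.
// Assumed imports: java.nio.charset.StandardCharsets,
// org.apache.hadoop.conf.Configuration, org.apache.orc.TypeDescription, and
// the column vector classes from org.apache.hadoop.hive.ql.exec.vector.
static void writeExampleFile() throws IOException {
  Configuration conf = new Configuration();
  TypeDescription schema =
      TypeDescription.fromString("struct<x:bigint,y:string>");
  Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
      OrcFile.writerOptions(conf).setSchema(schema));
  VectorizedRowBatch batch = schema.createRowBatch();
  LongColumnVector x = (LongColumnVector) batch.cols[0];
  BytesColumnVector y = (BytesColumnVector) batch.cols[1];
  for (int r = 0; r < 10000; ++r) {
    int row = batch.size++;
    x.vector[row] = r;
    y.setVal(row, ("row-" + r).getBytes(StandardCharsets.UTF_8));
    if (batch.size == batch.getMaxSize()) {
      writer.addRowBatch(batch);  // flush a full batch to the writer
      batch.reset();
    }
  }
  if (batch.size != 0) {
    writer.addRowBatch(batch);    // flush the final partial batch
  }
  writer.close();
}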
@Override
public void close() throws IOException {
  if (callback != null) {
    callback.preFooterWrite(callbackContext);
  }
  // remove us from the memory manager so that we don't get any callbacks
  memoryManager.removeWriter(path);
  // actually close the file
  flushStripe();
  lastFlushOffset = writeFooter();
  physicalWriter.close();
}
private void flushStripe() throws IOException {
  if (buildIndex && rowsInIndex != 0) {
    createRowIndexEntry();
public void setFileMetadata(ConsumerFileMetadata f) {
  assert fileMetadata == null;
  fileMetadata = f;
  stripes = new ArrayList<>(f.getStripeCount());
  codec = WriterImpl.createCodec(fileMetadata.getCompressionKind());
}
void flushInternalBatch() throws IOException {
  if (internalBatch.size != 0) {
    super.addRowBatch(internalBatch);
    internalBatch.reset();
  }
}
public EncodedReaderImpl(Object fileKey, List<OrcProto.Type> types,
    TypeDescription fileSchema, org.apache.orc.CompressionKind kind,
    WriterVersion version, int bufferSize, long strideRate,
    DataCache cacheWrapper, DataReader dataReader, PoolFactory pf,
    IoTrace trace, boolean useCodecPool, String tag) throws IOException {
  this.fileKey = fileKey;
  this.compressionKind = kind;
  this.isCompressed = kind != org.apache.orc.CompressionKind.NONE;
  this.isCodecFromPool = useCodecPool;
  this.codec = useCodecPool
      ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
  this.types = types;
  this.fileSchema = fileSchema; // Note: this is redundant with types
  this.version = version;
  this.bufferSize = bufferSize;
  this.rowIndexStride = strideRate;
  this.cacheWrapper = cacheWrapper;
  Allocator alloc = cacheWrapper.getAllocator();
  this.allocator = alloc instanceof StoppableAllocator
      ? (StoppableAllocator) alloc : null;
  this.dataReader = dataReader;
  this.trace = trace;
  this.tag = tag;
  // Lazily create the shared pools once; double-checked under the lock so
  // concurrent constructors don't install two sets of pools.
  if (POOLS != null) return;
  if (pf == null) {
    pf = new NoopPoolFactory();
  }
  Pools pools = createPools(pf);
  synchronized (POOLS_CREATION_LOCK) {
    if (POOLS != null) return;
    POOLS = pools;
  }
}
CompressionCodec codec = isPool
    ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
boolean isCodecError = true;
try {
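// The fragment above is truncated after "try {". For context, a
// self-contained sketch of the acquire/use/release pattern it begins; the
// method name, the try body, and the error-handling policy are assumptions,
// not the original code.
// Assumed imports: org.apache.orc.CompressionCodec,
// org.apache.orc.CompressionKind, org.apache.orc.impl.OrcCodecPool,
// org.apache.orc.impl.WriterImpl.
static void withCodec(org.apache.orc.CompressionKind kind, boolean isPool)
    throws IOException {
  CompressionCodec codec = isPool
      ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
  boolean isCodecError = true;
  try {
    // ... decompress or decode buffers with the codec ...
    isCodecError = false;
  } finally {
    // Only a codec that did not fail is returned to the pool; one that
    // threw may be left in an undefined state and is discarded instead.
    if (isPool && !isCodecError) {
      OrcCodecPool.returnCodec(kind, codec);
    }
  }
}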