/**
 * Ensure codec is created for the split, to decode values from cache. Can only be called
 * after initializing fileMetadata.
 */
private void ensureCodecFromFileMetadata() {
  if (codec != null) return;
  codec = WriterImpl.createCodec(fileMetadata.getCompressionKind());
}
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  flushInternalBatch();
  super.addRowBatch(batch);
}
@Override
public void close() throws IOException {
  flushInternalBatch();
  super.close();
}
}
private long writeFooter() throws IOException {
  writeMetadata();
  OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
  builder.setNumberOfRows(rowCount);
  builder.setRowIndexStride(rowIndexStride);
  rawDataSize = computeRawDataSize();
  // serialize the types
  writeTypes(builder, schema);
  // add the stripe information
  for (OrcProto.StripeInformation stripe : stripes) {
    builder.addStripes(stripe);
  }
  // add the column statistics
  writeFileStatistics(builder, treeWriter);
  // add all of the user metadata
  for (Map.Entry<String, ByteString> entry : userMetadata.entrySet()) {
    builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
        .setName(entry.getKey()).setValue(entry.getValue()));
  }
  builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
  physicalWriter.writeFileFooter(builder);
  return writePostScript();
}
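// For orientation: the tail written above follows the ORC file layout of
//   ...stripes... | metadata | footer | postscript | 1-byte postscript length
// which is why writePostScript() can return the final length of the file.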
@Override
public long writeIntermediateFooter() throws IOException {
  flushInternalBatch();
  return super.writeIntermediateFooter();
}
@Override
public long writeIntermediateFooter() throws IOException {
  // flush any buffered rows
  flushStripe();
  // write a footer, but only if new stripes were added since the last flush
  if (stripesAtLastFlush != stripes.size()) {
    if (callback != null) {
      callback.preFooterWrite(callbackContext);
    }
    lastFlushOffset = writeFooter();
    stripesAtLastFlush = stripes.size();
    physicalWriter.flush();
  }
  return lastFlushOffset;
}
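// A caller-side usage sketch for writeIntermediateFooter(): a long-lived
// writer (e.g. streaming ingest) can publish an intermediate footer so that
// concurrent readers can consume everything written so far; the returned
// offset is the length of the readable file prefix. The method name, the
// Iterator source, and the flush cadence below are illustrative assumptions.
// Assumed imports: java.util.Iterator, org.apache.orc.Writer,
// org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch.
static void ingest(Writer writer, Iterator<VectorizedRowBatch> batches)
    throws IOException {
  int written = 0;
  while (batches.hasNext()) {
    writer.addRowBatch(batches.next());
    if (++written % 100 == 0) {  // cadence chosen for illustration only
      long readableUpTo = writer.writeIntermediateFooter();
      // readers may now safely read bytes [0, readableUpTo)
    }
  }
  writer.close();
}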
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  if (buildIndex) {
    // Batch the writes up to the rowIndexStride so that we can get the
    // right size indexes.
    int posn = 0;
    while (posn < batch.size) {
      int chunkSize = Math.min(batch.size - posn,
          rowIndexStride - rowsInIndex);
      treeWriter.writeRootBatch(batch, posn, chunkSize);
      posn += chunkSize;
      rowsInIndex += chunkSize;
      rowsInStripe += chunkSize;
      if (rowsInIndex >= rowIndexStride) {
        createRowIndexEntry();
      }
    }
  } else {
    rowsInStripe += batch.size;
    treeWriter.writeRootBatch(batch, 0, batch.size);
  }
  memoryManager.addedRow(batch.size);
}
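// A standalone sketch of the chunking arithmetic above, with a hypothetical
// helper name and example numbers (not taken from this codebase): with
// rowIndexStride = 10000 and 9000 rows already in the current index entry,
// a 3000-row batch is written as a 1000-row chunk (completing the entry)
// followed by a 2000-row chunk.
// Assumed imports: java.util.ArrayList, java.util.List.
static List<Integer> chunkSizes(int batchSize, int rowIndexStride,
                                int rowsInIndex) {
  List<Integer> chunks = new ArrayList<>();
  int posn = 0;
  while (posn < batchSize) {
    int chunkSize = Math.min(batchSize - posn, rowIndexStride - rowsInIndex);
    chunks.add(chunkSize);
    posn += chunkSize;
    rowsInIndex = (rowsInIndex + chunkSize) % rowIndexStride;
  }
  return chunks;  // chunkSizes(3000, 10000, 9000) -> [1000, 2000]
}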
@Override
public boolean checkMemory(double newScale) throws IOException {
  long limit = Math.round(adjustedStripeSize * newScale);
  long size = treeWriter.estimateMemory();
  if (LOG.isDebugEnabled()) {
    LOG.debug("ORC writer " + physicalWriter + " size = " + size +
        " limit = " + limit);
  }
  if (size > limit) {
    flushStripe();
    return true;
  }
  return false;
}
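// The limit checked above derives from the stripe size configured at
// creation time; a minimal configuration sketch (conf and schema are assumed
// to exist, and the 64MB / 10000 values are illustrative):
OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)
    .setSchema(schema)
    .stripeSize(64L * 1024 * 1024)  // bytes buffered before a stripe flush
    .rowIndexStride(10000);         // rows between row-index entries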
@Override
public void appendStripe(byte[] stripe, int offset, int length,
                         StripeInformation stripeInfo,
                         OrcProto.StripeStatistics stripeStatistics
                         ) throws IOException {
  checkArgument(stripe != null, "Stripe must not be null");
  checkArgument(length <= stripe.length,
      "Specified length must not be greater than the specified array length");
  checkArgument(stripeInfo != null, "Stripe information must not be null");
  checkArgument(stripeStatistics != null,
      "Stripe statistics must not be null");

  rowsInStripe = stripeInfo.getNumberOfRows();
  // update stripe information
  OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation
      .newBuilder()
      .setNumberOfRows(rowsInStripe)
      .setIndexLength(stripeInfo.getIndexLength())
      .setDataLength(stripeInfo.getDataLength())
      .setFooterLength(stripeInfo.getFooterLength());

  physicalWriter.appendRawStripe(ByteBuffer.wrap(stripe, offset, length),
      dirEntry);

  // since we have already written the stripe, just update stripe statistics
  treeWriter.updateFileStatistics(stripeStatistics);
  fileMetadata.addStripeStats(stripeStatistics);

  stripes.add(dirEntry.build());

  // reset it after writing the stripe
  rowCount += rowsInStripe;
  rowsInStripe = 0;
}
/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the file cannot be created
 */
public static Writer createWriter(Path path,
                                  WriterOptions opts
                                  ) throws IOException {
  FileSystem fs = opts.getFileSystem() == null ?
      path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
  switch (opts.getVersion()) {
    case V_0_11:
    case V_0_12:
      return new WriterImpl(fs, path, opts);
    case UNSTABLE_PRE_2_0:
      return new WriterImplV2(fs, path, opts);
    default:
      throw new IllegalArgumentException("Unknown version " +
          opts.getVersion());
  }
}
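// End-to-end usage sketch for the factory above; a minimal example, in which
// the path, schema, row count, and method name are illustrative assumptions.
// Assumed imports: java.nio.charset.StandardCharsets,
// org.apache.hadoop.conf.Configuration, org.apache.orc.TypeDescription, and
// the column vector classes from org.apache.hadoop.hive.ql.exec.vector.
static void writeExampleFile() throws IOException {
  Configuration conf = new Configuration();
  TypeDescription schema =
      TypeDescription.fromString("struct<x:bigint,y:string>");
  Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
      OrcFile.writerOptions(conf).setSchema(schema));
  VectorizedRowBatch batch = schema.createRowBatch();
  LongColumnVector x = (LongColumnVector) batch.cols[0];
  BytesColumnVector y = (BytesColumnVector) batch.cols[1];
  for (int r = 0; r < 10000; ++r) {
    int row = batch.size++;
    x.vector[row] = r;
    y.setVal(row, ("row-" + r).getBytes(StandardCharsets.UTF_8));
    if (batch.size == batch.getMaxSize()) {
      writer.addRowBatch(batch);  // flush a full batch to the writer
      batch.reset();
    }
  }
  if (batch.size != 0) {
    writer.addRowBatch(batch);    // flush the final partial batch
  }
  writer.close();
}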
@Override
public void close() throws IOException {
  if (callback != null) {
    callback.preFooterWrite(callbackContext);
  }
  // remove us from the memory manager so that we don't get any callbacks
  memoryManager.removeWriter(path);
  // actually close the file
  flushStripe();
  lastFlushOffset = writeFooter();
  physicalWriter.close();
}
private void flushStripe() throws IOException {
  if (buildIndex && rowsInIndex != 0) {
    createRowIndexEntry();
public void setFileMetadata(ConsumerFileMetadata f) {
  assert fileMetadata == null;
  fileMetadata = f;
  stripes = new ArrayList<>(f.getStripeCount());
  codec = WriterImpl.createCodec(fileMetadata.getCompressionKind());
}
void flushInternalBatch() throws IOException {
  if (internalBatch.size != 0) {
    super.addRowBatch(internalBatch);
    internalBatch.reset();
  }
}
public EncodedReaderImpl(Object fileKey, List<OrcProto.Type> types,
    TypeDescription fileSchema, org.apache.orc.CompressionKind kind,
    WriterVersion version, int bufferSize, long strideRate,
    DataCache cacheWrapper, DataReader dataReader, PoolFactory pf,
    IoTrace trace, boolean useCodecPool, String tag) throws IOException {
  this.fileKey = fileKey;
  this.compressionKind = kind;
  this.isCompressed = kind != org.apache.orc.CompressionKind.NONE;
  this.isCodecFromPool = useCodecPool;
  this.codec = useCodecPool
      ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
  this.types = types;
  this.fileSchema = fileSchema; // Note: this is redundant with types
  this.version = version;
  this.bufferSize = bufferSize;
  this.rowIndexStride = strideRate;
  this.cacheWrapper = cacheWrapper;
  Allocator alloc = cacheWrapper.getAllocator();
  this.allocator = alloc instanceof StoppableAllocator
      ? (StoppableAllocator) alloc : null;
  this.dataReader = dataReader;
  this.trace = trace;
  this.tag = tag;
  // Lazily create the shared pools once; double-checked under the lock so
  // concurrent constructors don't install two sets of pools.
  if (POOLS != null) return;
  if (pf == null) {
    pf = new NoopPoolFactory();
  }
  Pools pools = createPools(pf);
  synchronized (POOLS_CREATION_LOCK) {
    if (POOLS != null) return;
    POOLS = pools;
  }
}
CompressionCodec codec = isPool
    ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
boolean isCodecError = true;
try {
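// The fragment above is truncated after "try {". For context, a
// self-contained sketch of the acquire/use/release pattern it begins; the
// method name, the try body, and the error-handling policy are assumptions,
// not the original code.
// Assumed imports: org.apache.orc.CompressionCodec,
// org.apache.orc.CompressionKind, org.apache.orc.impl.OrcCodecPool,
// org.apache.orc.impl.WriterImpl.
static void withCodec(org.apache.orc.CompressionKind kind, boolean isPool)
    throws IOException {
  CompressionCodec codec = isPool
      ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
  boolean isCodecError = true;
  try {
    // ... decompress or decode buffers with the codec ...
    isCodecError = false;
  } finally {
    // Only a codec that did not fail is returned to the pool; one that
    // threw may be left in an undefined state and is discarded instead.
    if (isPool && !isCodecError) {
      OrcCodecPool.returnCodec(kind, codec);
    }
  }
}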