OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
if (!hasAllData && splitIncludes[colIx]) {
  // The stream was read from the split rather than the cache; pin its buffers
  // before attaching them to the batch so returnData can unpin them later.
  cb.incRef();
  cb.setCacheBuffers(stream.data);
  ecb.setStreamData(colIx, streamIx, cb);
}
@Override
public void returnData(OrcEncodedColumnBatch ecb) {
  for (int colIx = 0; colIx < ecb.getTotalColCount(); ++colIx) {
    if (!ecb.hasData(colIx)) continue;
    // TODO: reuse columnvector-s on hasBatch - save the array by column? take apart each list.
    ColumnStreamData[] datas = ecb.getColumnData(colIx);
    for (ColumnStreamData data : datas) {
      if (data == null || data.decRef() != 0) continue;
      if (LlapIoImpl.LOCKING_LOGGER.isTraceEnabled()) {
        for (MemoryBuffer buf : data.getCacheBuffers()) {
          LlapIoImpl.LOCKING_LOGGER.trace("Unlocking {} at the end of processing", buf);
        }
      }
      bufferManager.decRefBuffers(data.getCacheBuffers());
      if (useObjectPools) {
        CSD_POOL.offer(data);
      }
    }
  }
  // We can offer ECB even with some streams not discarded; reset() will clear the arrays.
  if (useObjectPools) {
    ECB_POOL.offer(ecb);
  }
}
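// The pattern above is plain reference counting: a ColumnStreamData is incRef'd when attached
// to a batch and decRef'd when the batch comes back; the cache buffers are only unpinned once
// the count hits zero. A minimal self-contained sketch of that discipline, assuming nothing
// from the LLAP API (SharedStreamData and onRelease are hypothetical names):
final class SharedStreamData {
  private int refCount;
  private final Runnable onRelease; // e.g. unpin the cache buffers

  SharedStreamData(Runnable onRelease) { this.onRelease = onRelease; }

  synchronized void incRef() { ++refCount; }

  // Returns the remaining count, mirroring the data.decRef() != 0 check above.
  synchronized int decRef() {
    if (--refCount == 0) onRelease.run();
    return refCount;
  }
}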
for (int childIx = 0; childIx < childCount; ++childIx) {
  int batchColIx = children.get(childIx).getId();
  if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Column at " + childIx + " " + children.get(childIx).getId() + " has no data");
    }
  }
}
for (int schemaChildIx = 0, inclChildIx = -1; schemaChildIx < childCount; ++schemaChildIx) {
  int batchColIx = children.get(schemaChildIx).getId();
  if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) continue;
  // Readers are packed densely: only children that actually have data get a slot.
  childReaders[++inclChildIx] = createEncodedTreeReader(
      schema.getChildren().get(schemaChildIx), encodings, batch, codec, context);
}
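// The second loop above packs readers densely: childReaders slots correspond to children that
// actually have data, not to schema positions. The same compaction pattern in isolation (all
// names here are illustrative, not from the LLAP code):
String[] schemaChildren = { "a", "b", "c", "d" };
boolean[] hasData = { true, false, true, true };
String[] packed = new String[3];
for (int schemaIx = 0, outIx = -1; schemaIx < schemaChildren.length; ++schemaIx) {
  if (!hasData[schemaIx]) continue;
  packed[++outIx] = schemaChildren[schemaIx]; // packed becomes ["a", "c", "d"]
}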
OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
int vectorsIx = 0;
for (int colIx = 0; colIx < writerIncludes.length; ++colIx) {
  if (!writerIncludes[colIx]) continue;
  if (splitIncludes[colIx]) {
    // The column was decoded while reading the split; attach the resulting vectors directly.
    // (diskData is assumed here as the holder of those decoded vectors.)
    List<ColumnVector> vectors = diskData.getVectors(vectorsIx++);
    LlapIoImpl.LOG.trace("Processing vectors for column " + colIx + ": " + vectors);
    ecb.initColumnWithVectors(colIx, vectors);
  } else {
    // The column is served from the cache as encoded streams.
    ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
    processColumnCacheData(cacheBuffers, ecb, colIx);
  }
}
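// Note that vectorsIx advances only for split-included columns: the decoded vector lists are
// stored densely rather than indexed by column id. A tiny runnable illustration of why the
// separate counter is needed (values are made up):
boolean[] splitIncludesExample = { true, false, true };
int exampleVectorsIx = 0;
for (int exampleColIx = 0; exampleColIx < splitIncludesExample.length; ++exampleColIx) {
  if (!splitIncludesExample[exampleColIx]) continue;
  System.out.println("column " + exampleColIx + " -> vectors slot " + exampleVectorsIx++);
}
// prints: column 0 -> vectors slot 0, then column 2 -> vectors slot 1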
private void processColumnCacheData(LlapSerDeDataBuffer[][][] cacheBuffers,
    OrcEncodedColumnBatch ecb, int colIx) {
  // The column has been obtained from cache.
  LlapSerDeDataBuffer[][] colData = cacheBuffers[colIx];
  if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
    LlapIoImpl.CACHE_LOGGER.trace("Processing cache data for column " + colIx + ": "
        + SerDeLowLevelCacheImpl.toString(colData));
  }
  for (int streamIx = 0; streamIx < colData.length; ++streamIx) {
    if (colData[streamIx] == null) continue;
    ColumnStreamData cb = useObjectPools ? CSD_POOL.take() : new ColumnStreamData();
    cb.incRef();
    cb.setCacheBuffers(Lists.<MemoryBuffer>newArrayList(colData[streamIx]));
    ecb.setStreamData(colIx, streamIx, cb);
  }
}
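// cacheBuffers is indexed [column][stream][buffer]; a null stream slot means the cache held
// nothing for that stream, which is exactly what the loop above skips. An illustrative shape
// with the buffer type simplified to Object (not the real LlapSerDeDataBuffer layout):
Object bufA = new Object(), bufB = new Object();
Object[][][] exampleCacheBuffers = new Object[2][][];
exampleCacheBuffers[0] = new Object[][] {
    { bufA, bufB }, // stream 0 (e.g. DATA) spans two cache buffers
    null,           // stream 1 has no cached data and is skipped
};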
@Override
public OrcEncodedColumnBatch create() {
  return new OrcEncodedColumnBatch();
}

@Override
public void resetBeforeOffer(OrcEncodedColumnBatch t) {
  t.reset(); // assumed counterpart of create() in the pool helper; clears the column arrays
}
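// ECB_POOL and CSD_POOL follow a create()/resetBeforeOffer() helper contract: take() reuses or
// creates, offer() resets before pooling so a stale batch cannot leak buffers. A minimal
// self-contained pool with the same shape (SimplePool is a hypothetical stand-in, not Hive's
// FixedSizedObjectPool implementation):
import java.util.ArrayDeque;
import java.util.function.Consumer;
import java.util.function.Supplier;

final class SimplePool<T> {
  private final ArrayDeque<T> pool = new ArrayDeque<>();
  private final int maxSize;
  private final Supplier<T> create;           // like PoolObjectHelper.create()
  private final Consumer<T> resetBeforeOffer; // like PoolObjectHelper.resetBeforeOffer()

  SimplePool(int maxSize, Supplier<T> create, Consumer<T> resetBeforeOffer) {
    this.maxSize = maxSize;
    this.create = create;
    this.resetBeforeOffer = resetBeforeOffer;
  }

  synchronized T take() {
    T t = pool.poll();
    return t != null ? t : create.get();
  }

  synchronized void offer(T t) {
    resetBeforeOffer.accept(t); // clear state before the object becomes reusable
    if (pool.size() < maxSize) pool.push(t);
  }
}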
try {
  if (rgIx == OrcEncodedColumnBatch.ALL_RGS) { // assumed condition: no row-group filtering
    ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
    consumer.consumeData(ecb);
  } else {
    ecb.init(fileKey, stripeIx, rgIx, included.length);
    boolean isRGSelected = true;
    for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
      ColumnReadContext ctx = colCtxs[colIx];
      ecb.initOrcColumn(ctx.colIx);
      for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
        StreamContext sctx = ctx.streams[streamIx];
        ColumnStreamData cb = null; // ... assembled from cached or newly-read stream data ...
        ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
      }
    }
  }
} catch (Exception ex) {
  // On error, unwind the disk ranges that were never processed.
  DiskRangeList drl = toRead == null ? null : toRead.next;
  // ...
}
// Stripe-level data covers all row groups, so it is sent as a single batch.
ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, physicalFileIncludes.length);
try {
  consumer.consumeData(ecb);
} catch (InterruptedException e) {
  throw new IOException(e); // assumed handling; queueing data to the consumer can be interrupted
}

// Per-row-group batches track an error flag so pinned buffers are released on any failure.
boolean hasErrorForEcb = true;
try {
  ecb.init(fileKey, stripeIx, rgIx, physicalFileIncludes.length);
  for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
    ColumnReadContext ctx = colCtxs[colIx];
    LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
    ecb.initOrcColumn(ctx.colIx);
    trace.logStartCol(ctx.colIx);
    for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
      StreamContext sctx = ctx.streams[streamIx];
      ColumnStreamData cb = null; // ... assembled from cached or newly-read stream data ...
      ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
    }
  }
  // ... send the filled batch to the consumer ...
  hasErrorForEcb = false;
} finally {
  if (hasErrorForEcb) {
    releaseEcbRefCountsOnError(ecb);
  }
}
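// The hasErrorForEcb flag above implements "release on every exit except success". The same
// pattern in isolation, assuming nothing from the LLAP API (all names illustrative):
static void sendGuarded(Runnable fill, Runnable consume, Runnable releaseOnError) {
  boolean hasError = true;
  try {
    fill.run();    // may throw while pinning cache buffers
    consume.run(); // on success the consumer owns the ref counts
    hasError = false;
  } finally {
    if (hasError) releaseOnError.run(); // mirrors releaseEcbRefCountsOnError(ecb)
  }
}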
ColumnStreamData[] streamBuffers = null;
List<ColumnVector> vectors = null;
if (batch.hasData(columnIndex)) {
  streamBuffers = batch.getColumnData(columnIndex);
} else if (batch.hasVectors(columnIndex)) {
  vectors = batch.getColumnVectors(columnIndex);
} else {
  throw new AssertionError("Batch has no data for " + columnIndex + ": " + batch);
}
public static StructTreeReader createRootTreeReader(TypeDescription[] batchSchemas,
    List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch,
    CompressionCodec codec, Context context, final boolean useDecimal64ColumnVectors)
    throws IOException {
  // Note: we only look at the schema here to deal with complex types. Somebody has set up the
  // reader with whatever schema they wanted, and we just trust the reader to produce the CVBs
  // that were asked for. However, we only need to look at top-level columns.
  int includedCount = batch.getColumnsWithDataCount();
  if (batchSchemas.length > includedCount) {
    throw new AssertionError("For " + Arrays.toString(batchSchemas) + ", only received "
        + includedCount + " columns");
  }
  TreeReader[] childReaders = new TreeReader[batchSchemas.length];
  for (int i = 0; i < batchSchemas.length; ++i) {
    int batchColIx = batchSchemas[i].getId();
    if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
      throw new AssertionError("No data for column " + batchColIx + ": " + batchSchemas[i]);
    }
    childReaders[i] = createEncodedTreeReader(
        batchSchemas[i], encodings, batch, codec, context, useDecimal64ColumnVectors);
  }
  // TODO: do we actually need this reader? the caller just extracts child readers.
  return StructStreamReader.builder()
      .setColumnIndex(0)
      .setCompressionCodec(codec)
      .setColumnEncoding(encodings.get(0))
      .setChildReaders(childReaders)
      .setContext(context)
      .build();
}
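// A hypothetical call site for the factory above; batchSchemas, encodings, batch, codec and
// context are assumed to come from the surrounding encoded ORC reader:
StructTreeReader root = createRootTreeReader(
    batchSchemas, encodings, batch, codec, context, /* useDecimal64ColumnVectors */ false);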
private void releaseEcbRefCountsOnError(OrcEncodedColumnBatch ecb) {
  try {
    if (isTracingEnabled) {
      LOG.trace("Unlocking the batch not sent to consumer, on error");
    }
    // We cannot send the ecb to consumer. Discard whatever is already there.
    for (int colIx = 0; colIx < ecb.getTotalColCount(); ++colIx) {
      if (!ecb.hasData(colIx)) continue;
      ColumnStreamData[] datas = ecb.getColumnData(colIx);
      for (ColumnStreamData data : datas) {
        if (data == null || data.decRef() != 0) continue;
        for (MemoryBuffer buf : data.getCacheBuffers()) {
          if (buf == null) continue;
          cacheWrapper.releaseBuffer(buf);
        }
      }
    }
  } catch (Throwable t) {
    LOG.error("Error during the cleanup of an error; ignoring", t);
  }
}
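// Errors thrown while cleaning up an earlier error are logged and swallowed so they cannot
// mask the original failure. The same guard in isolation (cleanupQuietly is illustrative):
static void cleanupQuietly(Runnable cleanup) {
  try {
    cleanup.run();
  } catch (Throwable t) {
    System.err.println("Error during the cleanup of an error; ignoring: " + t);
  }
}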
public List<ColumnVector> getColumnVectors(int colIx) {
  if (!hasVectors(colIx)) throw new AssertionError("No data for column " + colIx);
  return columnVectors[colIx];
}