OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
if (!hasAllData && splitIncludes[colIx]) {
  // The stream was read from the split rather than the cache; pin its buffers
  // before attaching them to the batch so returnData can unpin them later.
  cb.incRef();
  cb.setCacheBuffers(stream.data);
  ecb.setStreamData(colIx, streamIx, cb);
}
@Override
public void returnData(OrcEncodedColumnBatch ecb) {
  for (int colIx = 0; colIx < ecb.getTotalColCount(); ++colIx) {
    if (!ecb.hasData(colIx)) continue;
    // TODO: reuse columnvector-s on hasBatch - save the array by column? take apart each list.
    ColumnStreamData[] datas = ecb.getColumnData(colIx);
    for (ColumnStreamData data : datas) {
      if (data == null || data.decRef() != 0) continue;
      if (LlapIoImpl.LOCKING_LOGGER.isTraceEnabled()) {
        for (MemoryBuffer buf : data.getCacheBuffers()) {
          LlapIoImpl.LOCKING_LOGGER.trace("Unlocking {} at the end of processing", buf);
        }
      }
      bufferManager.decRefBuffers(data.getCacheBuffers());
      if (useObjectPools) {
        CSD_POOL.offer(data);
      }
    }
  }
  // We can offer ECB even with some streams not discarded; reset() will clear the arrays.
  if (useObjectPools) {
    ECB_POOL.offer(ecb);
  }
}
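// The pattern above is plain reference counting: a ColumnStreamData is incRef'd when attached
// to a batch and decRef'd when the batch comes back; the cache buffers are only unpinned once
// the count hits zero. A minimal self-contained sketch of that discipline, assuming nothing
// from the LLAP API (SharedStreamData and onRelease are hypothetical names):
final class SharedStreamData {
  private int refCount;
  private final Runnable onRelease; // e.g. unpin the cache buffers

  SharedStreamData(Runnable onRelease) { this.onRelease = onRelease; }

  synchronized void incRef() { ++refCount; }

  // Returns the remaining count, mirroring the data.decRef() != 0 check above.
  synchronized int decRef() {
    if (--refCount == 0) onRelease.run();
    return refCount;
  }
}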
for (int childIx = 0; childIx < childCount; ++childIx) {
  int batchColIx = children.get(childIx).getId();
  if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Column at " + childIx + " " + children.get(childIx).getId() + " has no data");
    }
  }
}
for (int schemaChildIx = 0, inclChildIx = -1; schemaChildIx < childCount; ++schemaChildIx) {
  int batchColIx = children.get(schemaChildIx).getId();
  if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) continue;
  // Readers are packed densely: only children that actually have data get a slot.
  childReaders[++inclChildIx] = createEncodedTreeReader(
      schema.getChildren().get(schemaChildIx), encodings, batch, codec, context);
}
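// The second loop above packs readers densely: childReaders slots correspond to children that
// actually have data, not to schema positions. The same compaction pattern in isolation (all
// names here are illustrative, not from the LLAP code):
String[] schemaChildren = { "a", "b", "c", "d" };
boolean[] hasData = { true, false, true, true };
String[] packed = new String[3];
for (int schemaIx = 0, outIx = -1; schemaIx < schemaChildren.length; ++schemaIx) {
  if (!hasData[schemaIx]) continue;
  packed[++outIx] = schemaChildren[schemaIx]; // packed becomes ["a", "c", "d"]
}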
OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
int vectorsIx = 0;
for (int colIx = 0; colIx < writerIncludes.length; ++colIx) {
  if (!writerIncludes[colIx]) continue;
  if (splitIncludes[colIx]) {
    // The column was decoded while reading the split; attach the resulting vectors directly.
    // (diskData is assumed here as the holder of those decoded vectors.)
    List<ColumnVector> vectors = diskData.getVectors(vectorsIx++);
    LlapIoImpl.LOG.trace("Processing vectors for column " + colIx + ": " + vectors);
    ecb.initColumnWithVectors(colIx, vectors);
  } else {
    // The column is served from the cache as encoded streams.
    ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
    processColumnCacheData(cacheBuffers, ecb, colIx);
  }
}
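// Note that vectorsIx advances only for split-included columns: the decoded vector lists are
// stored densely rather than indexed by column id. A tiny runnable illustration of why the
// separate counter is needed (values are made up):
boolean[] splitIncludesExample = { true, false, true };
int exampleVectorsIx = 0;
for (int exampleColIx = 0; exampleColIx < splitIncludesExample.length; ++exampleColIx) {
  if (!splitIncludesExample[exampleColIx]) continue;
  System.out.println("column " + exampleColIx + " -> vectors slot " + exampleVectorsIx++);
}
// prints: column 0 -> vectors slot 0, then column 2 -> vectors slot 1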
private void processColumnCacheData(LlapSerDeDataBuffer[][][] cacheBuffers,
    OrcEncodedColumnBatch ecb, int colIx) {
  // The column has been obtained from cache.
  LlapSerDeDataBuffer[][] colData = cacheBuffers[colIx];
  if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
    LlapIoImpl.CACHE_LOGGER.trace("Processing cache data for column " + colIx + ": "
        + SerDeLowLevelCacheImpl.toString(colData));
  }
  for (int streamIx = 0; streamIx < colData.length; ++streamIx) {
    if (colData[streamIx] == null) continue;
    ColumnStreamData cb = useObjectPools ? CSD_POOL.take() : new ColumnStreamData();
    cb.incRef();
    cb.setCacheBuffers(Lists.<MemoryBuffer>newArrayList(colData[streamIx]));
    ecb.setStreamData(colIx, streamIx, cb);
  }
}
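// cacheBuffers is indexed [column][stream][buffer]; a null stream slot means the cache held
// nothing for that stream, which is exactly what the loop above skips. An illustrative shape
// with the buffer type simplified to Object (not the real LlapSerDeDataBuffer layout):
Object bufA = new Object(), bufB = new Object();
Object[][][] exampleCacheBuffers = new Object[2][][];
exampleCacheBuffers[0] = new Object[][] {
    { bufA, bufB }, // stream 0 (e.g. DATA) spans two cache buffers
    null,           // stream 1 has no cached data and is skipped
};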
@Override
public OrcEncodedColumnBatch create() {
  return new OrcEncodedColumnBatch();
}

@Override
public void resetBeforeOffer(OrcEncodedColumnBatch t) {
  t.reset(); // assumed counterpart of create() in the pool helper; clears the column arrays
}
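// ECB_POOL and CSD_POOL follow a create()/resetBeforeOffer() helper contract: take() reuses or
// creates, offer() resets before pooling so a stale batch cannot leak buffers. A minimal
// self-contained pool with the same shape (SimplePool is a hypothetical stand-in, not Hive's
// FixedSizedObjectPool implementation):
import java.util.ArrayDeque;
import java.util.function.Consumer;
import java.util.function.Supplier;

final class SimplePool<T> {
  private final ArrayDeque<T> pool = new ArrayDeque<>();
  private final int maxSize;
  private final Supplier<T> create;           // like PoolObjectHelper.create()
  private final Consumer<T> resetBeforeOffer; // like PoolObjectHelper.resetBeforeOffer()

  SimplePool(int maxSize, Supplier<T> create, Consumer<T> resetBeforeOffer) {
    this.maxSize = maxSize;
    this.create = create;
    this.resetBeforeOffer = resetBeforeOffer;
  }

  synchronized T take() {
    T t = pool.poll();
    return t != null ? t : create.get();
  }

  synchronized void offer(T t) {
    resetBeforeOffer.accept(t); // clear state before the object becomes reusable
    if (pool.size() < maxSize) pool.push(t);
  }
}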
try {
  if (rgIx == OrcEncodedColumnBatch.ALL_RGS) { // assumed condition: no row-group filtering
    ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
    consumer.consumeData(ecb);
  } else {
    ecb.init(fileKey, stripeIx, rgIx, included.length);
    boolean isRGSelected = true;
    for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
      ColumnReadContext ctx = colCtxs[colIx];
      ecb.initOrcColumn(ctx.colIx);
      for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
        StreamContext sctx = ctx.streams[streamIx];
        ColumnStreamData cb = null; // ... assembled from cached or newly-read stream data ...
        ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
      }
    }
  }
} catch (Exception ex) {
  // On error, unwind the disk ranges that were never processed.
  DiskRangeList drl = toRead == null ? null : toRead.next;
  // ...
}
// Stripe-level data covers all row groups, so it is sent as a single batch.
ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, physicalFileIncludes.length);
try {
  consumer.consumeData(ecb);
} catch (InterruptedException e) {
  throw new IOException(e); // assumed handling; queueing data to the consumer can be interrupted
}

// Per-row-group batches track an error flag so pinned buffers are released on any failure.
boolean hasErrorForEcb = true;
try {
  ecb.init(fileKey, stripeIx, rgIx, physicalFileIncludes.length);
  for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
    ColumnReadContext ctx = colCtxs[colIx];
    LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
    ecb.initOrcColumn(ctx.colIx);
    trace.logStartCol(ctx.colIx);
    for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
      StreamContext sctx = ctx.streams[streamIx];
      ColumnStreamData cb = null; // ... assembled from cached or newly-read stream data ...
      ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
    }
  }
  // ... send the filled batch to the consumer ...
  hasErrorForEcb = false;
} finally {
  if (hasErrorForEcb) {
    releaseEcbRefCountsOnError(ecb);
  }
}
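// The hasErrorForEcb flag above implements "release on every exit except success". The same
// pattern in isolation, assuming nothing from the LLAP API (all names illustrative):
static void sendGuarded(Runnable fill, Runnable consume, Runnable releaseOnError) {
  boolean hasError = true;
  try {
    fill.run();    // may throw while pinning cache buffers
    consume.run(); // on success the consumer owns the ref counts
    hasError = false;
  } finally {
    if (hasError) releaseOnError.run(); // mirrors releaseEcbRefCountsOnError(ecb)
  }
}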
ColumnStreamData[] streamBuffers = null;
List<ColumnVector> vectors = null;
if (batch.hasData(columnIndex)) {
  streamBuffers = batch.getColumnData(columnIndex);
} else if (batch.hasVectors(columnIndex)) {
  vectors = batch.getColumnVectors(columnIndex);
} else {
  throw new AssertionError("Batch has no data for " + columnIndex + ": " + batch);
}
public static StructTreeReader createRootTreeReader(TypeDescription[] batchSchemas,
    List<OrcProto.ColumnEncoding> encodings, OrcEncodedColumnBatch batch,
    CompressionCodec codec, Context context, final boolean useDecimal64ColumnVectors)
    throws IOException {
  // Note: we only look at the schema here to deal with complex types. Somebody has set up the
  // reader with whatever schema they wanted, and we just trust the reader to produce the CVBs
  // that were asked for. However, we only need to look at top-level columns.
  int includedCount = batch.getColumnsWithDataCount();
  if (batchSchemas.length > includedCount) {
    throw new AssertionError("For " + Arrays.toString(batchSchemas) + ", only received "
        + includedCount + " columns");
  }
  TreeReader[] childReaders = new TreeReader[batchSchemas.length];
  for (int i = 0; i < batchSchemas.length; ++i) {
    int batchColIx = batchSchemas[i].getId();
    if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
      throw new AssertionError("No data for column " + batchColIx + ": " + batchSchemas[i]);
    }
    childReaders[i] = createEncodedTreeReader(
        batchSchemas[i], encodings, batch, codec, context, useDecimal64ColumnVectors);
  }
  // TODO: do we actually need this reader? the caller just extracts child readers.
  return StructStreamReader.builder()
      .setColumnIndex(0)
      .setCompressionCodec(codec)
      .setColumnEncoding(encodings.get(0))
      .setChildReaders(childReaders)
      .setContext(context)
      .build();
}
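// A hypothetical call site for the factory above; batchSchemas, encodings, batch, codec and
// context are assumed to come from the surrounding encoded ORC reader:
StructTreeReader root = createRootTreeReader(
    batchSchemas, encodings, batch, codec, context, /* useDecimal64ColumnVectors */ false);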
private void releaseEcbRefCountsOnError(OrcEncodedColumnBatch ecb) {
  try {
    if (isTracingEnabled) {
      LOG.trace("Unlocking the batch not sent to consumer, on error");
    }
    // We cannot send the ecb to consumer. Discard whatever is already there.
    for (int colIx = 0; colIx < ecb.getTotalColCount(); ++colIx) {
      if (!ecb.hasData(colIx)) continue;
      ColumnStreamData[] datas = ecb.getColumnData(colIx);
      for (ColumnStreamData data : datas) {
        if (data == null || data.decRef() != 0) continue;
        for (MemoryBuffer buf : data.getCacheBuffers()) {
          if (buf == null) continue;
          cacheWrapper.releaseBuffer(buf);
        }
      }
    }
  } catch (Throwable t) {
    LOG.error("Error during the cleanup of an error; ignoring", t);
  }
}
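// Errors thrown while cleaning up an earlier error are logged and swallowed so they cannot
// mask the original failure. The same guard in isolation (cleanupQuietly is illustrative):
static void cleanupQuietly(Runnable cleanup) {
  try {
    cleanup.run();
  } catch (Throwable t) {
    System.err.println("Error during the cleanup of an error; ignoring: " + t);
  }
}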
public List<ColumnVector> getColumnVectors(int colIx) {
  if (!hasVectors(colIx)) throw new AssertionError("No data for column " + colIx);
  return columnVectors[colIx];
}