/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the filesystem cannot be obtained or the writer
 *         cannot be created
 */
public static Writer createWriter(Path path,
                                  WriterOptions opts) throws IOException {
  // Derive the filesystem from the path only when the caller did not
  // supply one explicitly in the options.
  FileSystem fs = opts.getFileSystem();
  if (fs == null) {
    fs = path.getFileSystem(opts.getConfiguration());
  }
  return new WriterImpl(fs, path, opts);
}
/**
 * Buffer one row into the internal VectorizedRowBatch, flushing the batch
 * to the underlying writer once it reaches its maximum size.
 * @param row the row value, decoded via the configured object inspector
 * @throws IOException if flushing a full batch fails
 */
@Override
public void addRow(Object row) throws IOException {
  final int rowId = internalBatch.size++;
  if (fields == null) {
    // Non-struct top-level value maps to the single column vector.
    setColumn(rowId, internalBatch.cols[0], inspector, row);
  } else {
    // Struct rows: fan each field out to its own column vector.
    final StructObjectInspector soi = (StructObjectInspector) inspector;
    for (int col = 0; col < fields.length; ++col) {
      setColumn(rowId, internalBatch.cols[col],
          fields[col].getFieldObjectInspector(),
          soi.getStructFieldData(row, fields[col]));
    }
  }
  if (internalBatch.size == internalBatch.getMaxSize()) {
    flushInternalBatch();
  }
}
// NOTE(review): fragment of the WriterImpl constructor — intervening
// statements and several closing braces are missing from this excerpt,
// so the nesting below does not balance. Code kept token-for-token.
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0; // index entries are written only for a positive stride
codec = createCodec(compress);
// Column names come from the configuration when present, otherwise they
// are reconstructed from the object inspector.
String allColumns = conf.get(IOConstants.COLUMNS);
if (allColumns == null) {
allColumns = getColumnNamesFromInspector(inspector);
this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
if (version == OrcFile.Version.V_0_11) {
treeWriter = createTreeWriter(inspector, streamFactory, false);
// Guard against strides so small the index would dominate the file.
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
/**
 * Drain any rows buffered in the internal batch, then delegate to the
 * parent writer to emit an intermediate footer.
 * @return the value returned by the parent's writeIntermediateFooter
 * @throws IOException if the flush or footer write fails
 */
@Override
public long writeIntermediateFooter() throws IOException {
  // Buffered rows must be written before the footer snapshot is taken.
  flushInternalBatch();
  final long result = super.writeIntermediateFooter();
  return result;
}
private int writeFooter(long bodyLength) throws IOException { getStream(); OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); builder.setContentLength(bodyLength); builder.setHeaderLength(headerLength); builder.setNumberOfRows(rowCount); builder.setRowIndexStride(rowIndexStride); // populate raw data size rawDataSize = computeRawDataSize(); // serialize the types writeTypes(builder, treeWriter); // add the stripe information for(OrcProto.StripeInformation stripe: stripes) { builder.addStripes(stripe); } // add the column statistics writeFileStatistics(builder, treeWriter); // add all of the user metadata for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) { builder.addMetadata(OrcProto.UserMetadataItem.newBuilder() .setName(entry.getKey()).setValue(entry.getValue())); } long startPosn = rawWriter.getPos(); OrcProto.Footer footer = builder.build(); footer.writeTo(protobufWriter); protobufWriter.flush(); writer.flush(); return (int) (rawWriter.getPos() - startPosn); }
/**
 * Wrap the base writer with an internal row batch sized from the options,
 * and derive the field layout from the supplied object inspector.
 * @throws IOException if the parent writer cannot be created
 */
WriterImpl(FileSystem fs, Path path,
           OrcFile.WriterOptions opts) throws IOException {
  super(fs, path, opts);
  this.inspector = opts.getInspector();
  this.fields = initializeFieldsFromOi(inspector);
  this.internalBatch = opts.getSchema().createRowBatch(opts.getBatchSize());
}
// NOTE(review): excerpt of the case bodies from a setColumn switch over
// complex categories (apparently STRUCT / UNION / LIST / MAP — confirm
// against the full method). The switch statement, case labels, and
// several braces fall outside this excerpt, so the code below does not
// balance on its own. Code kept token-for-token.
for (int c = 0; c < vector.fields.length; ++c) {
StructField field = fields.get(c);
setColumn(rowId, vector.fields[c], field.getFieldObjectInspector(),
    oi.getStructFieldData(obj, field));
// UNION: record the active child's tag, then set only that child.
int tag = oi.getTag(obj);
vector.tags[rowId] = tag;
setColumn(rowId, vector.fields[tag], oi.getObjectInspectors().get(tag),
    oi.getField(obj));
break;
// LIST: children are appended at the current childCount offset.
vector.childCount += length;
for (int c = 0; c < length; ++c) {
setColumn(offset + c, vector.child, oi.getListElementObjectInspector(),
    oi.getListElement(obj, c));
// MAP: each entry writes its key and value at the same child offset.
for (Object item : map) {
Map.Entry pair = (Map.Entry) item;
setColumn(offset, vector.keys, oi.getMapKeyObjectInspector(),
    pair.getKey());
setColumn(offset, vector.values, oi.getMapValueObjectInspector(),
    pair.getValue());
offset += 1;
/**
 * Called by the memory manager to test whether the in-memory stripe has
 * outgrown its currently allowed share of memory; flushes the stripe when
 * it has.
 * @param newScale the fraction of the adjusted stripe size currently allowed
 * @return true if a stripe was flushed as a result of this check
 * @throws IOException if flushing the stripe fails
 */
@Override
public synchronized boolean checkMemory(double newScale) throws IOException {
  final long limit = (long) Math.round(adjustedStripeSize * newScale);
  final long size = estimateStripeSize();
  if (LOG.isDebugEnabled()) {
    LOG.debug("ORC writer " + path + " size = " + size + " limit = " + limit);
  }
  if (size <= limit) {
    return false;
  }
  flushStripe();
  return true;
}
// NOTE(review): truncated excerpt — flushStripe() continues beyond this
// chunk; only its opening statements are visible here.
private void flushStripe() throws IOException {
getStream();
// If any rows have accumulated since the last index entry, write one
// more entry before the stripe is serialized (presumably so the index
// covers the stripe's tail — confirm against createRowIndexEntry).
if (buildIndex && rowsInIndex != 0) {
createRowIndexEntry();
/**
 * Write a single row into the current stripe, creating a row-index entry
 * every rowIndexStride rows when indexing is enabled, then notify the
 * memory manager.
 * @param row the row to write
 * @throws IOException if the write or index-entry creation fails
 */
@Override
public void addRow(Object row) throws IOException {
  synchronized (this) {
    treeWriter.write(row);
    rowsInStripe += 1;
    // Short-circuit keeps rowsInIndex untouched when indexing is off,
    // matching the original nested-if behavior.
    if (buildIndex && ++rowsInIndex >= rowIndexStride) {
      createRowIndexEntry();
    }
  }
  // Notify the memory manager after releasing the lock.
  memoryManager.addedRow();
}
/**
 * Parses the metadata and footer protobuf sections out of the tail buffer
 * read from the end of an ORC file, decompressing with the named codec.
 *
 * @param codecStr     name of the compression kind (must match a
 *                     {@link CompressionKind} enum constant)
 * @param bufferSize   compression buffer size to use when decoding
 * @param metadataSize length in bytes of the metadata section at the
 *                     start of the buffer window
 * @param footerBuffer buffer positioned at the metadata section; its
 *                     limit bounds metadata + footer
 * @throws IOException if either protobuf section cannot be parsed
 */
MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
    ByteBuffer footerBuffer) throws IOException {
  this.compressionKind = CompressionKind.valueOf(codecStr);
  this.bufferSize = bufferSize;
  this.codec = WriterImpl.createCodec(compressionKind);
  this.metadataSize = metadataSize;
  int position = footerBuffer.position();
  // Everything between the metadata section and the buffer's limit is
  // the footer.
  int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
  // First window: [position, position + metadataSize) holds the metadata.
  footerBuffer.limit(position + metadataSize);
  InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
      new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
  this.metadata = OrcProto.Metadata.parseFrom(instream);
  // Second window: the following footerBufferSize bytes hold the footer.
  footerBuffer.position(position + metadataSize);
  footerBuffer.limit(position + metadataSize + footerBufferSize);
  instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
      new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize);
  this.footer = OrcProto.Footer.parseFrom(instream);
  // Restore the original position for any later readers of this buffer.
  footerBuffer.position(position);
  this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
}
}
/**
 * Flush any rows still buffered in the internal batch, then close the
 * underlying writer.
 * @throws IOException if the final flush or the close fails
 */
@Override
public void close() throws IOException {
  // The batch must be drained before the parent finalizes the file.
  flushInternalBatch();
  super.close();
}
}
/**
 * Wrap the base writer with an internal row batch, selecting the
 * Decimal64 column-vector layout when the configuration requests it.
 * @throws IOException if the parent writer cannot be created
 */
WriterImpl(FileSystem fs, Path path,
           OrcFile.WriterOptions opts) throws IOException {
  super(fs, path, opts);
  this.inspector = opts.getInspector();
  // HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED set to "decimal_64"
  // switches the batch to Decimal64 column vectors.
  final boolean useDecimal64 = opts.getConfiguration() != null
      && HiveConf.getVar(opts.getConfiguration(),
             HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED)
         .equalsIgnoreCase("decimal_64");
  this.internalBatch = useDecimal64
      ? opts.getSchema().createRowBatch(
            TypeDescription.RowBatchVersion.USE_DECIMAL64, opts.getBatchSize())
      : opts.getSchema().createRowBatch(opts.getBatchSize());
  this.fields = initializeFieldsFromOi(inspector);
}
// NOTE(review): excerpt of the case bodies from a setColumn switch over
// complex categories (apparently STRUCT / UNION / LIST / MAP — confirm
// against the full method). The switch statement, case labels, and
// several braces fall outside this excerpt, so the code below does not
// balance on its own. Code kept token-for-token.
for (int c = 0; c < vector.fields.length; ++c) {
StructField field = fields.get(c);
setColumn(rowId, vector.fields[c], field.getFieldObjectInspector(),
    oi.getStructFieldData(obj, field));
// UNION: record the active child's tag, then set only that child.
int tag = oi.getTag(obj);
vector.tags[rowId] = tag;
setColumn(rowId, vector.fields[tag], oi.getObjectInspectors().get(tag),
    oi.getField(obj));
break;
// LIST: children are appended at the current childCount offset.
vector.childCount += length;
for (int c = 0; c < length; ++c) {
setColumn(offset + c, vector.child, oi.getListElementObjectInspector(),
    oi.getListElement(obj, c));
// MAP: each entry writes its key and value at the same child offset.
for (Object item : map) {
Map.Entry pair = (Map.Entry) item;
setColumn(offset, vector.keys, oi.getMapKeyObjectInspector(),
    pair.getKey());
setColumn(offset, vector.values, oi.getMapValueObjectInspector(),
    pair.getValue());
offset += 1;
/**
 * Append one row to the buffered batch; flush to the underlying writer
 * when the batch fills up.
 * @param row the row value, decoded via the configured object inspector
 * @throws IOException if flushing a full batch fails
 */
@Override
public void addRow(Object row) throws IOException {
  final int slot = internalBatch.size++;
  if (fields != null) {
    // Struct rows: copy each field into its matching column vector.
    final StructObjectInspector structOi = (StructObjectInspector) inspector;
    int i = 0;
    while (i < fields.length) {
      setColumn(slot, internalBatch.cols[i],
          fields[i].getFieldObjectInspector(),
          structOi.getStructFieldData(row, fields[i]));
      i++;
    }
  } else {
    setColumn(slot, internalBatch.cols[0], inspector, row);
  }
  final boolean batchFull = internalBatch.size == internalBatch.getMaxSize();
  if (batchFull) {
    flushInternalBatch();
  }
}
/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the filesystem cannot be obtained or the writer
 *         cannot be created
 */
public static Writer createWriter(Path path,
                                  WriterOptions opts) throws IOException {
  // Prefer the caller-supplied filesystem; otherwise resolve it from
  // the destination path.
  final FileSystem fs = opts.getFileSystem() != null
      ? opts.getFileSystem()
      : path.getFileSystem(opts.getConfiguration());
  return new WriterImpl(fs, path, opts);
}
/**
 * Write a caller-supplied batch, first draining any rows buffered via
 * addRow so rows land in the file in arrival order.
 * @param batch the batch to write
 * @throws IOException if the flush or batch write fails
 */
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  // Rows added one at a time must precede this batch in the file.
  flushInternalBatch();
  super.addRowBatch(batch);
}
/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the filesystem cannot be obtained or the writer
 *         cannot be created
 */
public static Writer createWriter(Path path,
                                  WriterOptions opts) throws IOException {
  // Fall back to deriving the filesystem from the path when no explicit
  // filesystem was configured in the options.
  FileSystem fs = opts.fileSystemValue;
  if (fs == null) {
    fs = path.getFileSystem(opts.configuration);
  }
  // Unpack the option bundle into the legacy wide constructor; argument
  // order must match WriterImpl's signature exactly.
  return new WriterImpl(fs, path, opts.configuration,
      opts.inspectorValue, opts.stripeSizeValue, opts.compressValue,
      opts.bufferSizeValue, opts.rowIndexStrideValue, opts.memoryManagerValue,
      opts.blockPaddingValue, opts.versionValue, opts.callback,
      opts.encodingStrategy, opts.compressionStrategy, opts.paddingTolerance,
      opts.blockSizeValue, opts.bloomFilterColumns, opts.bloomFilterFpp);
}
/**
 * Flush the internal row batch, then emit an intermediate footer via the
 * parent writer.
 * @return the value returned by the parent's writeIntermediateFooter
 * @throws IOException if the flush or footer write fails
 */
@Override
public long writeIntermediateFooter() throws IOException {
  // Buffered rows must reach the file before the footer is written.
  flushInternalBatch();
  return super.writeIntermediateFooter();
}
/**
 * Forward a whole batch to the parent writer after draining the internal
 * buffer, preserving row arrival order in the file.
 * @param batch the batch to write
 * @throws IOException if the flush or batch write fails
 */
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  flushInternalBatch(); // keep singly-added rows ahead of this batch
  super.addRowBatch(batch);
}