/**
 * Creates the ORC column vector producer for the LLAP IO layer.
 *
 * @param metadataCache cache used for ORC file metadata
 * @param lowLevelCache low-level data cache
 * @param bufferManager tracker for cache buffer usage
 * @param conf daemon configuration
 * @param cacheMetrics sink for cache metrics
 * @param ioMetrics sink for IO metrics
 * @param tracePool pool of reusable {@code IoTrace} objects
 */
public OrcColumnVectorProducer(MetadataCache metadataCache, LowLevelCache lowLevelCache,
    BufferUsageManager bufferManager, Configuration conf, LlapDaemonCacheMetrics cacheMetrics,
    LlapDaemonIOMetrics ioMetrics, FixedSizedObjectPool<IoTrace> tracePool) {
  LlapIoImpl.LOG.info("Initializing ORC column vector producer");
  this.metadataCache = metadataCache;
  this.lowLevelCache = lowLevelCache;
  this.bufferManager = bufferManager;
  this.conf = conf;
  // Resolved once up front so per-read code paths need not consult the Configuration again.
  this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
  this.cacheMetrics = cacheMetrics;
  this.ioMetrics = ioMetrics;
  this.tracePool = tracePool;
}
.schema(schema) .range(offsetAndLength.f0, offsetAndLength.f1) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(daemonConf); if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) { jobConf = new Configuration(jobConf); jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
/**
 * Lazily creates (and, if requested, opens) the raw ORC data reader, charging any
 * HDFS wait to the LLAP wall-clock counter.
 *
 * @param isOpen whether the reader must be open when this method returns
 * @throws IOException if opening the ORC reader or the raw data reader fails
 */
private void ensureRawDataReader(boolean isOpen) throws IOException {
  ensureOrcReader();
  if (rawDataReader != null) {
    // Reader already exists; just open it on demand.
    if (!isRawDataReaderOpen && isOpen) {
      long openStart = counters.startTimeCounter();
      rawDataReader.open();
      counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, openStart);
    }
    return;
  }
  long createStart = counters.startTimeCounter();
  // Zero-copy is a daemon-level setting; with no daemon config it stays disabled.
  boolean zeroCopy = (daemonConf != null) && OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
  DataReaderProperties props = DataReaderProperties.builder()
      .withBufferSize(orcReader.getCompressionSize())
      .withCompression(orcReader.getCompressionKind())
      .withFileSystem(fs)
      .withPath(path)
      .withTypeCount(orcReader.getSchema().getMaximumId() + 1)
      .withZeroCopy(zeroCopy)
      .build();
  rawDataReader = RecordReaderUtils.createDefaultDataReader(props);
  if (isOpen) {
    rawDataReader.open();
    isRawDataReaderOpen = true;
  }
  counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, createStart);
}
boolean addBlockPadding = OrcConf.BLOCK_PADDING.getBoolean(conf);
/**
 * Initializes reader options from the supplied configuration.
 *
 * @param conf configuration providing the ORC reader settings
 */
public Options(Configuration conf) {
  // Each flag is read straight from its OrcConf entry; no table-property overrides here.
  useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(conf);
  skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
  tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
  forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
  isSchemaEvolutionCaseAware = OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
}
/**
 * Reads this option's boolean value from the configuration alone.
 *
 * @param conf configuration to read from
 * @return the configured value, or the option's default if unset
 */
public boolean getBoolean(Configuration conf) {
  // Delegate to the (Properties, Configuration) overload with no table properties.
  return getBoolean(null, conf);
}
/**
 * Constructs the producer that turns cached ORC data into column vectors.
 *
 * @param metadataCache ORC metadata cache
 * @param lowLevelCache low-level data cache
 * @param bufferManager buffer usage manager for cached buffers
 * @param conf daemon configuration
 * @param cacheMetrics cache metrics collector
 * @param ioMetrics IO metrics collector
 * @param tracePool reusable pool of IO trace objects
 */
public OrcColumnVectorProducer(
    MetadataCache metadataCache,
    LowLevelCache lowLevelCache,
    BufferUsageManager bufferManager,
    Configuration conf,
    LlapDaemonCacheMetrics cacheMetrics,
    LlapDaemonIOMetrics ioMetrics,
    FixedSizedObjectPool<IoTrace> tracePool) {
  LlapIoImpl.LOG.info("Initializing ORC column vector producer");
  this.metadataCache = metadataCache;
  this.lowLevelCache = lowLevelCache;
  this.bufferManager = bufferManager;
  this.conf = conf;
  // Cache the skip-corrupt flag once; it does not change over the daemon's lifetime.
  this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
  this.cacheMetrics = cacheMetrics;
  this.ioMetrics = ioMetrics;
  this.tracePool = tracePool;
}
boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(hiveConf); logger.info("ORC Zero-Copy {}.", useZeroCopy ? "enabled" : "disabled");
boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(hiveConf); if (useZeroCopy) { logger.warn("ORC zero-copy feature has been manually enabled. This is not recommended.");
Reader.Options options = reader.options() .range(start, length) .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf)) .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); if (schema != null) { options.schema(schema);
protected WriterOptions(Properties tableProperties, Configuration conf) { configuration = conf; memoryManagerValue = getStaticMemoryManager(conf); overwrite = OrcConf.OVERWRITE_OUTPUT_FILE.getBoolean(tableProperties, conf); stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf); blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf); conf); blockPaddingValue = OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf); compressValue = CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties, conf).toUpperCase()); enforceBufferSize = OrcConf.ENFORCE_COMPRESSION_BUFFER_SIZE.getBoolean(tableProperties, conf); String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties, conf); shims = HadoopShimsFactory.get(); writeVariableLengthBlocks = OrcConf.WRITE_VARIABLE_LENGTH_BLOCKS.getBoolean(tableProperties,conf); directEncodingColumns = OrcConf.DIRECT_ENCODING_COLUMNS.getString( tableProperties, conf);
if (readerClass == HiveOrcReader.class) { if (OrcConf.USE_ZEROCOPY.getBoolean(configuration)) { if (!NativeCodeLoader.isNativeCodeLoaded()) { throw UserException.dataReadError()
/**
 * Sets up the output streams for a string-family column writer: a DATA stream that
 * doubles as direct output and dictionary-row output, plus DICTIONARY_DATA and
 * LENGTH streams. Whether dictionary encoding is used is decided later, driven by
 * the per-column key-size threshold.
 *
 * @param columnId id of the column being written
 * @param schema type description of the column
 * @param writer context supplying streams, configuration, and index settings
 * @param nullable whether the column may contain nulls
 * @throws IOException if creating any of the streams fails
 */
StringBaseTreeWriter(int columnId, TypeDescription schema, WriterContext writer,
    boolean nullable) throws IOException {
  super(columnId, schema, writer, nullable);
  this.isDirectV2 = isNewWriteFormat(writer);
  directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
  stringOutput = writer.createStream(id, OrcProto.Stream.Kind.DICTIONARY_DATA);
  lengthOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  // Dictionary row ids share the same physical DATA stream as direct output.
  rowOutput = createIntegerWriter(directStreamOutput, false, isDirectV2, writer);
  if (rowIndexPosition != null) {
    recordPosition(rowIndexPosition);
  }
  rowIndexValueCount.add(0L);
  buildIndex = writer.buildIndex();
  Configuration conf = writer.getConfiguration();
  dictionaryKeySizeThreshold = writer.getDictionaryKeySizeThreshold(columnId);
  strideDictionaryCheck = OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
  if (dictionaryKeySizeThreshold <= 0.0) {
    // A non-positive threshold disables dictionary encoding outright; no later check needed.
    useDictionaryEncoding = false;
    doneDictionaryCheck = true;
  } else {
    doneDictionaryCheck = false;
  }
}
/**
 * Creates the stream writers for a string-family column: dictionary data, length,
 * dictionary-row output, and direct (non-dictionary) output streams. The decision
 * between dictionary and direct encoding is deferred until the first check.
 *
 * @param columnId id of the column being written
 * @param schema type description of the column
 * @param writer factory supplying streams and configuration
 * @param nullable whether the column may contain nulls
 * @throws IOException if stream creation fails
 */
StringBaseTreeWriter(int columnId, TypeDescription schema, StreamFactory writer,
    boolean nullable) throws IOException {
  super(columnId, schema, writer, nullable);
  this.isDirectV2 = isNewWriteFormat(writer);
  stringOutput = writer.createStream(id, OrcProto.Stream.Kind.DICTIONARY_DATA);
  lengthOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  rowOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.DATA), false, isDirectV2, writer);
  recordPosition(rowIndexPosition);
  rowIndexValueCount.add(0L);
  buildIndex = writer.buildIndex();
  // NOTE(review): DATA and LENGTH streams are requested twice (rowOutput/directStreamOutput,
  // lengthOutput/directLengthOutput); assumes createStream returns the same stream per
  // (id, kind) pair — confirm against the StreamFactory implementation.
  directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
  directLengthOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  Configuration conf = writer.getConfiguration();
  dictionaryKeySizeThreshold =
      org.apache.orc.OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
  strideDictionaryCheck =
      org.apache.orc.OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
  doneDictionaryCheck = false;
}
/**
 * Initializes stream writers for a string-family column. Both the dictionary path
 * (dictionary data, lengths, row ids) and the direct path (direct data, direct
 * lengths) are set up; which one is used is decided later by the dictionary check.
 *
 * @param columnId id of the column being written
 * @param schema type description of the column
 * @param writer factory supplying streams and configuration
 * @param nullable whether the column may contain nulls
 * @throws IOException if stream creation fails
 */
StringBaseTreeWriter(int columnId, TypeDescription schema, StreamFactory writer,
    boolean nullable) throws IOException {
  super(columnId, schema, writer, nullable);
  this.isDirectV2 = isNewWriteFormat(writer);
  stringOutput = writer.createStream(id, OrcProto.Stream.Kind.DICTIONARY_DATA);
  lengthOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  rowOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.DATA), false, isDirectV2, writer);
  recordPosition(rowIndexPosition);
  rowIndexValueCount.add(0L);
  buildIndex = writer.buildIndex();
  // NOTE(review): DATA and LENGTH streams are created a second time here for the direct
  // path; presumably createStream caches per (id, kind) — verify in StreamFactory.
  directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
  directLengthOutput = createIntegerWriter(writer.createStream(id,
      OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  Configuration conf = writer.getConfiguration();
  dictionaryKeySizeThreshold =
      org.apache.orc.OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
  strideDictionaryCheck =
      org.apache.orc.OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
  doneDictionaryCheck = false;
}
protected WriterOptions(Properties tableProperties, Configuration conf) { configuration = conf; memoryManagerValue = getStaticMemoryManager(conf); stripeSizeValue = org.apache.orc.OrcConf.STRIPE_SIZE.getLong(tableProperties, conf); blockSizeValue = org.apache.orc.OrcConf.BLOCK_SIZE.getLong(tableProperties, conf); rowIndexStrideValue = (int) org.apache.orc.OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf); bufferSizeValue = (int) org.apache.orc.OrcConf.BUFFER_SIZE.getLong(tableProperties, conf); blockPaddingValue = org.apache.orc.OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf); compressValue = CompressionKind.valueOf(org.apache.orc.OrcConf.COMPRESS.getString(tableProperties, conf)); String versionName = org.apache.orc.OrcConf.WRITE_FORMAT.getString(tableProperties, conf); versionValue = Version.byName(versionName); String enString = org.apache.orc.OrcConf.ENCODING_STRATEGY.getString(tableProperties, conf); encodingStrategy = EncodingStrategy.valueOf(enString); String compString = org.apache.orc.OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf); compressionStrategy = CompressionStrategy.valueOf(compString); paddingTolerance = org.apache.orc.OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf); bloomFilterColumns = org.apache.orc.OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties, conf); bloomFilterFpp = org.apache.orc.OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties, conf); }
protected WriterOptions(Properties tableProperties, Configuration conf) { configuration = conf; memoryManagerValue = getStaticMemoryManager(conf); stripeSizeValue = org.apache.orc.OrcConf.STRIPE_SIZE.getLong(tableProperties, conf); blockSizeValue = org.apache.orc.OrcConf.BLOCK_SIZE.getLong(tableProperties, conf); rowIndexStrideValue = (int) org.apache.orc.OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf); bufferSizeValue = (int) org.apache.orc.OrcConf.BUFFER_SIZE.getLong(tableProperties, conf); blockPaddingValue = org.apache.orc.OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf); compressValue = CompressionKind.valueOf(org.apache.orc.OrcConf.COMPRESS.getString(tableProperties, conf)); String versionName = org.apache.orc.OrcConf.WRITE_FORMAT.getString(tableProperties, conf); versionValue = Version.byName(versionName); String enString = org.apache.orc.OrcConf.ENCODING_STRATEGY.getString(tableProperties, conf); encodingStrategy = EncodingStrategy.valueOf(enString); String compString = org.apache.orc.OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf); compressionStrategy = CompressionStrategy.valueOf(compString); paddingTolerance = org.apache.orc.OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf); bloomFilterColumns = org.apache.orc.OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties, conf); bloomFilterFpp = org.apache.orc.OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties, conf); }
/**
 * Ensures the raw ORC data reader exists, opening it when requested and recording
 * the elapsed HDFS time against the LLAP IO counters.
 *
 * @param isOpen whether the reader must be open when this method returns
 * @throws IOException if opening the ORC reader or the raw data reader fails
 */
private void ensureRawDataReader(boolean isOpen) throws IOException {
  ensureOrcReader();
  if (rawDataReader != null) {
    // Already created; open lazily if the caller needs it open now.
    if (!isRawDataReaderOpen && isOpen) {
      long openStart = counters.startTimeCounter();
      rawDataReader.open();
      counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, openStart);
    }
    return;
  }
  long createStart = counters.startTimeCounter();
  // Zero-copy only applies when a daemon configuration is present.
  boolean zeroCopy = (daemonConf != null) && OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
  DataReaderProperties props = DataReaderProperties.builder()
      .withBufferSize(orcReader.getCompressionSize())
      .withCompression(orcReader.getCompressionKind())
      .withFileSystem(fs)
      .withPath(path)
      .withTypeCount(orcReader.getSchema().getMaximumId() + 1)
      .withZeroCopy(zeroCopy)
      .build();
  rawDataReader = RecordReaderUtils.createDefaultDataReader(props);
  if (isOpen) {
    rawDataReader.open();
    isRawDataReaderOpen = true;
  }
  counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, createStart);
}
/**
 * Builds the options for the ORC Writer based on the JobConf.
 *
 * @param conf the job configuration
 * @return a new options object
 */
public static OrcFile.WriterOptions buildOptions(Configuration conf) {
  return OrcFile.writerOptions(conf)
      .version(OrcFile.Version.byName(OrcConf.WRITE_FORMAT.getString(conf)))
      .setSchema(TypeDescription.fromString(OrcConf.MAPRED_OUTPUT_SCHEMA.getString(conf)))
      // Uppercase the codec name: Enum.valueOf is case-sensitive, so a configured
      // lowercase value like "snappy" would otherwise throw IllegalArgumentException.
      .compress(CompressionKind.valueOf(OrcConf.COMPRESS.getString(conf).toUpperCase()))
      .encodingStrategy(OrcFile.EncodingStrategy.valueOf(
          OrcConf.ENCODING_STRATEGY.getString(conf)))
      .bloomFilterColumns(OrcConf.BLOOM_FILTER_COLUMNS.getString(conf))
      .bloomFilterFpp(OrcConf.BLOOM_FILTER_FPP.getDouble(conf))
      .blockSize(OrcConf.BLOCK_SIZE.getLong(conf))
      .blockPadding(OrcConf.BLOCK_PADDING.getBoolean(conf))
      .stripeSize(OrcConf.STRIPE_SIZE.getLong(conf))
      .rowIndexStride((int) OrcConf.ROW_INDEX_STRIDE.getLong(conf))
      .bufferSize((int) OrcConf.BUFFER_SIZE.getLong(conf))
      .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf));
}