// Reads the ORC writer settings from the processor context: STRIPE_SIZE as a long and
// BUFFER_SIZE as an int, both converted with asDataSize(DataUnit.B), and COMPRESSION_TYPE
// resolved to a CompressionKind constant by its name.
// NOTE(review): CompressionKind.valueOf presumably has Enum.valueOf semantics (exact,
// case-sensitive name match; throws on an unknown or null name) - confirm the property is
// restricted to valid constant names.
// hiveAvroSchema starts out holding null and totalRecordCount starts at 0; presumably both are
// filled in by code that follows this fragment - not visible here.
final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue(); final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue(); final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue()); final AtomicReference<Schema> hiveAvroSchema = new AtomicReference<>(null); final AtomicInteger totalRecordCount = new AtomicInteger(0);
/**
 * Creates the ORC-backed {@link HDFSRecordWriter} for the given target path.
 *
 * <p>All processor settings (stripe size, buffer size, compression kind, Hive field-name
 * normalization and the Hive table name) are resolved <em>before</em> the underlying ORC
 * writer is created, so a failure while evaluating a property cannot leave an already-created
 * writer behind without anyone to close it.
 *
 * @param context  processor context the writer settings are read from
 * @param flowFile flow file used to evaluate the Hive table name expression, when that property is set
 * @param conf     Hadoop configuration handed to the ORC writer
 * @param path     destination path of the ORC file
 * @param schema   record schema the ORC schema is derived from; its identifier name (or
 *                 {@code "unknown"} when absent) is the table-name fallback
 * @return a record writer wrapping the newly created ORC writer
 * @throws IOException             declared by the overridden method; may be raised while creating the ORC writer
 * @throws SchemaNotFoundException declared by the overridden method
 */
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile, final Configuration conf,
        final Path path, final RecordSchema schema) throws IOException, SchemaNotFoundException {

    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());

    // Read once: the same flag drives both the ORC schema derivation and the record writer.
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();

    // Explicit table name wins; otherwise fall back to the normalized schema name.
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));

    final TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, hiveFieldNames);

    // Create the writer last, once nothing else in this method can fail before it is handed off.
    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
}
} // closes the enclosing scope that was opened outside this view
public OrcStorage(String options) { String[] optsArr = options.split(" "); try { CommandLine configuredOptions = parser.parse(validOptions, optsArr); if (configuredOptions.hasOption('s')) { stripeSize = Long.parseLong(configuredOptions.getOptionValue('s')); } if (configuredOptions.hasOption('r')) { rowIndexStride = Integer.parseInt(configuredOptions.getOptionValue('r')); } if (configuredOptions.hasOption('b')) { bufferSize = Integer.parseInt(configuredOptions.getOptionValue('b')); } blockPadding = configuredOptions.hasOption('p'); if (configuredOptions.hasOption('c')) { compress = CompressionKind.valueOf(configuredOptions.getOptionValue('c')); } if (configuredOptions.hasOption('v')) { version = Version.byName(configuredOptions.getOptionValue('v')); } } catch (ParseException e) { log.error("Exception in OrcStorage", e); log.error("OrcStorage called with arguments " + options); warn("ParseException in OrcStorage", PigWarning.UDF_WARNING_1); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("OrcStorage(',', '[options]')", validOptions); throw new RuntimeException(e); } } @Override
/**
 * Parses the ORC metadata and footer sections out of {@code footerBuffer}.
 *
 * <p>Starting at the buffer's current position, the first {@code metadataSize} bytes are read
 * as the metadata section and the rest up to the buffer's limit as the footer. Position and
 * limit are moved to frame each section in turn; on normal completion the position is set back
 * to where it started and the limit is back at its initial value.
 *
 * @param codecStr     name of the {@code CompressionKind} constant used for both sections
 * @param bufferSize   buffer size passed to the stream decoder
 * @param metadataSize length in bytes of the metadata section
 * @param footerBuffer buffer positioned at the start of the metadata section
 * @throws IOException declared for the stream creation and protobuf parsing calls
 */
MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, ByteBuffer footerBuffer) throws IOException {
    this.compressionKind = CompressionKind.valueOf(codecStr);
    this.bufferSize = bufferSize;
    this.codec = WriterImpl.createCodec(compressionKind);
    this.metadataSize = metadataSize;

    // Capture the section boundaries before touching the buffer's position/limit.
    final int start = footerBuffer.position();
    final int footerLength = footerBuffer.remaining() - metadataSize;
    final int metadataEnd = start + metadataSize;

    // Frame the metadata section: [start, metadataEnd).
    footerBuffer.limit(metadataEnd);
    final InputStream metadataStream = InStream.create(
            "metadata",
            Lists.<DiskRange>newArrayList(new BufferChunk(footerBuffer, 0)),
            metadataSize,
            codec,
            bufferSize);
    this.metadata = OrcProto.Metadata.parseFrom(metadataStream);

    // Frame the footer section: [metadataEnd, metadataEnd + footerLength).
    footerBuffer.position(metadataEnd);
    footerBuffer.limit(metadataEnd + footerLength);
    final InputStream footerStream = InStream.create(
            "footer",
            Lists.<DiskRange>newArrayList(new BufferChunk(footerBuffer, 0)),
            footerLength,
            codec,
            bufferSize);
    this.footer = OrcProto.Footer.parseFrom(footerStream);

    // Rewind so the caller sees the buffer where it was handed in.
    footerBuffer.position(start);

    this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
}
} // closes the enclosing scope that was opened outside this view
// Reads the ORC writer settings from the processor context: STRIPE_SIZE as a long and
// BUFFER_SIZE as an int, both converted with asDataSize(DataUnit.B), and COMPRESSION_TYPE
// resolved to a CompressionKind constant by its name.
// NOTE(review): CompressionKind.valueOf presumably has Enum.valueOf semantics (exact,
// case-sensitive name match; throws on an unknown or null name) - confirm the property is
// restricted to valid constant names.
// hiveAvroSchema starts out holding null and totalRecordCount starts at 0; presumably both are
// filled in by code that follows this fragment - not visible here.
final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue(); final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue(); final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue()); final AtomicReference<Schema> hiveAvroSchema = new AtomicReference<>(null); final AtomicInteger totalRecordCount = new AtomicInteger(0);
/**
 * Writes {@code numInputFiles} single-row ORC files under {@code baseLocation}.
 *
 * <p>File {@code i} is named {@code child_i} and holds one row with the string
 * {@code "outputData" + i} in a {@code struct<key:string>} schema. Stripe size, compression
 * kind, buffer size and row index stride are taken from the Hive ORC defaults of a fresh
 * {@link Configuration}.
 *
 * @param baseLocation directory location the child files are appended to
 * @param numInputFiles number of files to write
 * @return the written row values, sorted in natural (lexicographic) string order
 * @throws IOException declared for the file system and ORC writer calls
 */
private List<String> writeSmallOrcFiles(Location baseLocation, int numInputFiles) throws IOException {
    ObjectInspector rowInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
            TypeInfoUtils.getTypeInfoFromTypeString("struct<key:string>"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Writer tuning comes straight from the Hive ORC defaults.
    long defaultStripeSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE);
    String defaultCodecName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS);
    CompressionKind defaultCodec = CompressionKind.valueOf(defaultCodecName);
    int defaultBufferSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE);
    int defaultRowIndexStride = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);

    List<String> rows = new ArrayList<>();
    for (int fileIndex = 0; fileIndex < numInputFiles; fileIndex++) {
        Path target = new Path(baseLocation.append("child_" + fileIndex).toURI());
        Writer writer = OrcFile.createWriter(fs, target, conf, rowInspector, defaultStripeSize,
                defaultCodec, defaultBufferSize, defaultRowIndexStride);
        try {
            String row = "outputData" + fileIndex;
            writer.addRow(Collections.singletonList(row));
            rows.add(row);
        } finally {
            // Always close so the file is finalized even if addRow fails.
            writer.close();
        }
    }

    Collections.sort(rows);
    return rows;
}
bufferSizeValue = HiveConf.getIntVar(conf, HIVE_ORC_DEFAULT_BUFFER_SIZE); blockPaddingValue = HiveConf.getBoolVar(conf, HIVE_ORC_DEFAULT_BLOCK_PADDING); compressValue = CompressionKind.valueOf(HiveConf.getVar(conf, HIVE_ORC_DEFAULT_COMPRESS)); String versionName = HiveConf.getVar(conf, HIVE_ORC_WRITE_FORMAT); if (versionName == null) {
// Resolves propVal to the CompressionKind constant of that name and passes it to options.compress.
// NOTE(review): CompressionKind.valueOf presumably has Enum.valueOf semantics (exact,
// case-sensitive match; throws on an unknown or null name) - confirm propVal is validated upstream.
options.compress(CompressionKind.valueOf(propVal));