/**
 * Creates an {@link OrcStruct} for the given schema, populated with the supplied field values.
 *
 * @param typeInfo the TypeInfo describing the ORC struct schema
 * @param objs one ORC object/Writable per struct field, in schema order
 * @return an OrcStruct containing the specified objects for the specified schema
 * @throws IllegalArgumentException if the number of values does not match the schema's field count
 */
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
  SettableStructObjectInspector oi =
      (SettableStructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
  // The inspector is built from a struct TypeInfo, so the field refs are StructFields.
  @SuppressWarnings("unchecked")
  List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
  // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException in the loop
  // below (too few values) or silently dropping extras (too many values).
  if (objs.length != fields.size()) {
    throw new IllegalArgumentException(
        "Expected " + fields.size() + " values for schema " + typeInfo
            + " but got " + objs.length);
  }
  OrcStruct result = (OrcStruct) oi.create();
  result.setNumFields(fields.size());
  for (int i = 0; i < fields.size(); i++) {
    oi.setStructFieldData(result, fields.get(i), objs[i]);
  }
  return result;
}
/**
 * Writes a single column of values to an RCFile using the legacy Hive serde writer path and
 * returns the size of the resulting file.
 *
 * @param outputFile  destination file
 * @param format      serde format whose serializer is used to encode each row
 * @param compression compression codec for the RCFile writer
 * @param type        logical column type (used to pick the object inspector and preprocess values)
 * @param values      column values to write, one row per element
 * @return the on-disk size of {@code outputFile} after writing
 */
private static DataSize writeRcFileColumnOld(File outputFile, Format format, Compression compression, Type type, Iterator<?> values)
        throws Exception
{
    ObjectInspector columnObjectInspector = getJavaObjectInspector(type);
    RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnObjectInspector);

    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    @SuppressWarnings("deprecation")
    Serializer serializer = format.createSerializer();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", objectInspector.getTypeName());
    serializer.initialize(new JobConf(false), tableProperties);

    // Ensure the writer is closed even if serialization or a write fails, so the
    // file handle is not leaked (RecordWriter is not AutoCloseable).
    try {
        while (values.hasNext()) {
            Object value = values.next();
            value = preprocessWriteValueOld(type, value);
            objectInspector.setStructFieldData(row, fields.get(0), value);

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    return new DataSize(outputFile.length(), BYTE).convertToMostSuccinctDataSize();
}
/**
 * Deep-converts a struct value into the settable representation described by
 * the given inspector, converting each field recursively via {@code convert}.
 */
private Object convertStruct(Object struct, ObjectInspector inspector)
{
    SettableStructObjectInspector settableInspector = (SettableStructObjectInspector) inspector;
    Object converted = settableInspector.create();
    for (StructField fieldRef : settableInspector.getAllStructFieldRefs()) {
        Object rawValue = settableInspector.getStructFieldData(struct, fieldRef);
        Object convertedValue = convert(rawValue, fieldRef.getFieldObjectInspector());
        settableInspector.setStructFieldData(converted, fieldRef, convertedValue);
    }
    return converted;
}
@Override public Object setValue(Object row, ColumnVector column, int columnRow) throws HiveException { // NULLs are handled by each individual base writer setter // We could handle NULLs centrally here but that would result in spurious allocs Object fieldValue = this.settableObjInspector.getStructFieldData(row, fieldRef); fieldValue = baseWriter.setValue(fieldValue, column, columnRow); return this.settableObjInspector.setStructFieldData(row, fieldRef, fieldValue); }
/**
 * Reads the decimal at the given position from the block and stores it into
 * the reusable writable, then attaches that writable to the current row field.
 */
@Override
public void setField(Block block, int position)
{
    HiveDecimal decimal = getHiveDecimal(decimalType, block, position);
    value.set(decimal);
    rowInspector.setStructFieldData(row, field, value);
}
}
public StructConverter(ObjectInspector inputOI, SettableStructObjectInspector outputOI) { if (inputOI instanceof StructObjectInspector) { this.inputOI = (StructObjectInspector)inputOI; this.outputOI = outputOI; inputFields = this.inputOI.getAllStructFieldRefs(); outputFields = outputOI.getAllStructFieldRefs(); // If the output has some extra fields, set them to NULL. int minFields = Math.min(inputFields.size(), outputFields.size()); fieldConverters = new ArrayList<Converter>(minFields); for (int f = 0; f < minFields; f++) { fieldConverters.add(getConverter(inputFields.get(f) .getFieldObjectInspector(), outputFields.get(f) .getFieldObjectInspector())); } output = outputOI.create(); } else if (!(inputOI instanceof VoidObjectInspector)) { throw new RuntimeException("Hive internal error: conversion of " + inputOI.getTypeName() + " to " + outputOI.getTypeName() + "not supported yet."); } }
public StructConverter(StructObjectInspector inputOI, SettableStructObjectInspector outputOI) { this.inputOI = inputOI; this.outputOI = outputOI; inputFields = inputOI.getAllStructFieldRefs(); outputFields = outputOI.getAllStructFieldRefs(); assert (inputFields.size() == outputFields.size()); fieldConverters = new ArrayList<Converter>(inputFields.size()); for (int f = 0; f < inputFields.size(); f++) { fieldConverters.add(getConverter(inputFields.get(f) .getFieldObjectInspector(), outputFields.get(f) .getFieldObjectInspector())); } output = outputOI.create(); }
@Override public Object convert(Object input) { if (input == null) { return null; } Object output = outputOI.create(); int minFields = Math.min(inputFields.size(), outputFields.size()); // Convert the fields for (int f = 0; f < minFields; f++) { Object inputFieldValue = inputOI.getStructFieldData(input, inputFields.get(f)); Object outputFieldValue = fieldConverters.get(f).convert(inputFieldValue); outputOI.setStructFieldData(output, outputFields.get(f), outputFieldValue); } // set the extra fields to null for (int f = minFields; f < outputFields.size(); f++) { outputOI.setStructFieldData(output, outputFields.get(f), null); } return output; } }
/**
 * Note that all column indexes are with respect to your record structure, not the Hive table
 * structure. Bucket column indexes must be presented in the same order as they are in the Hive
 * table definition.
 *
 * @param objectInspector inspector for the record; must be a SettableStructObjectInspector
 * @param recordIdColumn  index of the record-identifier field within the record structure
 * @param totalBuckets    number of buckets in the Hive table
 * @param bucketColumns   record-structure indexes of the bucketing columns, in table order
 * @throws IllegalArgumentException if the inspector is not settable or no bucket columns are given
 */
public BucketIdResolverImpl(ObjectInspector objectInspector, int recordIdColumn, int totalBuckets, int[] bucketColumns) {
  this.totalBuckets = totalBuckets;
  if (!(objectInspector instanceof SettableStructObjectInspector)) {
    // Message now names the type the check actually requires (was "StructObjectInspector").
    throw new IllegalArgumentException("Serious problem, expected a SettableStructObjectInspector, "
        + "but got a " + objectInspector.getClass().getName());
  }
  if (bucketColumns.length < 1) {
    throw new IllegalArgumentException("No bucket column indexes set.");
  }
  structObjectInspector = (SettableStructObjectInspector) objectInspector;
  List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs();

  recordIdentifierField = structFields.get(recordIdColumn);

  bucketFields = new StructField[bucketColumns.length];
  for (int i = 0; i < bucketColumns.length; i++) {
    int bucketColumnsIndex = bucketColumns[i];
    bucketFields[i] = structFields.get(bucketColumnsIndex);
  }
}
public StructConverter(ObjectInspector inputOI, SettableStructObjectInspector outputOI) { if (inputOI instanceof StructObjectInspector) { this.inputOI = (StructObjectInspector) inputOI; this.outputOI = outputOI; inputFields = this.inputOI.getAllStructFieldRefs(); outputFields = outputOI.getAllStructFieldRefs(); // If the output has some extra fields, set them to NULL. int minFields = Math.min(inputFields.size(), outputFields.size()); fieldConverters = new ArrayList<Converter>(minFields); for (int f = 0; f < minFields; f++) { fieldConverters.add(getConverter(inputFields.get(f).getFieldObjectInspector(), outputFields.get(f).getFieldObjectInspector())); } } else if (!(inputOI instanceof VoidObjectInspector)) { throw new UnsupportedOperationException( "Hive internal error: conversion of " + inputOI.getTypeName() + " to " + outputOI.getTypeName() + "not supported yet."); } }
/** Allocates a fresh struct instance via the settable inspector; the argument is unused. */
@Override
public Object initValue(Object ignored) {
  SettableStructObjectInspector structInspector =
      (SettableStructObjectInspector) this.objectInspector;
  return structInspector.create();
}
/**
 * Converts an OrcStruct into a list of plain Java objects, applying the
 * per-field converter to each field in declaration order.
 */
@Override
protected Object toJavaObjectInternal(Object value) throws UnexpectedTypeException {
  OrcStruct struct = (OrcStruct) value;
  List<? extends StructField> fieldRefs = inspector.getAllStructFieldRefs();
  List<Object> javaValues = new ArrayList<>(struct.getNumFields());
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = inspector.getStructFieldData(struct, fieldRefs.get(i));
    javaValues.add(converters.get(i).toJavaObject(fieldData));
  }
  return javaValues;
}
/**
 * Initializes this writer for the given settable struct inspector: delegates base
 * initialization, allocates the reusable output object, and derives the struct
 * type info from the inspector's type name.
 */
public VectorExpressionWriter init(SettableStructObjectInspector objInspector) throws HiveException {
  super.init(objInspector);
  obj = initValue(null);
  vectorExtractRow = new VectorExtractRow();
  String typeName = objInspector.getTypeName();
  structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
  return this;
}
/**
 * Extracts the bucketing column values and their inspectors from the record and
 * delegates the bucket-number computation to ObjectInspectorUtils.
 */
@Override
public int computeBucketId(Object record) {
  int fieldCount = bucketFields.length;
  Object[] columnValues = new Object[fieldCount];
  ObjectInspector[] columnInspectors = new ObjectInspector[fieldCount];
  for (int i = 0; i < fieldCount; i++) {
    StructField bucketField = bucketFields[i];
    columnValues[i] = structObjectInspector.getStructFieldData(record, bucketField);
    columnInspectors[i] = bucketField.getFieldObjectInspector();
  }
  return ObjectInspectorUtils.getBucketNumber(columnValues, columnInspectors, totalBuckets);
}
/**
 * Seeds this writer's target field within the given struct with the base
 * writer's initial value and returns the same struct.
 */
@Override
public Object initValue(Object struct) throws HiveException {
  Object fieldInitValue = baseWriter.initValue(null);
  settableObjInspector.setStructFieldData(struct, fieldRef, fieldInitValue);
  return struct;
}
}
public StructConverter(ObjectInspector inputOI, SettableStructObjectInspector outputOI) { if (inputOI instanceof StructObjectInspector) { this.inputOI = (StructObjectInspector)inputOI; this.outputOI = outputOI; inputFields = this.inputOI.getAllStructFieldRefs(); outputFields = outputOI.getAllStructFieldRefs(); // If the output has some extra fields, set them to NULL. int minFields = Math.min(inputFields.size(), outputFields.size()); fieldConverters = new ArrayList<Converter>(minFields); for (int f = 0; f < minFields; f++) { fieldConverters.add(getConverter(inputFields.get(f) .getFieldObjectInspector(), outputFields.get(f) .getFieldObjectInspector())); } output = outputOI.create(); } else if (!(inputOI instanceof VoidObjectInspector)) { throw new RuntimeException("Hive internal error: conversion of " + inputOI.getTypeName() + " to " + outputOI.getTypeName() + "not supported yet."); } }
/**
 * Creates an ORC file writer for the given columns. Column ids become the ORC column
 * names; the serde schema is derived from the storage types of the logical column types.
 *
 * @param columnIds     unique ids for each column; count must match {@code columnTypes}
 * @param columnTypes   logical types of the columns
 * @param target        destination file
 * @param writeMetadata whether the record writer should also emit metadata
 */
@VisibleForTesting
OrcFileWriter(List<Long> columnIds, List<Type> columnTypes, File target, boolean writeMetadata) {
  this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null"));
  checkArgument(columnIds.size() == columnTypes.size(), "ids and types mismatch");
  checkArgument(isUnique(columnIds), "ids must be unique");

  List<StorageType> storageTypes = ImmutableList.copyOf(toStorageTypes(columnTypes));
  List<String> hiveTypeNames =
      storageTypes.stream().map(StorageType::getHiveTypeName).collect(toList());
  List<String> columnNames = ImmutableList.copyOf(transform(columnIds, toStringFunction()));

  Properties schema = new Properties();
  schema.setProperty(META_TABLE_COLUMNS, Joiner.on(',').join(columnNames));
  schema.setProperty(META_TABLE_COLUMN_TYPES, Joiner.on(':').join(hiveTypeNames));

  serializer = createSerializer(schema);
  recordWriter = createRecordWriter(new Path(target.toURI()), columnIds, columnTypes, writeMetadata);

  tableInspector =
      getStandardStructObjectInspector(columnNames, getJavaObjectInspectors(storageTypes));
  structFields = ImmutableList.copyOf(tableInspector.getAllStructFieldRefs());
  orcRow = tableInspector.create();
}
@Override public Object convert(Object input) { if (input == null) { return null; } Object output = outputOI.create(); int minFields = Math.min(inputFields.size(), outputFields.size()); // Convert the fields for (int f = 0; f < minFields; f++) { Object inputFieldValue = inputOI.getStructFieldData(input, inputFields.get(f)); Object outputFieldValue = fieldConverters.get(f).convert(inputFieldValue); outputOI.setStructFieldData(output, outputFields.get(f), outputFieldValue); } // set the extra fields to null for (int f = minFields; f < outputFields.size(); f++) { outputOI.setStructFieldData(output, outputFields.get(f), null); } return output; } }
@Override public Object setValue(Object row, ColumnVector column, int columnRow) throws HiveException { // NULLs are handled by each individual base writer setter // We could handle NULLs centrally here but that would result in spurious allocs Object fieldValue = this.settableObjInspector.getStructFieldData(row, fieldRef); fieldValue = baseWriter.setValue(fieldValue, column, columnRow); return this.settableObjInspector.setStructFieldData(row, fieldRef, fieldValue); }
/**
 * Builds one settable VectorExpressionWriter per field of the struct inspector,
 * wrapping each field's base writer with a struct-field-aware writer.
 */
public static VectorExpressionWriter[] getSettableExpressionWriters(
    SettableStructObjectInspector objInspector) throws HiveException {
  List<? extends StructField> fieldRefs = objInspector.getAllStructFieldRefs();
  int fieldCount = fieldRefs.size();
  VectorExpressionWriter[] writers = new VectorExpressionWriter[fieldCount];
  for (int i = 0; i < fieldCount; i++) {
    StructField fieldRef = fieldRefs.get(i);
    VectorExpressionWriter fieldWriter =
        genVectorExpressionWritable(fieldRef.getFieldObjectInspector());
    writers[i] = genVectorExpressionWritable(objInspector, fieldRef, fieldWriter);
  }
  return writers;
}