/**
 * Note that all column indexes are with respect to your record structure, not the Hive table
 * structure. Bucket column indexes must be presented in the same order as they are in the Hive
 * table definition.
 */
public BucketIdResolverImpl(ObjectInspector objectInspector, int recordIdColumn, int totalBuckets,
    int[] bucketColumns) {
  // Validate all arguments before mutating any state, so a failed construction
  // leaves no partially-initialized fields behind.
  if (!(objectInspector instanceof SettableStructObjectInspector)) {
    // Fixed: the message previously said "StructObjectInspector", but the cast below
    // actually requires the settable variant.
    throw new IllegalArgumentException("Serious problem, expected a SettableStructObjectInspector, "
        + "but got a " + objectInspector.getClass().getName());
  }
  if (bucketColumns.length < 1) {
    throw new IllegalArgumentException("No bucket column indexes set.");
  }
  this.totalBuckets = totalBuckets;
  structObjectInspector = (SettableStructObjectInspector) objectInspector;
  List<? extends StructField> structFields = structObjectInspector.getAllStructFieldRefs();
  // Resolve the record-identifier field and each bucket field once, up front,
  // so per-record resolution is a plain array lookup.
  recordIdentifierField = structFields.get(recordIdColumn);
  bucketFields = new StructField[bucketColumns.length];
  for (int i = 0; i < bucketColumns.length; i++) {
    bucketFields[i] = structFields.get(bucketColumns[i]);
  }
}
/**
 * Builds one settable {@link VectorExpressionWriter} per field of the given struct
 * object inspector, in field-reference order.
 */
public static VectorExpressionWriter[] getSettableExpressionWriters(
    SettableStructObjectInspector objInspector) throws HiveException {
  final List<? extends StructField> structFields = objInspector.getAllStructFieldRefs();
  final VectorExpressionWriter[] fieldWriters = new VectorExpressionWriter[structFields.size()];
  int index = 0;
  for (StructField structField : structFields) {
    // Wrap the per-field base writer so the result writes through the struct inspector.
    VectorExpressionWriter elementWriter =
        genVectorExpressionWritable(structField.getFieldObjectInspector());
    fieldWriters[index++] = genVectorExpressionWritable(objInspector, structField, elementWriter);
  }
  return fieldWriters;
}
/**
 * Returns a settable {@link VectorExpressionWriter} for every field of the supplied
 * struct inspector, ordered as the inspector reports its field references.
 */
public static VectorExpressionWriter[] getSettableExpressionWriters(
    SettableStructObjectInspector objInspector) throws HiveException {
  List<? extends StructField> fields = objInspector.getAllStructFieldRefs();
  VectorExpressionWriter[] result = new VectorExpressionWriter[fields.size()];
  int pos = 0;
  while (pos < result.length) {
    StructField field = fields.get(pos);
    // The inner call builds the plain field writer; the outer call binds it to the struct.
    result[pos] = genVectorExpressionWritable(objInspector, field,
        genVectorExpressionWritable(field.getFieldObjectInspector()));
    ++pos;
  }
  return result;
}
/**
 * Create an object of OrcStruct given a TypeInfo and a list of objects.
 *
 * @param typeInfo The TypeInfo object representing the ORC record schema
 * @param objs ORC objects/Writables, one per field of {@code typeInfo}, in schema order
 * @return an OrcStruct containing the specified objects for the specified schema
 */
// Fixed for consistency with the annotated twin of this method elsewhere in the file:
// the cast of the wildcard field-ref list to List<StructField> is unchecked and was
// previously emitting a compiler warning.
@SuppressWarnings("unchecked")
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
  SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct
      .createObjectInspector(typeInfo);
  List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
  OrcStruct result = (OrcStruct) oi.create();
  result.setNumFields(fields.size());
  // Assign each provided value to the corresponding struct field, positionally.
  for (int i = 0; i < fields.size(); i++) {
    oi.setStructFieldData(result, fields.get(i), objs[i]);
  }
  return result;
}
public StructConverter(ObjectInspector inputOI, SettableStructObjectInspector outputOI) { if (inputOI instanceof StructObjectInspector) { this.inputOI = (StructObjectInspector)inputOI; this.outputOI = outputOI; inputFields = this.inputOI.getAllStructFieldRefs(); outputFields = outputOI.getAllStructFieldRefs(); // If the output has some extra fields, set them to NULL. int minFields = Math.min(inputFields.size(), outputFields.size()); fieldConverters = new ArrayList<Converter>(minFields); for (int f = 0; f < minFields; f++) { fieldConverters.add(getConverter(inputFields.get(f) .getFieldObjectInspector(), outputFields.get(f) .getFieldObjectInspector())); } output = outputOI.create(); } else if (!(inputOI instanceof VoidObjectInspector)) { throw new RuntimeException("Hive internal error: conversion of " + inputOI.getTypeName() + " to " + outputOI.getTypeName() + "not supported yet."); } }
/**
 * Builds an {@link OrcStruct} for the given ORC record schema, populated with the
 * supplied values in field order.
 *
 * @param typeInfo The TypeInfo object representing the ORC record schema
 * @param objs ORC objects/Writables, one per schema field, in order
 * @return an OrcStruct containing the specified objects for the specified schema
 */
@SuppressWarnings("unchecked")
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
  SettableStructObjectInspector inspector =
      (SettableStructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
  List<StructField> fieldRefs = (List<StructField>) inspector.getAllStructFieldRefs();
  OrcStruct struct = (OrcStruct) inspector.create();
  struct.setNumFields(fieldRefs.size());
  // Pair each field with its value positionally.
  int next = 0;
  for (StructField fieldRef : fieldRefs) {
    inspector.setStructFieldData(struct, fieldRef, objs[next++]);
  }
  return struct;
}
/**
 * Deep-converts a struct value: creates a fresh struct via the settable inspector and
 * copies every field through {@code convert}.
 */
private Object convertStruct(Object struct, ObjectInspector inspector) {
  final SettableStructObjectInspector settableOI = (SettableStructObjectInspector) inspector;
  final Object converted = settableOI.create();
  for (StructField fieldRef : settableOI.getAllStructFieldRefs()) {
    Object original = settableOI.getStructFieldData(struct, fieldRef);
    Object replacement = convert(original, fieldRef.getFieldObjectInspector());
    settableOI.setStructFieldData(converted, fieldRef, replacement);
  }
  return converted;
}
/**
 * Converts one struct value at {@code batchIndex} of a {@link StructColumnVector} into an
 * object built through the field's {@link SettableStructObjectInspector}.
 *
 * Walks the struct's child fields positionally: child column vector i is converted via
 * {@code convertComplexFieldRowColumn} with helper field i, and the result is written into
 * struct-field-ref i of the newly created struct. Assumes the helper's field array, the
 * inspector's field refs, and the column vector's children are all parallel and equal-length
 * — TODO confirm against the schema construction.
 *
 * @param colVector  the column vector holding the struct column (cast to StructColumnVector)
 * @param batchIndex row position within the batch to convert
 * @param field      schema field carrying the object inspector and complex-type helper
 * @return the populated struct object created by the settable inspector
 */
private Object convertStructRowColumn(
    ColumnVector colVector, int batchIndex, Field field) throws IOException {
  final SettableStructObjectInspector structOI =
      (SettableStructObjectInspector) field.objectInspector;
  final List<? extends StructField> structFields = structOI.getAllStructFieldRefs();
  final StructComplexTypeHelper structHelper =
      (StructComplexTypeHelper) field.getComplexHelper();
  final Field[] fields = structHelper.getFields();
  final StructColumnVector structColumnVector = (StructColumnVector) colVector;
  final Object struct = structOI.create();
  for (int i = 0; i < fields.length; i++) {
    // Convert child column i for this row and store it into the matching struct field.
    final Object fieldObject =
        convertComplexFieldRowColumn(structColumnVector.fields[i], batchIndex, fields[i]);
    structOI.setStructFieldData(struct, structFields.get(i), fieldObject);
  }
  // NOTE(review): presumably tells the shared deserializeRead that this complex value's
  // variable-length fields are complete — confirm the pairing with the caller's read protocol.
  deserializeRead.finishComplexVariableFieldsType();
  return struct;
}
/**
 * Produces a converted copy of {@code struct}: a new struct from the settable inspector
 * whose fields are the element-wise {@code convert} of the original's fields.
 */
private Object convertStruct(Object struct, ObjectInspector inspector) {
  SettableStructObjectInspector oi = (SettableStructObjectInspector) inspector;
  Object copy = oi.create();
  List<? extends StructField> refs = oi.getAllStructFieldRefs();
  for (int f = 0; f < refs.size(); f++) {
    StructField ref = refs.get(f);
    oi.setStructFieldData(copy, ref,
        convert(oi.getStructFieldData(struct, ref), ref.getFieldObjectInspector()));
  }
  return copy;
}
List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR); Object row = objectInspector.create(); StructField field = objectInspector.getAllStructFieldRefs().get(0);
List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
() -> {}); Object row = objectInspector.create(); List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs()); while (stream(valuesByField).allMatch(Iterator::hasNext)) { for (int field = 0; field < fields.size(); field++) {
/**
 * Writes a single-column RCFile named "test" using the legacy (deprecated) Hive writer
 * path, serializing each value from {@code values}, and reports the resulting file size.
 *
 * @param outputFile  destination file
 * @param format      serialization format; supplies the deprecated Serializer
 * @param compression compression to apply to the RCFile
 * @param type        logical column type; used to pick the Java object inspector and to
 *                    preprocess each value before writing
 * @param values      the column values to write, consumed fully
 * @return the written file's size, converted to the most succinct unit
 */
private static DataSize writeRcFileColumnOld(File outputFile, Format format,
    Compression compression, Type type, Iterator<?> values)
    throws Exception {
  ObjectInspector columnObjectInspector = getJavaObjectInspector(type);
  RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnObjectInspector);
  SettableStructObjectInspector objectInspector =
      createSettableStructObjectInspector("test", columnObjectInspector);
  // A single reusable row object; only its one field is overwritten per value.
  Object row = objectInspector.create();
  List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
  @SuppressWarnings("deprecation")
  Serializer serializer = format.createSerializer();
  // The serializer must be initialized with the column name/type table properties
  // before any serialize() call.
  Properties tableProperties = new Properties();
  tableProperties.setProperty("columns", "test");
  tableProperties.setProperty("columns.types", objectInspector.getTypeName());
  serializer.initialize(new JobConf(false), tableProperties);
  while (values.hasNext()) {
    Object value = values.next();
    // Legacy path expects values pre-converted to the old writable representation.
    value = preprocessWriteValueOld(type, value);
    objectInspector.setStructFieldData(row, fields.get(0), value);
    Writable record = serializer.serialize(row, objectInspector);
    recordWriter.write(record);
  }
  recordWriter.close(false);
  return new DataSize(outputFile.length(), BYTE).convertToMostSuccinctDataSize();
}
/** * Write a file that contains a given number of maps where each row has 10 entries in total * and some entries have null keys/values. */ private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows) throws IOException, ReflectiveOperationException, SerDeException { Serializer serde = new OrcSerde(); TempFile tempFile = new TempFile(); FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType); SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType); Object row = objectInspector.create(); StructField field = objectInspector.getAllStructFieldRefs().get(0); for (int i = 1; i <= rows; i++) { HashMap<Long, Long> map = new HashMap<>(); for (int j = 1; j <= 8; j++) { Long value = (long) j; map.put(value, value); } // Add null values so that the StreamReader nullVectors are not empty. map.put(null, 0L); map.put(0L, null); objectInspector.setStructFieldData(row, field, map); Writable record = serde.serialize(row, objectInspector); writer.write(record); } writer.close(false); return tempFile; }
/**
 * Writes {@code count} rows of a single BIGINT column to {@code file}, with values
 * 0 .. count-1 in order.
 */
private static void createSequentialFile(File file, int count)
    throws IOException, SerDeException {
  FileSinkOperator.RecordWriter recordWriter =
      createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
  @SuppressWarnings("deprecation")
  Serializer serializer = new OrcSerde();
  SettableStructObjectInspector inspector =
      createSettableStructObjectInspector("test", BIGINT);
  Object row = inspector.create();
  StructField column = inspector.getAllStructFieldRefs().get(0);
  int next = 0;
  while (next < count) {
    inspector.setStructFieldData(row, column, (long) next);
    recordWriter.write(serializer.serialize(row, inspector));
    next++;
  }
  recordWriter.close(false);
}
/**
 * Write a file that contains a number of rows with 1 VARCHAR column, and all values are not null.
 */
private static TempFile createSingleColumnVarcharFile(int count, int length)
    throws Exception {
  Serializer serializer = new OrcSerde();
  TempFile output = new TempFile();
  FileSinkOperator.RecordWriter recordWriter =
      createOrcRecordWriter(output.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
  SettableStructObjectInspector inspector =
      createSettableStructObjectInspector("test", VARCHAR);
  Object row = inspector.create();
  StructField column = inspector.getAllStructFieldRefs().get(0);
  // Every row gets the same value: "0" repeated `length` times.
  String value = Strings.repeat("0", length);
  for (int written = 0; written < count; written++) {
    inspector.setStructFieldData(row, column, value);
    recordWriter.write(serializer.serialize(row, inspector));
  }
  recordWriter.close(false);
  return output;
}
/**
 * Writes BIGINT values 0, 3, 6, ... 297 to {@code file}, flushing the writer every
 * 20 records (whenever the value is a positive multiple of 60) to force multiple stripes.
 */
private static void createMultiStripeFile(File file)
    throws IOException, ReflectiveOperationException, SerDeException {
  FileSinkOperator.RecordWriter recordWriter =
      createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
  @SuppressWarnings("deprecation")
  Serializer serializer = new OrcSerde();
  SettableStructObjectInspector inspector =
      createSettableStructObjectInspector("test", BIGINT);
  Object row = inspector.create();
  StructField column = inspector.getAllStructFieldRefs().get(0);
  for (int value = 0; value < 300; value += 3) {
    // Flush before writing each positive multiple of 60 to start a new stripe.
    if (value % 60 == 0 && value > 0) {
      flushWriter(recordWriter);
    }
    inspector.setStructFieldData(row, column, (long) value);
    recordWriter.write(serializer.serialize(row, inspector));
  }
  recordWriter.close(false);
}
/**
 * Write a file that contains a number of rows with 1 BIGINT column, and some rows have null values.
 */
private static TempFile createSingleColumnFileWithNullValues(int rows)
    throws IOException, ReflectiveOperationException, SerDeException {
  Serializer serializer = new OrcSerde();
  TempFile output = new TempFile();
  FileSinkOperator.RecordWriter recordWriter =
      createOrcRecordWriter(output.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
  SettableStructObjectInspector inspector =
      createSettableStructObjectInspector("test", BIGINT);
  Object row = inspector.create();
  StructField column = inspector.getAllStructFieldRefs().get(0);
  for (int rowIndex = 0; rowIndex < rows; rowIndex++) {
    // Every tenth row (including row 0) is null; the rest carry their index as a long.
    Object value = (rowIndex % 10 == 0) ? null : (long) rowIndex;
    inspector.setStructFieldData(row, column, value);
    recordWriter.write(serializer.serialize(row, inspector));
  }
  recordWriter.close(false);
  return output;
}
/**
 * Creates the array of settable {@link VectorExpressionWriter}s for the fields of the
 * given struct inspector, one writer per field, in declaration order.
 */
public static VectorExpressionWriter[] getSettableExpressionWriters(
    SettableStructObjectInspector objInspector) throws HiveException {
  final List<? extends StructField> refs = objInspector.getAllStructFieldRefs();
  final int fieldCount = refs.size();
  final VectorExpressionWriter[] writers = new VectorExpressionWriter[fieldCount];
  for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
    final StructField ref = refs.get(fieldIndex);
    // First build the writer for the field's own inspector, then bind it to the struct.
    final VectorExpressionWriter base =
        genVectorExpressionWritable(ref.getFieldObjectInspector());
    writers[fieldIndex] = genVectorExpressionWritable(objInspector, ref, base);
  }
  return writers;
}