public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) {
  final int count = isConvert.length;
  for (int i = 0; i < count; i++) {
    if (isConvert[i]) {
      assignConvertRowColumn(batch, batchIndex, i, objects[i]);
    } else {
      assignRowColumn(batch, batchIndex, i, objects[i]);
    }
  }
}
@Override
protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
  Object[] values = (Object[]) row;
  VectorAssignRow va = outputVectorAssignRowMap.get(outputOI);
  if (va == null) {
    va = new VectorAssignRow();
    va.init((StructObjectInspector) outputOI, vOutContext.getProjectedColumns());
    outputVectorAssignRowMap.put(outputOI, va);
  }
  va.assignRow(outputBatch, outputBatch.size, values);
  ++outputBatch.size;
  if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
    flushOutput();
  }
}
// Fragments from the conversion-initialization path: allocate the target and convert arrays,
// then initialize each column's entries (a convert-source entry plus a target entry when a
// conversion is needed; a target entry only when it is not).
allocateArrays(sourceColumnCount);
allocateConvertArrays(sourceColumnCount);
initTargetEntry(i, i, sourceTypeInfo);
initTargetEntry(i, i, targetTypeInfo);
initConvertSourceEntry(i, sourceTypeInfo);
initTargetEntry(i, i, targetTypeInfo);
public void init(TypeInfo typeInfo, int outputColumnNum) throws HiveException {
  allocateArrays(1);
  initTargetEntry(0, outputColumnNum, typeInfo);
}
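// Usage sketch (not taken from the sources above): a single-output-column assigner of the
// kind built with init(typeInfo, outputColumnNum), e.g. for writing a UDF result into a
// scratch column. The type name, the output column number, and the result value are
// illustrative assumptions; batch, batchIndex, and result are assumed to be in scope, and
// init(...) throws HiveException, so the caller must handle or declare it.
TypeInfo bigintTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString("bigint");
int outputColumnNum = 3;   // hypothetical scratch column already present in the batch
VectorAssignRow outputAssign = new VectorAssignRow();
outputAssign.init(bigintTypeInfo, outputColumnNum);

// Per row: logical column 0 is mapped to batch.cols[outputColumnNum].
outputAssign.assignRowColumn(batch, batchIndex, 0, new LongWritable(result));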
public static VectorRandomBatchSource createInterestingBatches(
    Random random,
    VectorRandomRowSource vectorRandomRowSource,
    Object[][] randomRows,
    VectorRandomBatchParameters vectorRandomBatchParameters)
    throws HiveException {

  VectorAssignRow vectorAssignRow = new VectorAssignRow();
  vectorAssignRow.init(vectorRandomRowSource.typeNames());

  VectorBatchPatterns vectorBatchPatterns =
      chooseBatchPatterns(random, vectorRandomRowSource, randomRows);

  return new VectorRandomBatchSource(
      vectorRandomRowSource,
      randomRows,
      vectorBatchPatterns,
      vectorAssignRow);
}
/**
 * Assign a row's column object to the ColumnVector at batchIndex in the VectorizedRowBatch.
 *
 * @param batch
 * @param batchIndex
 * @param logicalColumnIndex
 * @param object    The row column object whose type is the target data type.
 */
public void assignRowColumn(
    VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, Object object) {
  final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
  final TypeInfo targetTypeInfo = targetTypeInfos[logicalColumnIndex];
  if (targetTypeInfo == null || targetTypeInfo.getCategory() == null) {
    /*
     * This is a column that we don't want (i.e. not included) -- we are done.
     */
    return;
  }
  assignRowColumn(batch.cols[projectionColumnNum], batchIndex, targetTypeInfo, object);
}
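// Illustrative sketch of the logical-to-projected mapping above. Assumed setup: assignRow
// was initialized with projected columns {5, 7} (e.g. via init(structOI, projectedColumns)
// as in the internalForward snippets), so logical column 0 lands in batch.cols[5] and
// logical column 1 in batch.cols[7]. The column numbers, types, and values are made up;
// assignRow, batch, and batchIndex are assumed to be in scope.
assignRow.assignRowColumn(batch, batchIndex, 0, new LongWritable(100L));  // writes batch.cols[5]
assignRow.assignRowColumn(batch, batchIndex, 1, new Text("abc"));         // writes batch.cols[7]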
public void init(Configuration hconf) throws Exception {
  VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();

  partDeserializer = partDesc.getDeserializer(hconf);

  if (partDeserializer instanceof OrcSerde) {

    // UNDONE: We need to get the table schema inspector from self-describing Input File
    //         Formats like ORC.  Modify the ORC serde instead?  For now, this works.

    partRawRowObjectInspector =
        (StructObjectInspector) OrcStruct.createObjectInspector(tableStructTypeInfo);

  } else {
    partRawRowObjectInspector =
        (StructObjectInspector) partDeserializer.getObjectInspector();
  }

  TypeInfo[] dataTypeInfos = vectorPartDesc.getDataTypeInfos();

  vectorAssign = new VectorAssignRow();

  // Initialize with data type conversion parameters.
  readerColumnCount =
      vectorAssign.initConversion(dataTypeInfos, tableRowTypeInfos, dataColumnsToIncludeTruncated);
}
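// Hedged sketch of the conversion setup used above: the file (source) schema declares a
// column as int while the table (target) schema declares it as bigint. The type arrays,
// the null third argument (assumed here to mean "include all columns"), and the row
// objects are illustrative assumptions; batch and rowObjects are assumed to be in scope.
TypeInfo[] sourceTypeInfos = { TypeInfoUtils.getTypeInfoFromTypeString("int") };
TypeInfo[] tableTypeInfos  = { TypeInfoUtils.getTypeInfoFromTypeString("bigint") };

VectorAssignRow vectorAssign = new VectorAssignRow();
int readerColumnCount =
    vectorAssign.initConversion(sourceTypeInfos, tableTypeInfos, null /* no column truncation */);

// Per deserialized row: when a column needs conversion, assignRow routes it through
// assignConvertRowColumn; otherwise it goes through assignRowColumn directly.
vectorAssign.assignRow(batch, batch.size++, rowObjects);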
public DruidVectorizedWrapper(DruidQueryRecordReader reader, Configuration jobConf) {
  this.rbCtx = Utilities.getVectorizedRowBatchCtx(jobConf);
  if (rbCtx.getDataColumnNums() != null) {
    projectedColumns = rbCtx.getDataColumnNums();
  } else {
    // case all the columns are selected
    projectedColumns = new int[rbCtx.getRowColumnTypeInfos().length];
    for (int i = 0; i < projectedColumns.length; i++) {
      projectedColumns[i] = i;
    }
  }
  this.serDe = createAndInitializeSerde(jobConf);
  this.baseReader = Preconditions.checkNotNull(reader);

  // row parser and row assigner initializing
  try {
    vectorAssignRow.init((StructObjectInspector) serDe.getObjectInspector());
  } catch (HiveException e) {
    throw new RuntimeException(e);
  }
  druidWritable = baseReader.createValue();
  rowBoat = new Object[rbCtx.getDataColumnCount()];
}
ArrowWrapperWritable serialize(Object obj, ObjectInspector objInspector) {
  // if row is null, it means there are no more rows (closeOp()).
  // another case can be that the buffer is full.
  if (obj == null) {
    return serializeBatch(vectorizedRowBatch, false);
  }
  List<Object> standardObjects = new ArrayList<Object>();
  ObjectInspectorUtils.copyToStandardObject(standardObjects, obj,
      ((StructObjectInspector) objInspector), WRITABLE);

  vectorAssignRow.assignRow(vectorizedRowBatch, batchSize, standardObjects, fieldSize);
  batchSize++;
  if (batchSize == MAX_BUFFERED_ROWS) {
    return serializeBatch(vectorizedRowBatch, false);
  }
  return null;
}
/**
 * Convert row's column object and then assign it the ColumnVector at batchIndex
 * in the VectorizedRowBatch.
 *
 * Public so VectorDeserializeRow can use this method to convert a row's column object.
 *
 * @param batch
 * @param batchIndex
 * @param logicalColumnIndex
 * @param object    The row column object whose type is the VectorAssignRow.initConversion
 *                  source data type.
 */
public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex,
    int logicalColumnIndex, Object object) {
  Preconditions.checkState(isConvert[logicalColumnIndex]);
  final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
  assignConvertRowColumn(
      batch.cols[projectionColumnNum],
      batchIndex,
      targetTypeInfos[logicalColumnIndex],
      convertSourceOI[logicalColumnIndex],
      convertTargetWritables[logicalColumnIndex],
      object);
}
vectorAssignRow = new VectorAssignRow();
try {
  vectorAssignRow.init(serDe.rowObjectInspector);
} catch (HiveException e) {
  throw new SerDeException(e);
}
@Override
public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch)
    throws IOException {
  vectorizedRowBatch.reset();
  int rowsCount = 0;
  while (rowsCount < vectorizedRowBatch.getMaxSize()
      && baseReader.next(nullWritable, druidWritable)) {
    if (projectedColumns.length > 0) {
      try {
        serDe.deserializeAsPrimitive(druidWritable, rowBoat);
      } catch (SerDeException e) {
        throw new IOException(e);
      }
      for (int i : projectedColumns) {
        vectorAssignRow.assignRowColumn(vectorizedRowBatch, rowsCount, i, rowBoat[i]);
      }
    }
    rowsCount++;
  }
  vectorizedRowBatch.size = rowsCount;
  return rowsCount > 0;
}
public void init(List<String> typeNames) throws HiveException {
  final int count = typeNames.size();
  allocateArrays(count);

  for (int i = 0; i < count; i++) {
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
    initTargetEntry(i, i, typeInfo);
  }
}
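// End-to-end usage sketch built from the entry points shown in this section
// (init(typeNames) and assignRow): fill a two-column batch from writable row objects.
// The type names, column vector types, and sample values are assumptions for illustration;
// in real operators the batch and its column vectors are normally allocated through a
// VectorizedRowBatchCtx rather than by hand, and init(...) throws HiveException.
VectorAssignRow assignRow = new VectorAssignRow();
assignRow.init(Arrays.asList("bigint", "string"));

VectorizedRowBatch batch = new VectorizedRowBatch(2);
batch.cols[0] = new LongColumnVector();
batch.cols[1] = new BytesColumnVector();
((BytesColumnVector) batch.cols[1]).initBuffer();

Object[] row = { new LongWritable(42L), new Text("hello") };
assignRow.assignRow(batch, batch.size, row);
batch.size++;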
currentVectorAssign.assignRow(deserializerBatch, deserializerBatch.size++, standardObjects, currentDataColumnCount);
convertVectorAssignRow.assignConvertRowColumn(
    batch, batchIndex, logicalColumnIndex, convertSourceWritable);
/**
 * 'forwards' the (row-mode) record into the (vectorized) output batch
 */
@Override
protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
  VectorAssignRow va = outputVectorAssignRowMap.get(outputOI);
  if (va == null) {
    va = new VectorAssignRow();
    va.init((StructObjectInspector) outputOI, vOutContext.getProjectedColumns());
    outputVectorAssignRowMap.put(outputOI, va);
  }
  if (row instanceof ArrayList) {
    va.assignRow(outputBatch, outputBatch.size, (ArrayList<Object>) row);
  } else {
    va.assignRow(outputBatch, outputBatch.size, (Object[]) row);
  }
  ++outputBatch.size;
  if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
    flushOutput();
  }
}
public void init() throws HiveException, UDFArgumentException {
  genericUDF = expr.getGenericUDF();
  deferredChildren = new GenericUDF.DeferredObject[expr.getChildren().size()];
  childrenOIs = new ObjectInspector[expr.getChildren().size()];
  writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildren());
  for (int i = 0; i < childrenOIs.length; i++) {
    childrenOIs[i] = writers[i].getObjectInspector();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    context.setup(genericUDF);
  }

  outputTypeInfo = expr.getTypeInfo();
  outputVectorAssignRow = new VectorAssignRow();
  outputVectorAssignRow.init(outputTypeInfo, outputColumnNum);

  genericUDF.initialize(childrenOIs);

  // Initialize constant arguments
  for (int i = 0; i < argDescs.length; i++) {
    if (argDescs[i].isConstant()) {
      argDescs[i].prepareConstant();
    }
  }
}