/**
 * Allocates one off-heap column vector per field of the given schema.
 *
 * @param capacity initial capacity of each vector, in number of elements (not
 *                 bytes); vectors grow beyond this as necessary
 * @param schema   schema whose fields determine each vector's element type
 * @return one {@link OffHeapColumnVector} per schema field, in field order
 */
public static OffHeapColumnVector[] allocateColumns(int capacity, StructType schema) {
  StructField[] fields = schema.fields();
  return allocateColumns(capacity, fields);
}
/**
 * Allocates columns to store elements of each field of the schema off heap.
 * Capacity is the initial capacity of the vector and it will grow as necessary.
 * Capacity is in number of elements, not number of bytes.
 *
 * @param capacity initial per-vector capacity, in elements
 * @param schema   schema whose fields determine each vector's element type
 * @return one off-heap vector per schema field, in field order
 */
public static OffHeapColumnVector[] allocateColumns(int capacity, StructType schema) {
  // Delegates to the StructField[]-based overload.
  return allocateColumns(capacity, schema.fields());
}
/**
 * Allocates one on-heap column vector per field of the given schema.
 *
 * @param capacity initial capacity of each vector, in number of elements (not
 *                 bytes); vectors grow beyond this as necessary
 * @param schema   schema whose fields determine each vector's element type
 * @return one {@link OnHeapColumnVector} per schema field, in field order
 */
public static OnHeapColumnVector[] allocateColumns(int capacity, StructType schema) {
  StructField[] fields = schema.fields();
  return allocateColumns(capacity, fields);
}
/**
 * Allocates columns to store elements of each field of the schema on heap.
 * Capacity is the initial capacity of the vector and it will grow as necessary.
 * Capacity is in number of elements, not number of bytes.
 *
 * @param capacity initial per-vector capacity, in elements
 * @param schema   schema whose fields determine each vector's element type
 * @return one on-heap vector per schema field, in field order
 */
public static OnHeapColumnVector[] allocateColumns(int capacity, StructType schema) {
  // Delegates to the StructField[]-based overload.
  return allocateColumns(capacity, schema.fields());
}
/**
 * Creates an aggregate hash map backed by an on-heap {@code ColumnarBatch}.
 *
 * @param schema     must contain exactly two long fields: the key and the value
 * @param capacity   maximum number of entries; must be a positive power of two
 * @param loadFactor fraction of bucket slots allowed to be occupied; used to
 *                   size the bucket array
 * @param maxSteps   maximum number of probe steps before giving up on a lookup
 */
public AggregateHashMap(StructType schema, int capacity, double loadFactor, int maxSteps) {
  // We currently only support single key-value pair that are both longs
  assert (schema.size() == 2 && schema.fields()[0].dataType() == LongType &&
    schema.fields()[1].dataType() == LongType);

  // capacity should be a power of 2 (the bit trick below tests exactly that)
  assert (capacity > 0 && ((capacity & (capacity - 1)) == 0));

  this.maxSteps = maxSteps;
  // Size buckets so the load factor is respected when the map is at capacity.
  numBuckets = (int) (capacity / loadFactor);
  batch = ColumnarBatch.allocate(schema, MemoryMode.ON_HEAP, capacity);
  buckets = new int[numBuckets];
  // -1 marks an empty bucket.
  Arrays.fill(buckets, -1);
}
/**
 * Reports whether UnsafeFixedWidthAggregationMap can hold aggregation buffers
 * of the given schema.
 *
 * @param schema candidate aggregation-buffer schema
 * @return true iff every field's data type is mutable in an {@code UnsafeRow}
 */
public static boolean supportsAggregationBufferSchema(StructType schema) {
  StructField[] fields = schema.fields();
  for (int i = 0; i < fields.length; i++) {
    if (!UnsafeRow.isMutable(fields[i].dataType())) {
      return false;
    }
  }
  return true;
}
/**
 * Reports whether UnsafeFixedWidthAggregationMap can hold aggregation buffers
 * of the given schema: every field's type must be mutable in an UnsafeRow.
 *
 * @param schema candidate aggregation-buffer schema
 * @return true iff all field types are UnsafeRow-mutable
 */
public static boolean supportsAggregationBufferSchema(StructType schema) {
  StructField[] fields = schema.fields();
  int idx = 0;
  while (idx < fields.length) {
    // A single immutable field type disqualifies the whole schema.
    if (!UnsafeRow.isMutable(fields[idx].dataType())) {
      return false;
    }
    idx++;
  }
  return true;
}
private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) { this.schema = schema; this.capacity = maxRows; this.columns = new ColumnVector[schema.size()]; this.nullFilteredColumns = new HashSet<>(); this.filteredRows = new boolean[maxRows]; for (int i = 0; i < schema.fields().length; ++i) { StructField field = schema.fields()[i]; columns[i] = ColumnVector.allocate(maxRows, field.dataType(), memMode); } this.row = new Row(this); } }
/**
 * @return true if UnsafeFixedWidthAggregationMap supports aggregation buffers
 *         with the given schema, false otherwise.
 */
public static boolean supportsAggregationBufferSchema(StructType schema) {
  // Every field type must be updatable in-place in an UnsafeRow; one immutable
  // type disqualifies the whole schema.
  for (StructField field: schema.fields()) {
    if (!UnsafeRow.isMutable(field.dataType())) {
      return false;
    }
  }
  return true;
}
public AggregateHashMap(StructType schema, int capacity, double loadFactor, int maxSteps) { // We currently only support single key-value pair that are both longs assert (schema.size() == 2 && schema.fields()[0].dataType() == LongType && schema.fields()[1].dataType() == LongType); // capacity should be a power of 2 assert (capacity > 0 && ((capacity & (capacity - 1)) == 0)); this.maxSteps = maxSteps; numBuckets = (int) (capacity / loadFactor); columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema); aggBufferRow = new MutableColumnarRow(columnVectors); buckets = new int[numBuckets]; Arrays.fill(buckets, -1); }
/**
 * Creates an aggregate hash map backed by on-heap column vectors.
 *
 * @param schema     must contain exactly two long fields: the key and the value
 * @param capacity   maximum number of entries; must be a positive power of two
 * @param loadFactor fraction of bucket slots allowed to be occupied; used to
 *                   size the bucket array
 * @param maxSteps   maximum number of probe steps before giving up on a lookup
 */
public AggregateHashMap(StructType schema, int capacity, double loadFactor, int maxSteps) {
  // We currently only support single key-value pair that are both longs
  assert (schema.size() == 2 && schema.fields()[0].dataType() == LongType &&
    schema.fields()[1].dataType() == LongType);

  // capacity should be a power of 2 (the bit trick below tests exactly that)
  assert (capacity > 0 && ((capacity & (capacity - 1)) == 0));

  this.maxSteps = maxSteps;
  // Size buckets so the load factor is respected when the map is at capacity.
  numBuckets = (int) (capacity / loadFactor);
  columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema);
  aggBufferRow = new MutableColumnarRow(columnVectors);
  buckets = new int[numBuckets];
  // -1 marks an empty bucket.
  Arrays.fill(buckets, -1);
}
/**
 * Converts an iterator of rows into a single ColumnarBatch.
 *
 * @param schema  schema shared by all input rows
 * @param memMode memory mode for the batch's column vectors
 * @param row     rows to copy into the batch; fully consumed
 * @return a batch containing every input row, with its row count set
 */
public static ColumnarBatch toBatch(
    StructType schema, MemoryMode memMode, Iterator<Row> row) {
  ColumnarBatch batch = ColumnarBatch.allocate(schema, memMode);
  // Hoisted out of the loop: schema.fields() was previously re-evaluated twice
  // per column per row even though it never changes.
  StructField[] fields = schema.fields();
  int n = 0;
  while (row.hasNext()) {
    Row r = row.next();
    for (int i = 0; i < fields.length; i++) {
      appendValue(batch.column(i), fields[i].dataType(), r, i);
    }
    n++;
  }
  batch.setNumRows(n);
  return batch;
}
}
/**
 * Converts an iterator of rows into a single ColumnarBatch.
 *
 * @param schema  schema shared by all input rows
 * @param memMode OFF_HEAP or ON_HEAP allocation for the column vectors
 * @param row     rows to copy into the batch; fully consumed
 * @return a batch containing every input row, with its row count set
 */
public static ColumnarBatch toBatch(
    StructType schema, MemoryMode memMode, Iterator<Row> row) {
  // Initial capacity only; appendValue grows the vectors as needed.
  int capacity = 4 * 1024;
  WritableColumnVector[] columnVectors;
  if (memMode == MemoryMode.OFF_HEAP) {
    columnVectors = OffHeapColumnVector.allocateColumns(capacity, schema);
  } else {
    columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema);
  }
  // Hoisted out of the loop: schema.fields() was previously re-evaluated twice
  // per column per row even though it never changes.
  StructField[] fields = schema.fields();
  int n = 0;
  while (row.hasNext()) {
    Row r = row.next();
    for (int i = 0; i < fields.length; i++) {
      appendValue(columnVectors[i], fields[i].dataType(), r, i);
    }
    n++;
  }
  ColumnarBatch batch = new ColumnarBatch(columnVectors);
  batch.setNumRows(n);
  return batch;
}
}
/**
 * Copies all rows from an iterator into one freshly allocated ColumnarBatch.
 *
 * @param schema  schema shared by all input rows
 * @param memMode OFF_HEAP or ON_HEAP allocation for the column vectors
 * @param row     rows to copy into the batch; fully consumed
 * @return a batch containing every input row, with its row count set
 */
public static ColumnarBatch toBatch(
    StructType schema, MemoryMode memMode, Iterator<Row> row) {
  // Initial capacity only; vectors grow as values are appended.
  final int capacity = 4 * 1024;
  WritableColumnVector[] vectors = (memMode == MemoryMode.OFF_HEAP)
      ? OffHeapColumnVector.allocateColumns(capacity, schema)
      : OnHeapColumnVector.allocateColumns(capacity, schema);
  int rowCount = 0;
  while (row.hasNext()) {
    Row current = row.next();
    for (int i = 0; i < schema.fields().length; i++) {
      appendValue(vectors[i], schema.fields()[i].dataType(), current, i);
    }
    rowCount++;
  }
  ColumnarBatch batch = new ColumnarBatch(vectors);
  batch.setNumRows(rowCount);
  return batch;
}
}
InternalRow partitionValues) { StructType batchSchema = new StructType(); for (StructField f: sparkSchema.fields()) { batchSchema = batchSchema.add(f); for (StructField f : partitionColumns.fields()) { batchSchema = batchSchema.add(f); int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant();
InternalRow partitionValues) { StructType batchSchema = new StructType(); for (StructField f: sparkSchema.fields()) { batchSchema = batchSchema.add(f); for (StructField f : partitionColumns.fields()) { batchSchema = batchSchema.add(f); int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant();
/**
 * Binds one Spark row to the prepared statement and executes it.
 *
 * Null columns are bound via setNull with the Phoenix JDBC type; non-null
 * columns go through a dialect-specific setter.
 *
 * @param internalRow the row to write
 * @throws IOException wrapping any SQLException from statement execution
 */
@Override
public void write(InternalRow internalRow) throws IOException {
  try {
    // Convert the InternalRow at most once per write call. The original code
    // re-ran SparkJdbcUtil.toRow(schema, internalRow) inside the loop for
    // every non-null column. Created lazily so an all-null row skips the
    // conversion entirely.
    // NOTE(review): assumes toRow is a pure conversion with no per-call side
    // effects — confirm before merging.
    Row row = null;
    int i = 0;
    for (StructField field : schema.fields()) {
      DataType dataType = field.dataType();
      if (internalRow.isNullAt(i)) {
        // JDBC parameter indices are 1-based.
        statement.setNull(i + 1,
            SparkJdbcUtil.getJdbcType(dataType, PhoenixJdbcDialect$.MODULE$).jdbcNullType());
      } else {
        if (row == null) {
          row = SparkJdbcUtil.toRow(schema, internalRow);
        }
        SparkJdbcUtil.makeSetter(conn, PhoenixJdbcDialect$.MODULE$, dataType)
            .apply(statement, row, i);
      }
      ++i;
    }
    statement.execute();
  } catch (SQLException e) {
    // Preserve the SQLException as the cause.
    throw new IOException("Exception while executing Phoenix prepared statement", e);
  }
}
/**
 * Appends the value at {@code fieldIdx} of {@code src} to {@code dst},
 * recursing into array elements and struct children as needed.
 *
 * @param dst      destination column vector
 * @param t        declared data type of the field
 * @param src      source row
 * @param fieldIdx index of the field within {@code src}
 */
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) t;
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
    } else {
      // Reserve the array slot, then append each element to the child vector.
      List<Object> elements = src.getList(fieldIdx);
      dst.appendArray(elements.size());
      for (Object element : elements) {
        appendValue(dst.arrayData(), arrayType.elementType(), element);
      }
    }
  } else if (t instanceof StructType) {
    StructType structType = (StructType) t;
    if (src.isNullAt(fieldIdx)) {
      // A null struct is recorded on the parent; children are untouched.
      dst.appendStruct(true);
    } else {
      dst.appendStruct(false);
      Row child = src.getStruct(fieldIdx);
      for (int i = 0; i < structType.fields().length; i++) {
        appendValue(dst.getChildColumn(i), structType.fields()[i].dataType(), child, i);
      }
    }
  } else {
    // Scalar: delegate to the Object-based overload.
    appendValue(dst, t, src.get(fieldIdx));
  }
}
/**
 * Appends the value at {@code fieldIdx} of {@code src} to {@code dst},
 * recursing into array elements and struct children as needed.
 *
 * @param dst      destination writable column vector
 * @param t        declared data type of the field
 * @param src      source row
 * @param fieldIdx index of the field within {@code src}
 */
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType at = (ArrayType)t;
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
    } else {
      // Reserve the array slot, then append each element to the child vector.
      List<Object> values = src.getList(fieldIdx);
      dst.appendArray(values.size());
      for (Object o : values) {
        appendValue(dst.arrayData(), at.elementType(), o);
      }
    }
  } else if (t instanceof StructType) {
    StructType st = (StructType)t;
    if (src.isNullAt(fieldIdx)) {
      // A null struct is recorded on the parent; children are untouched.
      dst.appendStruct(true);
    } else {
      dst.appendStruct(false);
      Row c = src.getStruct(fieldIdx);
      for (int i = 0; i < st.fields().length; i++) {
        appendValue(dst.getChild(i), st.fields()[i].dataType(), c, i);
      }
    }
  } else {
    // Scalar: delegate to the Object-based overload.
    appendValue(dst, t, src.get(fieldIdx));
  }
}
/**
 * Appends the value at {@code fieldIdx} of {@code src} to {@code dst},
 * recursing into array elements and struct children as needed.
 *
 * @param dst      destination writable column vector
 * @param t        declared data type of the field
 * @param src      source row
 * @param fieldIdx index of the field within {@code src}
 */
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) t;
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
    } else {
      // Reserve the array slot, then append each element to the child vector.
      List<Object> elements = src.getList(fieldIdx);
      dst.appendArray(elements.size());
      for (Object element : elements) {
        appendValue(dst.arrayData(), arrayType.elementType(), element);
      }
    }
  } else if (t instanceof StructType) {
    StructType structType = (StructType) t;
    if (src.isNullAt(fieldIdx)) {
      // A null struct is recorded on the parent; children are untouched.
      dst.appendStruct(true);
    } else {
      dst.appendStruct(false);
      Row child = src.getStruct(fieldIdx);
      for (int i = 0; i < structType.fields().length; i++) {
        appendValue(dst.getChild(i), structType.fields()[i].dataType(), child, i);
      }
    }
  } else {
    // Scalar: delegate to the Object-based overload.
    appendValue(dst, t, src.get(fieldIdx));
  }
}