/**
 * Emits one row of a struct column as a JSON object.
 *
 * <p>For every child type reported by {@code schema}, the field name at the same index is
 * written as a key and the matching entry of {@code batch.fields} is handed to
 * {@code setValue} to produce the value.
 *
 * @param writer the JSON writer that receives the output
 * @param batch the struct column vector whose {@code fields} are read
 * @param schema the type supplying the field names and child types
 * @param row the row index forwarded to {@code setValue} for every field
 * @throws JSONException propagated from the writer or from {@code setValue}
 */
private static void setStruct(JSONWriter writer, StructColumnVector batch,
    TypeDescription schema, int row) throws JSONException {
  writer.object();
  List<String> names = schema.getFieldNames();
  List<TypeDescription> children = schema.getChildren();
  int field = 0;
  for (TypeDescription child : children) {
    writer.key(names.get(field));
    setValue(writer, batch.fields[field], child, row);
    field++;
  }
  writer.endObject();
}
} // NOTE(review): closes an enclosing scope that is opened outside this view
/**
 * Writes a single row of a batch as JSON.
 *
 * <p>When {@code schema} is a struct, the row becomes a JSON object with one key per entry
 * of {@code batch.cols}, named after the schema field at the same index. For any other
 * category only {@code batch.cols[0]} is written, using {@code schema} itself as its type.
 *
 * @param writer the JSON writer that receives the output
 * @param batch the row batch whose columns are read
 * @param schema the type describing the batch
 * @param row the row index forwarded to {@code setValue}
 * @throws JSONException propagated from the writer or from {@code setValue}
 */
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
    TypeDescription schema, int row) throws JSONException {
  if (schema.getCategory() != TypeDescription.Category.STRUCT) {
    // Non-struct schema: the single top-level column carries the whole value.
    setValue(writer, batch.cols[0], schema, row);
    return;
  }

  List<TypeDescription> columnTypes = schema.getChildren();
  List<String> columnNames = schema.getFieldNames();
  writer.object();
  int column = 0;
  while (column < batch.cols.length) {
    writer.key(columnNames.get(column));
    setValue(writer, batch.cols[column], columnTypes.get(column), row);
    ++column;
  }
  writer.endObject();
}
/**
 * Writes summary statistics of a bloom filter as key/value pairs on {@code writer}.
 *
 * <p>The emitted keys are {@code numHashFunctions}, {@code bitCount}, {@code popCount}
 * (number of set bits across the filter's bit set), {@code loadFactor}
 * ({@code popCount / bitCount}) and {@code expectedFpp} ({@code loadFactor} raised to the
 * number of hash functions). This method does not open or close a JSON object itself.
 *
 * @param writer the JSON writer that receives the keys
 * @param bf the bloom filter to summarize
 * @throws JSONException propagated from the writer
 */
private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
    throws JSONException {
  final int totalBits = bf.getBitSize();

  // Count the set bits one 64-bit word at a time.
  int setBits = 0;
  for (long word : bf.getBitSet()) {
    setBits += Long.bitCount(word);
  }

  final int hashFunctions = bf.getNumHashFunctions();
  final float loadFactor = (float) setBits / (float) totalBits;
  final float expectedFpp = (float) Math.pow(loadFactor, hashFunctions);

  writer.key("numHashFunctions").value(hashFunctions);
  writer.key("bitCount").value(totalBits);
  writer.key("popCount").value(setBits);
  writer.key("loadFactor").value(loadFactor);
  writer.key("expectedFpp").value(expectedFpp);
}
/**
 * Writes one stripe's layout information as a JSON object with the keys {@code offset},
 * {@code indexLength}, {@code dataLength}, {@code footerLength} and {@code rowCount}.
 *
 * @param writer the JSON writer that receives the object
 * @param stripe the stripe whose values are read
 * @throws JSONException propagated from the writer
 */
private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
    throws JSONException {
  writer.object();

  writer.key("offset");
  writer.value(stripe.getOffset());

  writer.key("indexLength");
  writer.value(stripe.getIndexLength());

  writer.key("dataLength");
  writer.value(stripe.getDataLength());

  writer.key("footerLength");
  writer.value(stripe.getFooterLength());

  writer.key("rowCount");
  writer.value(stripe.getNumberOfRows());

  writer.endObject();
}
/**
 * Prints one row of a struct column vector as a JSON object.
 *
 * <p>One key is written per child type of {@code schema}, named after the field at the same
 * index; the value is produced by {@code printValue} from the matching entry of
 * {@code batch.fields}.
 *
 * @param writer the JSON writer that receives the output
 * @param batch the struct column vector whose {@code fields} are read
 * @param schema the type supplying the field names and child types
 * @param row the row index forwarded to {@code printValue} for every field
 * @throws JSONException propagated from the writer or from {@code printValue}
 */
static void printStruct(JSONWriter writer, StructColumnVector batch,
    TypeDescription schema, int row) throws JSONException {
  writer.object();
  final List<String> names = schema.getFieldNames();
  final List<TypeDescription> children = schema.getChildren();
  final int fieldCount = children.size();
  for (int idx = 0; idx < fieldCount; idx++) {
    final String name = names.get(idx);
    writer.key(name);
    printValue(writer, batch.fields[idx], children.get(idx), row);
  }
  writer.endObject();
}
/**
 * Prints an {@code OrcStruct} as a JSON object.
 *
 * <p>For each entry of {@code type}'s subtype list, the field name at the same index is
 * written as a key, and the struct's field value at that index is passed to
 * {@code printObject} together with {@code types} and the subtype entry.
 *
 * @param writer the JSON writer that receives the output
 * @param obj the struct whose field values are read
 * @param types the type list forwarded unchanged to {@code printObject}
 * @param type the struct's own type, supplying the subtypes and field names
 * @throws IOException propagated from {@code printObject}
 * @throws JSONException propagated from the writer or from {@code printObject}
 */
static void printStruct(JSONWriter writer, OrcStruct obj, List<OrcProto.Type> types,
    OrcProto.Type type) throws IOException, JSONException {
  writer.object();
  final List<Integer> subtypes = type.getSubtypesList();
  int field = 0;
  while (field < subtypes.size()) {
    writer.key(type.getFieldNames(field));
    printObject(writer, obj.getFieldValue(field), types, subtypes.get(field));
    ++field;
  }
  writer.endObject();
}
/**
 * Prints a map as a JSON array of {@code {"_key": ..., "_value": ...}} objects, one per
 * map entry, in the iteration order of {@code obj.entrySet()}.
 *
 * @param writer the JSON writer that receives the output
 * @param obj the map whose entries are printed
 * @param types the type list forwarded unchanged to {@code printObject}
 * @param type the map's own type; subtype 0 is used for keys and subtype 1 for values
 * @throws IOException propagated from {@code printObject}
 * @throws JSONException propagated from the writer or from {@code printObject}
 */
private static void printMap(JSONWriter writer, Map<Object, Object> obj,
    List<OrcProto.Type> types, OrcProto.Type type) throws IOException, JSONException {
  writer.array();
  final int keySubtype = type.getSubtypes(0);
  final int valueSubtype = type.getSubtypes(1);
  for (Map.Entry<Object, Object> entry : obj.entrySet()) {
    writer.object();

    writer.key("_key");
    printObject(writer, entry.getKey(), types, keySubtype);

    writer.key("_value");
    printObject(writer, entry.getValue(), types, valueSubtype);

    writer.endObject();
  }
  writer.endArray();
}
/**
 * Prints one row of a map column vector as a JSON array of
 * {@code {"_key": ..., "_value": ...}} objects.
 *
 * <p>The row's entries are read from {@code vector.keys} and {@code vector.values} at
 * positions {@code offsets[row]} through {@code offsets[row] + lengths[row] - 1}.
 *
 * @param writer the JSON writer that receives the output
 * @param vector the map column vector to read
 * @param schema the map type; child 0 is used for keys and child 1 for values
 * @param row the row whose map is printed
 * @throws JSONException propagated from the writer or from {@code printValue}
 */
private static void printMap(JSONWriter writer, MapColumnVector vector,
    TypeDescription schema, int row) throws JSONException {
  writer.array();
  final List<TypeDescription> children = schema.getChildren();
  final TypeDescription keyType = children.get(0);
  final TypeDescription valueType = children.get(1);
  final int start = (int) vector.offsets[row];
  final long count = vector.lengths[row];
  for (int i = 0; i < count; ++i) {
    final int element = start + i;
    writer.object();
    writer.key("_key");
    printValue(writer, vector.keys, keyType, element);
    writer.key("_value");
    printValue(writer, vector.values, valueType, element);
    writer.endObject();
  }
  writer.endArray();
}
private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs) throws JSONException { if (cs != null) { writer.key("count").value(cs.getNumberOfValues()); writer.key("hasNull").value(cs.hasNull()); if (cs instanceof BinaryColumnStatistics) { writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.BINARY); } else if (cs instanceof BooleanColumnStatistics) { writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount()); writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount()); writer.key("type").value(OrcProto.Type.Kind.BOOLEAN); } else if (cs instanceof IntegerColumnStatistics) { writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum()); writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum()); if (((IntegerColumnStatistics) cs).isSumDefined()) { writer.key("sum").value(((IntegerColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.LONG); } else if (cs instanceof DoubleColumnStatistics) { writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum()); writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum()); writer.key("sum").value(((DoubleColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.DOUBLE); } else if (cs instanceof StringColumnStatistics) { writer.key("min").value(((StringColumnStatistics) cs).getMinimum()); writer.key("max").value(((StringColumnStatistics) cs).getMaximum()); writer.key("totalLength").value(((StringColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.STRING); } else if (cs instanceof DateColumnStatistics) {
/**
 * Prints a single row of a batch as JSON.
 *
 * <p>A struct schema yields a JSON object with one key per entry of {@code batch.cols},
 * named after the schema field at the same index. Any other schema category prints only
 * {@code batch.cols[0]}, typed by {@code schema} itself.
 *
 * @param writer the JSON writer that receives the output
 * @param batch the row batch whose columns are read
 * @param schema the type describing the batch
 * @param row the row index forwarded to {@code printValue}
 * @throws JSONException propagated from the writer or from {@code printValue}
 */
static void printRow(JSONWriter writer, VectorizedRowBatch batch, TypeDescription schema,
    int row) throws JSONException {
  final boolean isStruct = schema.getCategory() == TypeDescription.Category.STRUCT;
  if (!isStruct) {
    // Non-struct schema: the first column holds the entire row value.
    printValue(writer, batch.cols[0], schema, row);
    return;
  }

  final List<TypeDescription> childTypes = schema.getChildren();
  final List<String> childNames = schema.getFieldNames();
  writer.object();
  for (int col = 0; col < batch.cols.length; col++) {
    writer.key(childNames.get(col));
    printValue(writer, batch.cols[col], childTypes.get(col), row);
  }
  writer.endObject();
}
/**
 * Writes the bloom filter index of one column.
 *
 * <p>Emits a {@code bloomFilterIndexes} array with one object per bloom filter entry (its
 * {@code entryId} plus the statistics written by {@code writeBloomFilterStats}), followed
 * by a {@code stripeLevelBloomFilter} object describing all entries merged together. The
 * merged object is omitted when the column has no entries.
 *
 * <p>Nothing is written when {@code bloomFilterIndex} is {@code null}, when {@code col} is
 * at or beyond the end of the array, or when the column's slot is {@code null}.
 *
 * @param writer the JSON writer that receives the output
 * @param col the column index to look up in {@code bloomFilterIndex}
 * @param bloomFilterIndex per-column bloom filter indexes; may be {@code null}
 * @throws JSONException propagated from the writer
 */
private static void writeBloomFilterIndexes(JSONWriter writer, int col,
    OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
  // Same guard as writeRowGroupIndexes: a column index past the end of the array means
  // "no index for this column" rather than an ArrayIndexOutOfBoundsException.
  if (bloomFilterIndex == null
      || col >= bloomFilterIndex.length
      || bloomFilterIndex[col] == null) {
    return;
  }

  BloomFilterIO stripeLevelBF = null;
  int entryIx = 0;
  writer.key("bloomFilterIndexes").array();
  for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
    writer.object();
    writer.key("entryId").value(entryIx++);
    BloomFilterIO toMerge = new BloomFilterIO(bf);
    // Stats are written before merging, so the first entry's own numbers are reported
    // even though that filter instance later accumulates the merged result.
    writeBloomFilterStats(writer, toMerge);
    if (stripeLevelBF == null) {
      stripeLevelBF = toMerge;
    } else {
      stripeLevelBF.merge(toMerge);
    }
    writer.endObject();
  }
  writer.endArray();

  if (stripeLevelBF != null) {
    writer.key("stripeLevelBloomFilter");
    writer.object();
    writeBloomFilterStats(writer, stripeLevelBF);
    writer.endObject();
  }
}
/**
 * Writes the row group index of one column as a {@code rowGroupIndexes} array.
 *
 * <p>Each array element is an object holding the {@code entryId}, the entry's column
 * statistics (as written by {@code writeColumnStatistics}) and a {@code positions} array.
 * Nothing is written when {@code rowGroupIndex} is {@code null}, when {@code col} is at or
 * beyond the end of the array, or when the column's slot is {@code null}.
 *
 * @param writer the JSON writer that receives the output
 * @param col the column index to look up in {@code rowGroupIndex}
 * @param rowGroupIndex per-column row indexes; may be {@code null}
 * @throws JSONException propagated from the writer or from {@code writeColumnStatistics}
 */
private static void writeRowGroupIndexes(JSONWriter writer, int col,
    OrcProto.RowIndex[] rowGroupIndex) throws JSONException {
  OrcProto.RowIndex index;
  if (rowGroupIndex == null || (col >= rowGroupIndex.length)
      || ((index = rowGroupIndex[col]) == null)) {
    return;
  }

  writer.key("rowGroupIndexes").array();
  for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
    writer.object();
    writer.key("entryId").value(entryIx);
    OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
    if (entry != null) {
      OrcProto.ColumnStatistics colStats = entry.getStatistics();
      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
      writer.key("positions").array();
      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
        writer.value(entry.getPositions(posIx));
      }
      writer.endArray();
    }
    // Close the object on every path: skipping this for a null entry would leave an
    // unterminated object in the output, holding only its entryId.
    writer.endObject();
  }
  writer.endArray();
}