switch (schema.getCategory()) { case BOOLEAN: writer.value(((LongColumnVector) vector).vector[row] != 0); break; case BYTE: case INT: case LONG: writer.value(((LongColumnVector) vector).vector[row]); break; case FLOAT: case DOUBLE: writer.value(((DoubleColumnVector) vector).vector[row]); break; case STRING: case CHAR: case VARCHAR: writer.value(((BytesColumnVector) vector).toString(row)); break; case DECIMAL: writer.value(((DecimalColumnVector) vector).vector[row] .toString()); break; case DATE: writer.value(new DateWritable( (int) ((LongColumnVector) vector).vector[row]) .toString()); break; case TIMESTAMP:
/**
 * Returns the next row of the ORC file serialized as a JSON key/value pair,
 * or null when the reader is exhausted (or a row fails to serialize).
 *
 * NOTE(review): a JSONException is treated as best-effort — the error is
 * logged and null is returned, which callers will see as end-of-stream;
 * the record reader is NOT closed on that path. Confirm this is intended.
 *
 * @return a KeyValue holding the running offset and the UTF-8 JSON bytes,
 *         or null at end of data / on serialization failure
 * @throws IOException if advancing or closing the ORC record reader fails
 */
@Override public KeyValue next() throws IOException {
  boolean endOfBatch = false;
  StringWriter sw = new StringWriter();
  // Current batch consumed: pull the next one and restart the row cursor.
  if (rowIndex > batch.size - 1) {
    endOfBatch = !rows.nextBatch(batch);
    rowIndex = 0;
  }
  // nextBatch returned false: no more rows anywhere in the file.
  if (endOfBatch) {
    rows.close();
    return null;
  }
  try {
    // Serialize the current row of the batch into the string writer as JSON.
    JsonFieldFiller.processRow(new JSONWriter(sw), batch, schema, rowIndex);
  } catch (JSONException e) {
    LOG.error("Unable to parse json {}", sw.toString());
    return null;
  }
  rowIndex++;
  // offset is the monotonically increasing key for the emitted records.
  return new KeyValue(offset++, sw.toString().getBytes("UTF-8"));
}
/**
 * Serializes one LIST value as a JSON array, writing each element with
 * {@code setValue} using the list's single child type.
 *
 * @param writer JSON sink
 * @param vector list column holding per-row offset/length into its child
 * @param schema LIST type description; child 0 is the element type
 * @param row    row index within the batch
 * @throws JSONException if the JSON nesting is invalid
 */
private static void setList(JSONWriter writer, ListColumnVector vector,
    TypeDescription schema, int row) throws JSONException {
  writer.array();
  TypeDescription elementType = schema.getChildren().get(0);
  // offsets/lengths describe this row's slice of the flattened child vector.
  int start = (int) vector.offsets[row];
  long count = vector.lengths[row];
  for (int idx = 0; idx < count; ++idx) {
    setValue(writer, vector.child, elementType, start + idx);
  }
  writer.endArray();
}
/**
 * Serializes one STRUCT value as a JSON object, emitting each field name
 * followed by its value written via {@code setValue}.
 *
 * @param writer JSON sink
 * @param batch  struct column whose {@code fields} align with the schema's children
 * @param schema STRUCT type description supplying field names and child types
 * @param row    row index within the batch
 * @throws JSONException if the JSON nesting is invalid
 */
private static void setStruct(JSONWriter writer, StructColumnVector batch,
    TypeDescription schema, int row) throws JSONException {
  writer.object();
  List<String> names = schema.getFieldNames();
  List<TypeDescription> types = schema.getChildren();
  int fieldCount = types.size();
  for (int f = 0; f < fieldCount; ++f) {
    writer.key(names.get(f));
    setValue(writer, batch.fields[f], types.get(f), row);
  }
  writer.endObject();
}
}
/**
 * Serializes one MAP value as a JSON array of {"_key": ..., "_value": ...}
 * objects, since JSON object keys must be strings while ORC map keys can be
 * any type.
 *
 * @param writer JSON sink
 * @param vector map column with parallel keys/values vectors
 * @param schema MAP type description; child 0 is the key type, child 1 the value type
 * @param row    row index within the batch
 * @throws JSONException if the JSON nesting is invalid
 */
private static void printMap(JSONWriter writer, MapColumnVector vector,
    TypeDescription schema, int row) throws JSONException {
  writer.array();
  TypeDescription keyType = schema.getChildren().get(0);
  TypeDescription valueType = schema.getChildren().get(1);
  // offsets/lengths describe this row's slice of the flattened key/value vectors.
  int base = (int) vector.offsets[row];
  for (int entry = 0; entry < vector.lengths[row]; ++entry) {
    int pos = base + entry;
    writer.object();
    writer.key("_key");
    printValue(writer, vector.keys, keyType, pos);
    writer.key("_value");
    printValue(writer, vector.values, valueType, pos);
    writer.endObject();
  }
  writer.endArray();
}
/**
 * Emits one JSON object describing a stripe's physical layout: its file
 * offset, the byte lengths of its index/data/footer sections, and its row
 * count. Key order is fixed and part of the tool's output format.
 *
 * @param writer JSON sink
 * @param stripe stripe metadata from the ORC reader
 * @throws JSONException if the JSON nesting is invalid
 */
private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe) throws JSONException {
  writer.object();
  writer.key("offset").value(stripe.getOffset());
  writer.key("indexLength").value(stripe.getIndexLength());
  writer.key("dataLength").value(stripe.getDataLength());
  writer.key("footerLength").value(stripe.getFooterLength());
  writer.key("rowCount").value(stripe.getNumberOfRows());
  writer.endObject();
}
/**
 * Serializes one BINARY value as a JSON array of unsigned byte values
 * (0-255), one array element per byte.
 *
 * @param writer JSON sink
 * @param vector bytes column; start/length locate this row's slice of its buffer
 * @param row    row index within the batch
 * @throws JSONException if the JSON nesting is invalid
 */
static void printBinary(JSONWriter writer, BytesColumnVector vector,
    int row) throws JSONException {
  writer.array();
  int offset = vector.start[row];
  for(int i=0; i < vector.length[row]; ++i) {
    // Mask to 0-255: Java bytes are signed, but we want the unsigned value.
    writer.value(0xff & (int) vector.vector[row][offset + i]);
  }
  writer.endArray();
}

static void printValue(JSONWriter writer, ColumnVector vector,
/**
 * Emits summary statistics for one bloom filter: hash-function count, bit
 * capacity, set-bit count, observed load factor, and the expected
 * false-positive probability (loadFactor ^ numHashFunctions).
 *
 * @param writer JSON sink (caller supplies the enclosing object)
 * @param bf     bloom filter to summarize
 * @throws JSONException if the JSON nesting is invalid
 */
private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
    throws JSONException {
  int totalBits = bf.getBitSize();
  // Population count: total set bits across every word of the bit set.
  int setBits = 0;
  for (long word : bf.getBitSet()) {
    setBits += Long.bitCount(word);
  }
  int hashCount = bf.getNumHashFunctions();
  float loadFactor = (float) setBits / (float) totalBits;
  float expectedFpp = (float) Math.pow(loadFactor, hashCount);
  writer.key("numHashFunctions").value(hashCount);
  writer.key("bitCount").value(totalBits);
  writer.key("popCount").value(setBits);
  writer.key("loadFactor").value(loadFactor);
  writer.key("expectedFpp").value(expectedFpp);
}
for(OrcProto.Type type : types) { writer.object(); writer.key("columnId").value(i++); writer.key("columnType").value(type.getKind()); if (type.getFieldNamesCount() > 0) { writer.key("childColumnNames").array(); for (String field : type.getFieldNamesList()) { writer.value(field); writer.key("childColumnIds").array(); for (Integer colId : type.getSubtypesList()) { writer.value(colId); writer.key("precision").value(type.getPrecision()); writer.key("scale").value(type.getScale()); writer.key("maxLength").value(type.getMaximumLength());
/**
 * Begin appending a new array. All values until the balancing
 * <code>endArray</code> will be appended to this array. The
 * <code>endArray</code> method must be called to mark the array's end.
 * @return this
 * @throws JSONException If the nesting is too deep, or if the object is
 * started in the wrong place (for example as a key or after the end of the
 * outermost array or object).
 */
public JSONWriter array() throws JSONException {
    // Arrays may start in initial ('i'), object-value ('o'), or array ('a') mode.
    boolean placedLegally =
        this.mode == 'i' || this.mode == 'o' || this.mode == 'a';
    if (!placedLegally) {
        throw new JSONException("Misplaced array.");
    }
    this.push('a');
    this.append("[");
    this.comma = false;
    return this;
}
/**
 * Append either the value <code>true</code> or the value
 * <code>false</code>.
 * @param b A boolean.
 * @return this
 * @throws JSONException JSONException
 */
public JSONWriter value(boolean b) throws JSONException {
    // Boolean.toString yields exactly "true"/"false", the JSON literals.
    return this.append(Boolean.toString(b));
}
/**
 * End an object. This method must be called to balance calls to
 * <code>object</code>.
 * @return this
 * @throws JSONException If incorrectly nested.
 */
public JSONWriter endObject() throws JSONException {
    // Inside an object the writer sits in key mode ('k'), so that is the
    // mode expected (and popped) when the object closes.
    return this.end('k', '}');
}
/**
 * End something.
 * @param m Mode expected on top of the stack: 'k' when closing an object
 *          (objects put the writer in key mode), 'a' when closing an array.
 * @param c Closing character ('}' or ']')
 * @return this
 * @throws JSONException If unbalanced.
 */
private JSONWriter end(char m, char c) throws JSONException {
    if (this.mode != m) {
        // BUG FIX: endObject() passes 'k', never 'o', so the old test
        // (m == 'o' ? "Misplaced endObject." : ...) always reported
        // "Misplaced endArray." even for a misplaced endObject. Select on
        // 'a' instead so each operation reports its own name.
        throw new JSONException(m == 'a' ? "Misplaced endArray." :
            "Misplaced endObject.");
    }
    this.pop(m);
    try {
        this.writer.write(c);
    } catch (IOException e) {
        // Wrap rather than drop the cause so callers can still see it.
        throw new JSONException(e);
    }
    this.comma = true;
    return this;
}
/**
 * Emits the per-entry bloom filter statistics for one column and, when any
 * entries exist, a "stripeLevelBloomFilter" object holding the merge of all
 * entry-level filters.
 *
 * @param writer           JSON sink (caller supplies the enclosing object)
 * @param col              column id to report on
 * @param bloomFilterIndex per-column bloom filter indexes; may be null
 * @throws JSONException if the JSON nesting is invalid
 */
private static void writeBloomFilterIndexes(JSONWriter writer, int col,
    OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
  // Accumulates the union of every entry-level filter for this column.
  BloomFilterIO merged = null;
  if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
    writer.key("bloomFilterIndexes").array();
    int entryId = 0;
    for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
      BloomFilterIO current = new BloomFilterIO(bf);
      writer.object();
      writer.key("entryId").value(entryId);
      entryId += 1;
      writeBloomFilterStats(writer, current);
      if (merged == null) {
        merged = current;
      } else {
        merged.merge(current);
      }
      writer.endObject();
    }
    writer.endArray();
  }
  if (merged != null) {
    writer.key("stripeLevelBloomFilter");
    writer.object();
    writeBloomFilterStats(writer, merged);
    writer.endObject();
  }
}
/**
 * Serializes one row of a vectorized batch to JSON. A top-level STRUCT
 * schema becomes a JSON object keyed by field name; any other top-level
 * type is written directly from the first (only) column vector.
 *
 * @param writer JSON sink
 * @param batch  batch whose columns align with the schema's children
 * @param schema type description for the whole row
 * @param row    row index within the batch
 * @throws JSONException if the JSON nesting is invalid
 */
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
    TypeDescription schema, int row) throws JSONException {
  if (schema.getCategory() != TypeDescription.Category.STRUCT) {
    // Non-struct root: the single value lives in column 0.
    setValue(writer, batch.cols[0], schema, row);
    return;
  }
  List<String> names = schema.getFieldNames();
  List<TypeDescription> types = schema.getChildren();
  writer.object();
  for (int col = 0; col < batch.cols.length; ++col) {
    writer.key(names.get(col));
    setValue(writer, batch.cols[col], types.get(col), row);
  }
  writer.endObject();
}
private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs) throws JSONException { if (cs != null) { writer.key("count").value(cs.getNumberOfValues()); writer.key("hasNull").value(cs.hasNull()); if (cs instanceof BinaryColumnStatistics) { writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.BINARY); } else if (cs instanceof BooleanColumnStatistics) { writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount()); writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount()); writer.key("type").value(OrcProto.Type.Kind.BOOLEAN); } else if (cs instanceof IntegerColumnStatistics) { writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum()); writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum()); if (((IntegerColumnStatistics) cs).isSumDefined()) { writer.key("sum").value(((IntegerColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.LONG); } else if (cs instanceof DoubleColumnStatistics) { writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum()); writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum()); writer.key("sum").value(((DoubleColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.DOUBLE); } else if (cs instanceof StringColumnStatistics) { writer.key("min").value(((StringColumnStatistics) cs).getMinimum()); writer.key("max").value(((StringColumnStatistics) cs).getMaximum()); writer.key("totalLength").value(((StringColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.STRING); } else if (cs instanceof DateColumnStatistics) {
writer.key("fileName").value(filename); Path path = new Path(filename); Reader reader = FileDump.getReader(path, conf, null); if (reader == null) { writer.key("status").value("FAILED"); continue; writer.key("fileVersion").value(reader.getFileVersion().getName()); writer.key("writerVersion").value(reader.getWriterVersion()); RecordReaderImpl rows = (RecordReaderImpl) reader.rows(); writer.key("numberOfRows").value(reader.getNumberOfRows()); writer.key("compression").value(reader.getCompressionKind()); if (reader.getCompressionKind() != CompressionKind.NONE) { writer.key("compressionBufferSize").value(reader.getCompressionSize()); writer.key("schemaString").value(reader.getSchema().toString()); writer.key("schema").array(); writeSchema(writer, reader.getTypes()); writer.endArray(); writer.key("stripeStatistics").array(); List<StripeStatistics> stripeStatistics = reader.getStripeStatistics(); for (int n = 0; n < stripeStatistics.size(); n++) { writer.object(); writer.key("stripeNumber").value(n + 1); StripeStatistics ss = stripeStatistics.get(n); writer.key("columnStatistics").array(); for (int i = 0; i < ss.getColumnStatistics().length; i++) { writer.object();
/**
 * Begin appending a new object. All keys and values until the balancing
 * <code>endObject</code> will be appended to this object. The
 * <code>endObject</code> method must be called to mark the object's end.
 * @return this
 * @throws JSONException If the nesting is too deep, or if the object is
 * started in the wrong place (for example as a key or after the end of the
 * outermost array or object).
 */
public JSONWriter object() throws JSONException {
    // A freshly created writer ('i' = initial) transitions to object mode
    // before the placement check below.
    if (this.mode == 'i') {
        this.mode = 'o';
    }
    if (this.mode != 'o' && this.mode != 'a') {
        throw new JSONException("Misplaced object.");
    }
    this.append("{");
    // 'k' = expecting a key next inside the new object.
    this.push('k');
    this.comma = false;
    return this;
}
/**
 * Append a long value.
 * @param l A long.
 * @return this
 * @throws JSONException JSONException
 */
public JSONWriter value(long l) throws JSONException {
    // String.valueOf(long) delegates to Long.toString, yielding the same text.
    return this.append(String.valueOf(l));
}
/**
 * End an array. This method must be called to balance calls to
 * <code>array</code>.
 * @return this
 * @throws JSONException If incorrectly nested.
 */
public JSONWriter endArray() throws JSONException {
    // Arrays push mode 'a', so that is the mode expected (and popped) here.
    return this.end('a', ']');
}