/**
 * {@inheritDoc}
 * @see parquet.thrift.ParquetProtocol#writeStructEnd()
 */
@Override
public void writeStructEnd() throws TException {
  if (DEBUG) LOG.debug("writeStructEnd()");
  currentProtocol.writeStructEnd();
}

public static void writeIntLittleEndian(OutputStream out, int v) throws IOException {
  // TODO: this is duplicated code in LittleEndianDataOutputStream
  out.write((v >>> 0) & 0xFF);
  out.write((v >>> 8) & 0xFF);
  out.write((v >>> 16) & 0xFF);
  out.write((v >>> 24) & 0xFF);
  if (Log.DEBUG) LOG.debug("write le int: " + v + " => "
      + ((v >>> 0) & 0xFF) + " " + ((v >>> 8) & 0xFF) + " "
      + ((v >>> 16) & 0xFF) + " " + ((v >>> 24) & 0xFF));
}

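// A minimal usage sketch (the demo class itself is not part of the original
// source): writing 0x01020304 emits the least-significant byte first, which is
// the little-endian order used below when serializeFooter writes the footer length.
import java.io.ByteArrayOutputStream;
import java.io.IOException;

class WriteIntLittleEndianDemo {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BytesUtils.writeIntLittleEndian(out, 0x01020304);
    for (byte b : out.toByteArray()) {
      System.out.printf("%02x ", b); // prints: 04 03 02 01
    }
  }
}
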
/**
 * {@inheritDoc}
 * @see parquet.column.values.ValuesReader#initFromPage(int, byte[], int)
 */
@Override
public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
  if (DEBUG) LOG.debug("init from page at offset " + offset + " for length " + (in.length - offset));
  this.in.initFromPage(valueCount, in, offset);
}

protected final void startMessage() {
  // reset state
  endField = null;
  if (DEBUG) LOG.debug("startMessage()");
  recordConsumer.startMessage();
}

@Override
public String[] getPartitionKeys(String location, Job job) throws IOException {
  if (DEBUG) LOG.debug("LoadMetadata.getPartitionKeys(" + location + ", " + job + ")");
  setInput(location, job);
  // returning null tells Pig this loader exposes no partition keys
  return null;
}

/**
 * {@inheritDoc}
 * @see parquet.thrift.ParquetProtocol#writeSetBegin(org.apache.thrift.protocol.TSet)
 */
@Override
public void writeSetBegin(TSet set) throws TException {
  if (DEBUG) LOG.debug("writeSetBegin(" + set + ")");
  currentProtocol.writeSetBegin(set);
}

/**
 * {@inheritDoc}
 * @see parquet.thrift.ParquetProtocol#writeByte(byte)
 */
@Override
public void writeByte(byte b) throws TException {
  if (DEBUG) LOG.debug("writeByte(" + b + ")");
  currentProtocol.writeByte(b);
}

/**
 * {@inheritDoc}
 * @see parquet.thrift.ParquetProtocol#writeI16(short)
 */
@Override
public void writeI16(short i16) throws TException {
  if (DEBUG) LOG.debug("writeI16(" + i16 + ")");
  currentProtocol.writeI16(i16);
}

public BoundedIntValuesWriter(int bound, int initialCapacity, int pageSize) {
  if (bound == 0) {
    throw new ParquetEncodingException("Value bound cannot be 0. Use DevNullColumnWriter instead.");
  }
  this.bitWriter = new BitWriter(initialCapacity, pageSize);
  // smallest bit width that can represent every value in [0, bound]
  bitsPerValue = (int) Math.ceil(Math.log(bound + 1) / Math.log(2));
  // how many consecutive equal values it takes before encoding them as a run pays off
  shouldRepeatThreshold = (bitsPerValue + 9) / (1 + bitsPerValue);
  if (Log.DEBUG) LOG.debug("init column with bit width of " + bitsPerValue
      + " and repeat threshold of " + shouldRepeatThreshold);
}

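// A hedged worked example (the demo class is assumed, not in the original
// source): for bound = 6, values 0..6 need ceil(log2(7)) = 3 bits each, and a
// value must repeat (3 + 9) / (1 + 3) = 3 times before a run is worth encoding.
class BoundedIntMathDemo {
  public static void main(String[] args) {
    int bound = 6; // hypothetical maximum value for the column
    int bitsPerValue = (int) Math.ceil(Math.log(bound + 1) / Math.log(2));
    int shouldRepeatThreshold = (bitsPerValue + 9) / (1 + bitsPerValue);
    // prints: 3 bits, repeat threshold 3
    System.out.println(bitsPerValue + " bits, repeat threshold " + shouldRepeatThreshold);
  }
}
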
/**
 * {@inheritDoc}
 * @see parquet.thrift.ParquetProtocol#writeMapBegin(org.apache.thrift.protocol.TMap)
 */
@Override
public void writeMapBegin(TMap map) throws TException {
  if (DEBUG) LOG.debug("writeMapBegin(" + toString(map) + ")");
  currentProtocol.writeMapBegin(map);
}

protected final void startGroup(String field, int index) {
  startField(field, index);
  if (DEBUG) LOG.debug("startGroup()");
  recordConsumer.startGroup();
}

@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
  if (DEBUG) LOG.debug("LoadMetadata.getSchema(" + location + ", " + job + ")");
  setInput(location, job);
  return new ResourceSchema(schema);
}

/**
 * Ends a block once all column chunks have been written.
 * @throws IOException
 */
public void endBlock() throws IOException {
  state = state.endBlock();
  if (DEBUG) LOG.debug(out.getPos() + ": end block");
  currentBlock.setRowCount(currentRecordCount);
  blocks.add(currentBlock);
  currentBlock = null;
}

protected final void addPrimitiveBINARY(String field, int index, Binary value) {
  startField(field, index);
  if (DEBUG) LOG.debug("addBinary(" + value + ")");
  recordConsumer.addBinary(value);
  endField(field, index);
}

protected final void addPrimitiveINT32(String field, int index, int value) {
  startField(field, index);
  if (DEBUG) LOG.debug("addInteger(" + value + ")");
  recordConsumer.addInteger(value);
  endField(field, index);
}

private void checkRead() {
  if (isPageFullyConsumed()) {
    if (isFullyConsumed()) {
      // no more pages in this column chunk
      if (DEBUG) LOG.debug("end reached");
      repetitionLevel = 0; // the next repetition level
      return;
    }
    // current page exhausted but more values remain: load the next page
    readPage();
  }
  readRepetitionAndDefinitionLevels();
}

@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
  return BytesInput.from(arrayOut);
}

private Type filterMap(GroupType mapType, FieldSchema mapFieldSchema) throws FrontendException {
  if (DEBUG) LOG.debug("filtering MAP schema:\n" + mapType + "\nwith:\n " + mapFieldSchema);
  if (mapType.getFieldCount() != 1) {
    throw new RuntimeException("not unwrapping the right type, this should be a Map: " + mapType);
  }
  GroupType nested = mapType.getType(0).asGroupType();
  if (nested.getFieldCount() != 2) {
    throw new RuntimeException("this should be a Map Key/Value: " + mapType);
  }
  FieldSchema innerField = mapFieldSchema.schema.getField(0);
  // keep the key type as-is and recursively filter only the value type
  return mapType.withNewFields(nested.withNewFields(nested.getType(0), filter(nested.getType(1), innerField)));
}

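// For reference, a hedged illustration of the shape filterMap expects (the
// field names and annotations here are assumptions for illustration): a map is
// a single-field group wrapping a repeated two-field key/value group, e.g.
//
//   optional group my_map (MAP) {
//     repeated group map (MAP_KEY_VALUE) {
//       required binary key (UTF8);
//       optional int32 value;
//     }
//   }
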
private static void serializeFooter(ParquetMetadata footer, FSDataOutputStream out) throws IOException {
  long footerIndex = out.getPos();
  parquet.format.FileMetaData parquetMetadata = new ParquetMetadataConverter().toParquetMetadata(CURRENT_VERSION, footer);
  writeFileMetaData(parquetMetadata, out);
  if (DEBUG) LOG.debug(out.getPos() + ": footer length = " + (out.getPos() - footerIndex));
  BytesUtils.writeIntLittleEndian(out, (int) (out.getPos() - footerIndex));
  out.write(MAGIC);
}

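// A reader-side sketch (the class and its use of RandomAccessFile are
// assumptions, not from the original source) showing why the length and magic
// are written last: the file ends with [footer][4-byte LE length][MAGIC], so a
// reader can locate the footer from a fixed-size trailer at the end of the file.
import java.io.IOException;
import java.io.RandomAccessFile;

class FooterLocator {
  static final int MAGIC_LENGTH = 4; // "PAR1"

  static long footerStart(RandomAccessFile file) throws IOException {
    long fileLength = file.length();
    // the last 8 bytes are: 4-byte little-endian footer length + 4-byte magic
    file.seek(fileLength - MAGIC_LENGTH - 4);
    int b0 = file.read(), b1 = file.read(), b2 = file.read(), b3 = file.read();
    int footerLength = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
    return fileLength - MAGIC_LENGTH - 4 - footerLength;
  }
}
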
@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
  // the encoded lengths come first, followed by the concatenated value bytes
  return BytesInput.concat(lengthWriter.getBytes(), BytesInput.from(arrayOut));
}