protected void initialize(int size) throws SerDeException {
  // Pre-allocate one reusable BytesRefWritable per column and register it in
  // the serialize cache, so serialization does not allocate per row.
  field = new BytesRefWritable[size];
  for (int i = 0; i < size; i++) {
    field[i] = new BytesRefWritable();
    serializeCache.set(i, field[i]);
  }
  serializedSize = 0;
  stats = new SerDeStats();
  lastOperationSerialize = false;
  lastOperationDeserialize = false;
}
"NULL".getBytes(StandardCharsets.UTF_8)}; s = new BytesRefArrayWritable(bytesArray.length); s.set(0, new BytesRefWritable("123".getBytes(StandardCharsets.UTF_8))); s.set(1, new BytesRefWritable("456".getBytes(StandardCharsets.UTF_8))); s.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8))); s.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8))); s.set(4, new BytesRefWritable("5.3".getBytes(StandardCharsets.UTF_8))); s.set(5, new BytesRefWritable("hive and hadoop".getBytes(StandardCharsets.UTF_8))); s.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); s.set(7, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); patialS.set(0, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); patialS.set(1, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); patialS.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8))); patialS.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8))); patialS.set(4, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); patialS.set(5, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8))); patialS.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8))); patialS.set(7, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8)));
BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
bytes.set(i, cu);
BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
bytes.set(i, cu);
private void writeTest(FileSystem fs, int count, Path file,
    byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
  cleanup();
  RCFileOutputFormat.setColumnNumber(conf, fieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(fieldsData.length);
  for (int i = 0; i < fieldsData.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(fieldsData[i], 0, fieldsData[i].length);
    bytes.set(i, cu);
  }
  // Append the same row 'count' times.
  for (int i = 0; i < count; i++) {
    writer.append(bytes);
  }
  writer.close();
  long fileLen = fs.getFileStatus(file).getLen();
  System.out.println("The size of the RCFile with " + bytes.size()
      + " columns and " + count + " rows is " + fileLen);
}
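The matching read path is not shown in this section. A minimal read-back sketch, assuming the public RCFile.Reader API; it must run in a context that declares IOException, and the copy step is only needed because BytesRefWritable is a reference into the reader's buffer:

// Read every row back and visit each column value.
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable cols = new BytesRefArrayWritable();
while (reader.next(rowID)) {
  reader.getCurrentRow(cols);
  for (int i = 0; i < cols.size(); i++) {
    BytesRefWritable col = cols.get(i);
    // Copy out the referenced byte range before the next row overwrites it.
    byte[] copy = Arrays.copyOfRange(col.getData(), col.getStart(),
        col.getStart() + col.getLength());
  }
}
reader.close();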
private void writeRCFileTest(FileSystem fs, int rowCount, Path file,
    int columnNum, CompressionCodec codec) throws IOException {
  fs.delete(file, true);
  resetRandomGenerators();
  RCFileOutputFormat.setColumnNumber(conf, columnNum);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
  byte[][] columnRandom = new byte[columnNum][];
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
  for (int i = 0; i < columnNum; i++) {
    BytesRefWritable cu = new BytesRefWritable();
    bytes.set(i, cu);
  }
  for (int i = 0; i < rowCount; i++) {
    nextRandomRow(columnRandom, bytes);
    writer.append(bytes);
  }
  writer.close();
}
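Both random-data writers call nextRandomRow, whose implementation does not appear in this section. A plausible sketch, assuming a java.util.Random field (which resetRandomGenerators would re-seed) and an arbitrary column-length bound; field names and the bound are hypothetical:

// Hypothetical helper: fill each column with fresh random bytes and point
// the pre-allocated BytesRefWritable entries at the new buffers.
private final Random randomizer = new Random(0xdeadbeefL); // hypothetical field
private static final int MAX_COL_LEN = 30;                 // assumed bound

private void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes) {
  bytes.resetValid(row.length);
  for (int i = 0; i < row.length; i++) {
    int len = randomizer.nextInt(MAX_COL_LEN) + 1;
    row[i] = new byte[len];
    randomizer.nextBytes(row[i]);
    bytes.get(i).set(row[i], 0, len);
  }
}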
private void writeSequenceFileTest(FileSystem fs, int rowCount, Path file,
    int columnNum, CompressionCodec codec) throws IOException {
  resetRandomGenerators();
  byte[][] columnRandom = new byte[columnNum][];
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
  for (int i = 0; i < columnNum; i++) {
    BytesRefWritable cu = new BytesRefWritable();
    bytes.set(i, cu);
  }
  // A zero-length key is not allowed by the block-compression writer,
  // so use a ByteWritable key instead.
  ByteWritable key = new ByteWritable();
  SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, file,
      ByteWritable.class, BytesRefArrayWritable.class, CompressionType.BLOCK, codec);
  for (int i = 0; i < rowCount; i++) {
    nextRandomRow(columnRandom, bytes);
    seqWriter.append(key, bytes);
  }
  seqWriter.close();
}
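These two writers exist to compare the on-disk footprint of the columnar RCFile layout against a row-wise SequenceFile holding the same data. A possible driver, where the paths, row count, and column count are illustrative values, not taken from the original:

// Hypothetical benchmark driver: write the same random data in both formats
// (resetRandomGenerators re-seeds between runs) and compare file lengths.
Path rcPath = new Path(testDir, "test.rc");    // assumed test paths
Path seqPath = new Path(testDir, "test.seq");
writeRCFileTest(fs, 10000, rcPath, 20, new DefaultCodec());
writeSequenceFileTest(fs, 10000, seqPath, 20, new DefaultCodec());
System.out.println("RCFile bytes:       " + fs.getFileStatus(rcPath).getLen());
System.out.println("SequenceFile bytes: " + fs.getFileStatus(seqPath).getLen());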
/**
 * Initialize the SerDe given the parameters.
 *
 * @see SerDe#initialize(Configuration, Properties)
 */
public void initialize(Configuration job, Properties tbl) throws SerDeException {
  serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, getClass().getName());

  // Create the ObjectInspectors for the fields. Note: currently
  // ColumnarObject uses the same ObjectInspector as LazyStruct.
  cachedObjectInspector = LazyFactory.createColumnarStructInspector(
      serdeParams.getColumnNames(), serdeParams.getColumnTypes(),
      serdeParams.getSeparators(), serdeParams.getNullSequence(),
      serdeParams.isEscaped(), serdeParams.getEscapeChar());

  java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(job);
  cachedLazyStruct = new ColumnarStruct(cachedObjectInspector, notSkipIDs,
      serdeParams.getNullSequence());

  int size = serdeParams.getColumnTypes().size();
  field = new BytesRefWritable[size];
  for (int i = 0; i < size; i++) {
    field[i] = new BytesRefWritable();
    serializeCache.set(i, field[i]);
  }

  LOG.debug("ColumnarSerDe initialized with: columnNames="
      + serdeParams.getColumnNames() + " columnTypes="
      + serdeParams.getColumnTypes() + " separator="
      + Arrays.asList(serdeParams.getSeparators()) + " nullstring="
      + serdeParams.getNullString());
}
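For orientation, a minimal usage sketch of the SerDe initialized above, assuming the standard Hive table properties "columns" and "columns.types"; the schema and values are illustrative assumptions, not taken from the original:

// Hypothetical usage: initialize a two-column schema, then deserialize one
// row of column-wise bytes into a lazily-inspected struct.
ColumnarSerDe serDe = new ColumnarSerDe();
Properties tbl = new Properties();
tbl.setProperty("columns", "a,b");               // assumed column names
tbl.setProperty("columns.types", "int,string");  // assumed column types
serDe.initialize(new Configuration(), tbl);

BytesRefArrayWritable row = new BytesRefArrayWritable(2);
row.set(0, new BytesRefWritable("123".getBytes(StandardCharsets.UTF_8)));
row.set(1, new BytesRefWritable("abc".getBytes(StandardCharsets.UTF_8)));
Object deserialized = serDe.deserialize(row);
ObjectInspector oi = serDe.getObjectInspector();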
private void createRCFile(final String fileName, final int numRecords,
    final int maxColumns, boolean addNullValue) throws IOException {
  // Write the RCFile.
  SequenceFile.Metadata metadata = getMetadataForRCFile();
  Configuration conf = new Configuration();
  conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
  Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
  RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
      metadata, null);
  for (int row = 0; row < numRecords; row++) {
    BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
    dataWrite.resetValid(maxColumns);
    for (int column = 0; column < maxColumns; column++) {
      Writable sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
      // Set the last column of the last row to null.
      if (addNullValue && column == maxColumns - 1 && row == numRecords - 1) {
        sampleText = NullWritable.get();
      }
      ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
      sampleText.write(dataOutput);
      dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
    }
    rcFileWriter.append(dataWrite);
  }
  rcFileWriter.close();
}
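The getMetadataForRCFile() helper is not shown above. A minimal sketch of what it could look like, assuming the RCFile.createMetadata factory, which takes an even number of Text arguments as key/value pairs; the strings here are illustrative only:

// Hypothetical helper: build file-level metadata as key/value Text pairs.
private SequenceFile.Metadata getMetadataForRCFile() {
  return RCFile.createMetadata(new Text("metaKey"), new Text("metaValue"));
}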