/**
 * Initializes the SerDe with table properties whose "columns.comments" value is the empty
 * string, which is what the Properties object handed to the SerDe initializer contains for
 * HBase-backed tables.
 *
 * <p>Because the code under test contains assertions, a failure in this test surfaces as an
 * assertion error rather than the error that would be seen in a production setup.
 */
public void testEmptyColumnComment() throws SerDeException {
  HBaseSerDe emptyCommentSerDe = new HBaseSerDe();

  // Start from the value-struct table properties and blank out the column comments.
  Properties tableProps = createPropertiesForValueStruct();
  tableProps.setProperty("columns.comments", "");

  Configuration conf = new Configuration();
  emptyCommentSerDe.initialize(conf, tableProps);
}
public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSchemaInline(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING); }
HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForColumnPrefixes();
public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING); }
public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, test, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_3); }
public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromClass2(1); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = { new String("test-row1"), new String( "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], " + "[999, 1234567890], [999, 1234455555]]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroExternalSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2); }
public void testHBaseSerDeCompositeKeyWithSeparator() throws SerDeException, TException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualStruct = "struct".getBytes(); TestStruct testStruct = new TestStruct("A", "B", "C", true, (byte) 45); byte[] rowKey = testStruct.getBytes(); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] testData = "This is a test data".getBytes(); kvs.add(new KeyValue(rowKey, cfa, qualStruct, testData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualStruct, testData)); // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForCompositeKeyWithSeparator(); SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerializeHBaseCompositeKey(serDe, r, p); }
public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromClass1(1); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = { new String("test-row1"), new String( "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], " + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], " + "[999, 1234567890], [999, 1234455555]]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSerClass(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2); }
}; HBaseSerDe hbaseSerDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveMapHBaseColumnFamilyII_I(); columnFamilies, columnQualifiersAndValues); hbaseSerDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesForHiveMapHBaseColumnFamilyII_II();
HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroColumnFamilyMap();
public void testHBaseSerDeCompositeKeyWithoutSeparator() throws SerDeException, TException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualStruct = "struct".getBytes(); TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0); byte[] rowKey = testStruct.getBytes(); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] testData = "This is a test data".getBytes(); kvs.add(new KeyValue(rowKey, cfa, qualStruct, testData)); Result r = Result.create(kvs); byte[] putRowKey = testStruct.getBytesWithDelimiters(); Put p = new Put(putRowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(putRowKey, cfa, qualStruct, testData)); // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForCompositeKeyWithoutSeparator(); SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerializeHBaseCompositeKey(serDe, r, p); }
new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true)}}; HBaseSerDe hbaseSerDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveMapHBaseColumnFamily(); columnFamilies, columnQualifiersAndValues); hbaseSerDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesForHiveMapHBaseColumnFamilyII();
public void testHBaseSerDeWithTimestamp() throws SerDeException { HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesI_I();
HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesI_I(); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_II(); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_III(); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesI_IV();
public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException { byte[] cfa = "cola".getBytes(); byte[] qualStruct = "struct".getBytes(); TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0); byte[] key = testStruct.getBytes(); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] testData = testStruct.getBytes(); kvs.add(new KeyValue(key, cfa, qualStruct, testData)); Result r = Result.create(kvs); byte[] putKey = testStruct.getBytesWithDelimiters(); Put p = new Put(putKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2))); // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForValueStruct(); serDe.initialize(conf, tbl); deserializeAndSerializeHBaseValueStruct(serDe, r, p); }
HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesII_I(); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesII_II(); serDe = new HBaseSerDe(); conf = new Configuration(); tbl = createPropertiesII_III();
HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);