/**
 * Initializes the SerDe with an empty "columns.comments" table property.
 *
 * <p>For HBase-backed tables, the Properties object handed to the SerDe initializer carries the
 * empty value "" under the "columns.comments" key. Because the code contains assertions, running
 * this test surfaces an assertion error rather than the error seen in a production setup.
 *
 * @throws SerDeException if the SerDe cannot be initialized
 */
public void testEmptyColumnComment() throws SerDeException {
  HBaseSerDe hbaseSerDe = new HBaseSerDe();

  // Start from the value-struct table properties and blank out the column comments.
  Properties tableProps = createPropertiesForValueStruct();
  tableProps.setProperty("columns.comments", "");

  Configuration configuration = new Configuration();
  hbaseSerDe.initialize(configuration, tableProps);
}
public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSchemaInline(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING); }
public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING); }
public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, test, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_3); }
public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromClass2(1); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = { new String("test-row1"), new String( "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], " + "[999, 1234567890], [999, 1234455555]]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroExternalSchema(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2); }
public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromClass1(1); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = { new String("test-row1"), new String( "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], " + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], " + "[999, 1234567890], [999, 1234455555]]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSerClass(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING_2); }
Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroColumnFamilyMap(); serDe.initialize(conf, tbl);
public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException { byte[] cfa = "cola".getBytes(); byte[] qualStruct = "struct".getBytes(); TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0); byte[] key = testStruct.getBytes(); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] testData = testStruct.getBytes(); kvs.add(new KeyValue(key, cfa, qualStruct, testData)); Result r = Result.create(kvs); byte[] putKey = testStruct.getBytesWithDelimiters(); Put p = new Put(putKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2))); // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForValueStruct(); serDe.initialize(conf, tbl); deserializeAndSerializeHBaseValueStruct(serDe, r, p); }
Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS); serDe.initialize(conf, tbl);