/**
 * Parses an HBase columns-mapping specification with the column-prefix
 * handling disabled (third argument of the full overload fixed to false —
 * presumably "hide column prefix"; confirm against the three-arg overload).
 *
 * @param columnsMappingSpec the value of the hbase.columns.mapping property
 * @param doColumnRegexMatching whether family:qualifier patterns may be regexes
 * @return the parsed column mappings
 * @throws SerDeException if the mapping spec is malformed
 */
public static ColumnMappings parseColumnsMapping(
    String columnsMappingSpec, boolean doColumnRegexMatching) throws SerDeException {
  return parseColumnsMapping(columnsMappingSpec, doColumnRegexMatching, false);
}
/**
// NOTE(review): fragment — this method is truncated mid-try in this excerpt;
// the catch clause and closing braces are not visible. Delegates job
// configuration for an HBase-backed table to the static SerDe helper.
@Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  try {
    HBaseSerDe.configureJobConf(tableDesc, jobConf);
// NOTE(review): fragment cut at both ends — the method signature's start and
// the remainder of the newIndexPredicateAnalyzer(...) argument list are not
// visible here. Looks up the row-key and timestamp column mappings from the
// serde parameters and begins building a predicate analyzer for pushdown.
HBaseSerDe hBaseSerDe, ExprNodeDesc predicate) {
  // Key and timestamp mappings drive which predicates can be pushed to HBase.
  ColumnMapping keyMapping = hBaseSerDe.getHBaseSerdeParam().getKeyColumnMapping();
  ColumnMapping tsMapping = hBaseSerDe.getHBaseSerdeParam().getTimestampColumnMapping();
  IndexPredicateAnalyzer analyzer = HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
      keyMapping.columnName, keyMapping.isComparable(),
/**
 * Regression test: initializing the SerDe for an HBase-backed table whose
 * Properties carry an explicitly empty "" value for "columns.comments" must
 * succeed. Since there are assertions in the code, when running this test a
 * failure surfaces as an AssertionError rather than the error seen in a
 * production setup.
 */
public void testEmptyColumnComment() throws SerDeException {
  HBaseSerDe serDe = new HBaseSerDe();
  Properties props = createPropertiesForValueStruct();
  props.setProperty("columns.comments", "");
  serDe.initialize(new Configuration(), props);
}
private void deserializeAndSerialize( HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData) throws SerDeException { // Get the row structure StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); assertEquals(9, fieldRefs.size()); // Deserialize Object row = serDe.deserialize(new ResultWritable(r)); for (int i = 0; i < fieldRefs.size(); i++) { Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i)); if (fieldData != null) { fieldData = ((LazyPrimitive<?, ?>)fieldData).getWritableObject(); } assertEquals("Field " + i, expectedFieldsData[i], fieldData); } // Serialize assertEquals(PutWritable.class, serDe.getSerializedClass()); PutWritable serializedPut = (PutWritable) serDe.serialize(row, oi); assertEquals("Serialized data", p.toString(),String.valueOf(serializedPut.getPut())); }
private void deserializeAndSerializeHBaseCompositeKey(HBaseSerDe serDe, Result r, Put p) throws SerDeException, IOException { StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs(); Object row = serDe.deserialize(new ResultWritable(r)); for (int j = 0; j < fieldRefs.size(); j++) { Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j)); assertNotNull(fieldData); } assertEquals( "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":\"This is a test data\"}", SerDeUtils.getJSONString(row, soi)); // Now serialize Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); assertEquals("Serialized put:", p.toString(), put.toString()); }
// NOTE(review): non-contiguous fragment — the opening try blocks and some
// closing braces are not visible in this excerpt. Two catch clauses wrap
// SerDeException into IOException, preserving the cause.
columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
} catch (SerDeException e) {
  throw new IOException(e);
// (context missing in the original excerpt)
iKey = HBaseSerDe.getRowKeyColumnOffset(columnsMapping);
} catch (SerDeException e) {
  throw new IOException(e);
// NOTE(review): fragment of an initialization sequence — the enclosing method
// is not visible here. Presumably runs during SerDe setup; confirm ordering
// requirements against the full method.
columnsMapping = parseColumnsMapping(hbaseColumnsMapping);   // parse the column-mapping spec
parseColumnStorageTypes(hbaseTableStorageType);              // parse per-column storage types
setKeyColumnOffset();                                        // locate the row-key column index
/**
 * Serializes a single value. A primitive written in binary mode is emitted
 * directly via LazyUtils; every other case falls through to the text-mode
 * three-argument overload.
 *
 * @param obj the value to write
 * @param objInspector inspector describing obj
 * @param level current nesting depth (controls separators in the overload)
 * @param writeBinary whether binary encoding was requested for this column
 * @return true on success (the overload's result otherwise)
 * @throws IOException if writing to the serialize stream fails
 */
private boolean serialize(
    Object obj, ObjectInspector objInspector, int level, boolean writeBinary) throws IOException {
  // Binary encoding only applies to primitives; everything else is text-serialized.
  if (objInspector.getCategory() != Category.PRIMITIVE || !writeBinary) {
    return serialize(obj, objInspector, level);
  }
  LazyUtils.writePrimitive(serializeStream, obj, (PrimitiveObjectInspector) objInspector);
  return true;
}
// NOTE(review): non-contiguous fragment — the surrounding expression and
// method are not visible. A ternary picks the struct field refs when the row
// type has at least one field (null otherwise); the key column is serialized
// separately from the remaining fields.
((StructTypeInfo) serdeParams.getRowTypeInfo())
    .getAllStructFieldNames().size() > 0) ?
    ((StructObjectInspector)getObjectInspector()).getAllStructFieldRefs() : null;
// Row key is serialized without a Put target; other fields go into `put`.
byte [] key = serializeField(iKey, null, fields, list, declaredFields);
serializeField(i, put, fields, list, declaredFields);
/**
 * Splits a filter predicate into the part pushable to HBase and the residual,
 * by delegating to the key factory configured on the deserializing SerDe.
 */
@Override
public DecomposedPredicate decomposePredicate(
    JobConf jobConf, Deserializer deserializer, ExprNodeDesc predicate) {
  return ((HBaseSerDe) deserializer).getKeyFactory()
      .decomposePredicate(jobConf, deserializer, predicate);
}
// NOTE(review): fragment — the enclosing test method is not visible here.
// Standard SerDe test setup: fresh SerDe, default Configuration, and table
// properties exercising column-prefix mappings.
HBaseSerDe serDe = new HBaseSerDe();
Configuration conf = new Configuration();
Properties tbl = createPropertiesForColumnPrefixes();
private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p, Object[] expectedFieldsData, String expectedDeserializedAvroString) throws SerDeException, IOException { StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs(); Object row = serDe.deserialize(new ResultWritable(r)); for (int j = 0; j < fieldRefs.size(); j++) { Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j)); assertNotNull(fieldData); assertEquals(expectedFieldsData[j], fieldData.toString().trim()); } assertEquals(expectedDeserializedAvroString, SerDeUtils.getJSONString(row, soi)); // Now serialize Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); assertNotNull(put); assertEquals(p.getFamilyCellMap(), put.getFamilyCellMap()); }
// NOTE(review): non-contiguous fragment (a duplicate excerpt also appears
// earlier in this chunk) — the opening try blocks and some closing braces are
// not visible. SerDeException is rethrown as IOException with cause preserved.
columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
} catch (SerDeException e) {
  throw new IOException(e);
// (context missing in the original excerpt)
iKey = HBaseSerDe.getRowKeyColumnOffset(columnsMapping);
} catch (SerDeException e) {
  throw new IOException(e);
public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualAvro = "avro".getBytes(); byte[] rowKey = Bytes.toBytes("test-row1"); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForHiveAvroSchemaInline(); serDe.initialize(conf, tbl); deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData, EXPECTED_DESERIALIZED_AVRO_STRING); }
// NOTE(review): non-contiguous fragment — these write/serialize calls appear
// to come from different branches (list, map, struct) of a serialize method
// whose surrounding control flow is not visible. Nesting level increases by 1
// for list/struct elements and by 2 for map entries.
serializeStream.write(separator);
serialize(list.get(i), eoi, level + 1);                 // list element
serializeStream.write(separator);
serialize(entry.getKey(), koi, level+2);                // map key
serializeStream.write(keyValueSeparator);
serialize(entry.getValue(), voi, level+2);              // map value
serialize(list.get(i), fields.get(i).getFieldObjectInspector(), level + 1); // struct field
/**
 * Delegates predicate decomposition (pushed-down vs. residual filter) to the
 * key factory obtained from the HBase SerDe doing the deserialization.
 */
@Override
public DecomposedPredicate decomposePredicate(
    JobConf jobConf, Deserializer deserializer, ExprNodeDesc predicate) {
  HBaseSerDe hbaseSerDe = (HBaseSerDe) deserializer;
  HBaseKeyFactory factory = hbaseSerDe.getKeyFactory();
  return factory.decomposePredicate(jobConf, deserializer, predicate);
}
public void testHBaseSerDeCompositeKeyWithSeparator() throws SerDeException, TException, IOException { byte[] cfa = "cola".getBytes(); byte[] qualStruct = "struct".getBytes(); TestStruct testStruct = new TestStruct("A", "B", "C", true, (byte) 45); byte[] rowKey = testStruct.getBytes(); // Data List<Cell> kvs = new ArrayList<Cell>(); byte[] testData = "This is a test data".getBytes(); kvs.add(new KeyValue(rowKey, cfa, qualStruct, testData)); Result r = Result.create(kvs); Put p = new Put(rowKey); // Post serialization, separators are automatically inserted between different fields in the // struct. Currently there is not way to disable that. So the work around here is to pad the // data with the separator bytes before creating a "Put" object p.add(new KeyValue(rowKey, cfa, qualStruct, testData)); // Create, initialize, and test the SerDe HBaseSerDe serDe = new HBaseSerDe(); Configuration conf = new Configuration(); Properties tbl = createPropertiesForCompositeKeyWithSeparator(); SerDeUtils.initializeSerDe(serDe, conf, tbl, null); deserializeAndSerializeHBaseCompositeKey(serDe, r, p); }
/**
 * Convenience overload: parses the columns-mapping spec with column regex
 * matching enabled by default.
 *
 * @param columnsMappingSpec the value of the hbase.columns.mapping property
 * @return the parsed column mappings
 * @throws SerDeException if the mapping spec is malformed
 */
public static ColumnMappings parseColumnsMapping(String columnsMappingSpec)
    throws SerDeException {
  final boolean doColumnRegexMatching = true;
  return parseColumnsMapping(columnsMappingSpec, doColumnRegexMatching);
}
private void deserializeAndSerializeHBaseValueStruct(HBaseSerDe serDe, Result r, Put p) throws SerDeException, IOException { StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs(); Object row = serDe.deserialize(new ResultWritable(r)); Object fieldData = null; for (int j = 0; j < fieldRefs.size(); j++) { fieldData = soi.getStructFieldData(row, fieldRefs.get(j)); assertNotNull(fieldData); if (fieldData instanceof LazyStruct) { assertEquals(((LazyStruct) fieldData).getField(0).toString(), "A"); assertEquals(((LazyStruct) fieldData).getField(1).toString(), "B"); assertEquals(((LazyStruct) fieldData).getField(2).toString(), "C"); } else { Assert.fail("fieldData should be an instance of LazyStruct"); } } assertEquals( "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"}}", SerDeUtils.getJSONString(row, soi)); // Now serialize Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); assertEquals("Serialized put:", p.toString(), put.toString()); }