/**
 * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required
 * before you can write out a record via write.
 * @return an HCatRecord that can be serialized
 * @throws HCatException if copying the lazy record's fields fails
 */
public HCatRecord getWritable() throws HCatException {
  DefaultHCatRecord writable = new DefaultHCatRecord();
  writable.copy(this);
  return writable;
}
}
/**
 * Builds a two-field test record of the form ("Row #: i", i).
 * @param i the row number embedded in both fields
 * @return a DefaultHCatRecord holding a label string and the row number
 */
private static HCatRecord getRecord(int i) {
  List<Object> fields = new ArrayList<Object>(2);
  fields.add("Row #: " + i);
  fields.add(i);
  return new DefaultHCatRecord(fields);
}
/**
 * Builds a four-field record (int, long, double, string) from the test
 * constants, used as serialization test input.
 * @return a DefaultHCatRecord with one value of each scalar type
 * @throws Exception declared for test-helper convenience; not thrown here
 */
private HCatRecord getHCatRecord() throws Exception {
  List<Object> rec_1 = new ArrayList<Object>(4);
  // valueOf/autoboxing instead of the boxed-type constructors
  // (new Integer(...), new Long(...), ...), which are deprecated since Java 9.
  rec_1.add(Integer.valueOf(INT_CONST));
  rec_1.add(Long.valueOf(LONG_CONST));
  rec_1.add(Double.valueOf(DOUBLE_CONST));
  // Strings are immutable; no need for the defensive new String(...) copy.
  rec_1.add(STRING_CONST);
  return new DefaultHCatRecord(rec_1);
}
/**
 * Populates the shared writeRecords list with {@code max} three-field
 * records: (i, "strvalue"+i, String.valueOf((i % mod) + offset)).
 * @param max    number of records to generate
 * @param mod    modulus applied to the index for the third column
 * @param offset value added to (i % mod) for the third column
 */
protected static void generateWriteRecords(int max, int mod, int offset) {
  writeRecords = new ArrayList<HCatRecord>();
  for (int row = 0; row < max; row++) {
    List<Object> fields = new ArrayList<Object>(3);
    fields.add(row);
    fields.add("strvalue" + row);
    fields.add(String.valueOf((row % mod) + offset));
    writeRecords.add(new DefaultHCatRecord(fields));
  }
}
private HCatRecord getGetSet2InpRec() { List<Object> rlist = new ArrayList<Object>(); rlist.add(new byte[]{1, 2, 3}); Map<Short, String> mapcol = new HashMap<Short, String>(3); mapcol.put(new Short("2"), "hcat is cool"); mapcol.put(new Short("3"), "is it?"); mapcol.put(new Short("4"), "or is it not?"); rlist.add(mapcol); List<Integer> listcol = new ArrayList<Integer>(); listcol.add(314); listcol.add(007); rlist.add(listcol);//list rlist.add(listcol);//struct return new DefaultHCatRecord(rlist); }
/**
 * The purpose of the serialize method is to turn an object-representation
 * with a provided ObjectInspector into a Writable format, which
 * the underlying layer can then use to write out.
 *
 * In this case, it means that Hive will call this method to convert
 * an object with appropriate objectinspectors that it knows about,
 * to write out a HCatRecord.
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  // Only struct-shaped top-level types can become HCatRecords.
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString()
        + " can only serialize struct types, but we got: "
        + objInspector.getTypeName());
  }
  StructObjectInspector structInspector = (StructObjectInspector) objInspector;
  List<Object> fields = (List<Object>) serializeStruct(obj, structInspector);
  return new DefaultHCatRecord(fields);
}
/**
 * Writes a Pig Tuple out as an HCatRecord, converting each tuple field to
 * its Java representation according to the computed output schema.
 * @param tuple the Pig tuple to write; fields are consumed positionally
 * @throws IOException wrapping any failure during the underlying write
 */
@Override
public void putNext(Tuple tuple) throws IOException {
  List<Object> outgoing = new ArrayList<Object>(tuple.size());
  int i = 0;
  for (HCatFieldSchema fSchema : computedSchema.getFields()) {
    outgoing.add(getJavaObj(tuple.get(i++), fSchema));
  }
  try {
    writer.write(null, new DefaultHCatRecord(outgoing));
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers up the stack can observe it;
    // the original code swallowed the thread's interrupted status.
    Thread.currentThread().interrupt();
    throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e);
  }
}
/**
 * Takes JSON string in Text form, and has to return an object representation above
 * it that's readable by the corresponding object inspector.
 *
 * For this implementation, since we're using the jackson parser, we can construct
 * our own object implementation, and we use HCatRecord for it
 */
@Override
public Object deserialize(Writable blob) throws SerDeException {
  try {
    // Delegate the actual JSON parsing to the wrapped jsonSerde; presumably
    // it returns an Object[] row (the cast below relies on it) — TODO confirm.
    Object row = jsonSerde.deserialize(blob);
    // NOTE(review): raw List kept as-is because fatLand's declared return
    // type is not visible from this file; retyping could break compilation.
    List fatRow = fatLand((Object[]) row);
    return new DefaultHCatRecord(fatRow);
  } catch (Exception e) {
    // Wrap any parse/convert failure, preserving the original cause.
    throw new SerDeException(e);
  }
}
/**
 * Builds a record exercising the complex-typed Hive 0.13 additions: a map
 * keyed by HiveDecimal, and a map from Timestamp to a mixed-content list.
 * @return a two-field DefaultHCatRecord of nested complex values
 */
private static HCatRecord getHCat13TypesComplexRecord() {
  List<Object> fields = new ArrayList<Object>();

  Map<HiveDecimal, String> decimalMap = new HashMap<HiveDecimal, String>();
  decimalMap.put(HiveDecimal.create(new BigDecimal("1234.12")), "1234.12");
  decimalMap.put(HiveDecimal.create(new BigDecimal("1234.13")), "1234.13");
  fields.add(decimalMap);

  List<Object> inner = new ArrayList<Object>();
  inner.add(Date.valueOf("2014-01-05"));
  inner.add(new HashMap<HiveDecimal, String>(decimalMap));
  Map<Timestamp, List<Object>> timestampMap = new HashMap<Timestamp, List<Object>>();
  timestampMap.put(Timestamp.ofEpochMilli(System.currentTimeMillis()), inner);
  fields.add(timestampMap);

  return new DefaultHCatRecord(fields);
}
/**
 * Maps one CSV input line ("id,col1,col2") to a three-column HCatRecord,
 * emitted with a NullWritable key.
 * @param key     byte offset of the input line (unused)
 * @param value   one comma-separated input line
 * @param context MR context used to emit the record
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] fields = value.toString().split(",");
  DefaultHCatRecord record = new DefaultHCatRecord(3);
  record.set(0, Integer.parseInt(fields[0]));
  record.set(1, fields[1]);
  record.set(2, fields[2]);
  context.write(NullWritable.get(), record);
}
}
/**
 * Builds a record with one value of each scalar type added in Hive 0.13:
 * decimal, char, varchar, date and timestamp.
 * NOTE(review): the timestamp field uses the current wall-clock time, so
 * this record is not reproducible across runs.
 */
private static HCatRecord getHCat13TypesRecord() {
  List<Object> rec_hcat13types = new ArrayList<Object>(5);
  rec_hcat13types.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2
  rec_hcat13types.add(new HiveChar("hive_char", 10));
  rec_hcat13types.add(new HiveVarchar("hive_varchar", 20));
  rec_hcat13types.add(Date.valueOf("2014-01-06"));
  rec_hcat13types.add(Timestamp.ofEpochMilli(System.currentTimeMillis()));
  return new DefaultHCatRecord(rec_hcat13types);
}
private static HCatRecord getHCat13TypesComplexRecord() {
/**
 * Sets up the partitioned-table test: derives a per-format table name,
 * generates 20 two-field write records, and declares the two partition
 * columns (c1 int, c2 string).
 */
public TestHCatPartitioned(String formatName, String serdeClass, String inputFormatClass,
    String outputFormatClass) throws Exception {
  super(formatName, serdeClass, inputFormatClass, outputFormatClass);
  tableName = "testHCatPartitionedTable_" + formatName;
  writeRecords = new ArrayList<HCatRecord>();
  for (int row = 0; row < 20; row++) {
    List<Object> fields = new ArrayList<Object>(2);
    fields.add(row);
    fields.add("strvalue" + row);
    writeRecords.add(new DefaultHCatRecord(fields));
  }
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
}
/**
 * Sets up the non-partitioned-table test: leaves dbName null to exercise
 * the "default" database path, derives a per-format table name, generates
 * 20 two-field write records, and declares columns c1 (int) and c2 (string).
 */
public TestHCatNonPartitioned(String formatName, String serdeClass, String inputFormatClass,
    String outputFormatClass) throws Exception {
  super(formatName, serdeClass, inputFormatClass, outputFormatClass);
  dbName = null; //test if null dbName works ("default" is used)
  tableName = "testHCatNonPartitionedTable_" + formatName;
  writeRecords = new ArrayList<HCatRecord>();
  for (int row = 0; row < 20; row++) {
    List<Object> fields = new ArrayList<Object>(2);
    fields.add(row);
    fields.add("strvalue" + row);
    writeRecords.add(new DefaultHCatRecord(fields));
  }
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
}
/**
 * Verifies that JSON keys whose case differs from the declared column
 * names ("empId" vs "empid") are still matched during deserialization.
 */
public void testUpperCaseKey() throws Exception {
  Configuration conf = new Configuration();
  Properties props = new Properties();
  props.put(serdeConstants.LIST_COLUMNS, "empid,name");
  props.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");
  JsonSerDe serde = new JsonSerDe();
  SerDeUtils.initializeSerDe(serde, conf, props, null);

  Text json1 = new Text("{ \"empId\" : 123, \"name\" : \"John\" } ");
  Text json2 = new Text("{ \"empId\" : 456, \"name\" : \"Jane\" } ");
  HCatRecord expected1 = new DefaultHCatRecord(Arrays.<Object>asList(123, "John"));
  HCatRecord expected2 = new DefaultHCatRecord(Arrays.<Object>asList(456, "Jane"));

  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) serde.deserialize(json1), expected1));
  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) serde.deserialize(json2), expected2));
}
/**
 * test that we properly serialize/deserialize HCatRecordS
 * (write all records to a temp binary file, read them back, and compare)
 * @throws IOException if the round trip through the file fails
 */
public void testRYW() throws IOException {
  File f = new File("binary.dat");
  f.delete();
  f.createNewFile();
  f.deleteOnExit();

  HCatRecord[] recs = getHCatRecords();

  // try-with-resources guarantees the streams are closed even when a
  // write or read throws; the original leaked them on failure.
  try (FileOutputStream fileOutStream = new FileOutputStream(f)) {
    DataOutput outStream = new DataOutputStream(fileOutStream);
    for (int i = 0; i < recs.length; i++) {
      recs[i].write(outStream);
    }
    fileOutStream.flush();
  }

  try (FileInputStream fInStream = new FileInputStream(f)) {
    DataInput inpStream = new DataInputStream(fInStream);
    for (int i = 0; i < recs.length; i++) {
      HCatRecord rec = new DefaultHCatRecord();
      rec.readFields(inpStream);
      StringBuilder msg = new StringBuilder("recs[" + i + "]='" + recs[i] + "' rec='" + rec + "'");
      boolean isEqual = HCatDataCheckUtil.recordsEqual(recs[i], rec, msg);
      Assert.assertTrue(msg.toString(), isEqual);
    }
    // Nothing should be left unread; JUnit convention puts expected first.
    Assert.assertEquals(0, fInStream.available());
  }
}
/**
 * Verifies deserialization of array and map columns, including a map
 * with multiple entries and a null value.
 */
public void testMapValues() throws Exception {
  Configuration conf = new Configuration();
  Properties props = new Properties();
  props.put(serdeConstants.LIST_COLUMNS, "a,b");
  props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
  JsonSerDe rjsd = new JsonSerDe();
  SerDeUtils.initializeSerDe(rjsd, conf, props, null);

  Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
  Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
  Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");
  HCatRecord expected1 = new DefaultHCatRecord(Arrays.<Object>asList(
      Arrays.<String>asList("aaa"), createHashMapStringInteger("bbb", 1)));
  HCatRecord expected2 = new DefaultHCatRecord(Arrays.<Object>asList(
      Arrays.<String>asList("yyy"), createHashMapStringInteger("zzz", 123)));
  HCatRecord expected3 = new DefaultHCatRecord(Arrays.<Object>asList(
      Arrays.<String>asList("a"), createHashMapStringInteger("x", 11, "y", 22, "z", null)));

  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text1), expected1));
  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text2), expected2));
  // Bug fix: text3/expected3 were built but never checked, leaving the
  // null-map-value case untested.
  assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text3), expected3));
}
}
/**
 * Emits one record per input line, cycling the record shape and target
 * table on each call: call 0 writes a 2-field record, call 1 a 1-field
 * record, and call 2 a 3-field record with a constant third column.
 * NOTE(review): {@code i} is presumably an instance field selecting both
 * the destination table in {@code tableNames} and the record layout; it is
 * incremented once per map() call, so this mapper assumes exactly three
 * input lines — verify against the test driver.
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  HCatRecord record = null;
  String[] splits = value.toString().split(",");
  switch (i) {
  case 0:
    record = new DefaultHCatRecord(2);
    record.set(0, splits[0]);
    record.set(1, splits[1]);
    break;
  case 1:
    record = new DefaultHCatRecord(1);
    record.set(0, splits[0]);
    break;
  case 2:
    record = new DefaultHCatRecord(3);
    record.set(0, splits[0]);
    record.set(1, splits[1]);
    record.set(2, "extra");
    break;
  default:
    // Guard against more input lines than configured tables.
    Assert.fail("This should not happen!!!!!");
  }
  MultiOutputFormat.write(tableNames[i], null, record, context);
  i++;
}
}
/**
 * Test get and set calls with type: copies each complex-typed field
 * (binary, map, list, struct) from an input record into a fresh record
 * via the schema-aware accessors, then checks the copy equals the source.
 * @throws HCatException if a typed get/set fails
 */
public void testGetSetByType2() throws HCatException {
  HCatRecord source = getGetSet2InpRec();
  HCatRecord copy = new DefaultHCatRecord(source.size());
  HCatSchema schema =
      HCatSchemaUtils.getHCatSchema("a:binary,b:map<string,string>,c:array<int>,d:struct<i:int>");

  copy.setByteArray("a", schema, source.getByteArray("a", schema));
  copy.setMap("b", schema, source.getMap("b", schema));
  copy.setList("c", schema, source.getList("c", schema));
  copy.setStruct("d", schema, source.getStruct("d", schema));

  Assert.assertTrue(HCatDataCheckUtil.recordsEqual(copy, source));
}
/**
 * Test type specific get/set methods on HCatRecord types added in Hive 13
 * (decimal, char, varchar, date, timestamp).
 * @throws HCatException if a typed get/set fails
 */
public void testGetSetByType3() throws HCatException {
  HCatRecord inpRec = getHCat13TypesRecord();
  HCatRecord newRec = new DefaultHCatRecord(inpRec.size());
  HCatSchema hsch = HCatSchemaUtils.getHCatSchema(
      "a:decimal(5,2),b:char(10),c:varchar(20),d:date,e:timestamp");
  newRec.setDecimal("a", hsch, inpRec.getDecimal("a", hsch));
  newRec.setChar("b", hsch, inpRec.getChar("b", hsch));
  newRec.setVarchar("c", hsch, inpRec.getVarchar("c", hsch));
  newRec.setDate("d", hsch, inpRec.getDate("d", hsch));
  newRec.setTimestamp("e", hsch, inpRec.getTimestamp("e", hsch));
  // Consistency fix: the sibling tests (testGetSetByType1/2) verify the
  // copied record; without this assert the test only proved that no
  // exception was thrown.
  Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec));
}
/**
 * Test get and set calls with type: copies every primitive-typed field
 * from a source record into a new record through the typed, schema-aware
 * getters/setters and verifies the two records compare equal.
 * @throws HCatException if a typed get/set fails
 */
public void testGetSetByType1() throws HCatException {
  HCatRecord source = getHCatRecords()[0];
  HCatRecord copy = new DefaultHCatRecord(source.size());
  HCatSchema schema = HCatSchemaUtils.getHCatSchema(
      "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string");

  copy.setByte("a", schema, source.getByte("a", schema));
  copy.setShort("b", schema, source.getShort("b", schema));
  copy.setInteger("c", schema, source.getInteger("c", schema));
  copy.setLong("d", schema, source.getLong("d", schema));
  copy.setFloat("e", schema, source.getFloat("e", schema));
  copy.setDouble("f", schema, source.getDouble("f", schema));
  copy.setBoolean("g", schema, source.getBoolean("g", schema));
  copy.setString("h", schema, source.getString("h", schema));
  copy.setByteArray("i", schema, source.getByteArray("i", schema));
  copy.setString("j", schema, source.getString("j", schema));

  Assert.assertTrue(HCatDataCheckUtil.recordsEqual(copy, source));
}