@Override public Object doDeserialize(Writable blob) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } // we use the default field delimiter('\1') to replace the multiple-char field delimiter // but we cannot use it to parse the row since column data can contain '\1' as well String rowStr; if (blob instanceof BytesWritable) { BytesWritable b = (BytesWritable) blob; rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { Text rowText = (Text) blob; rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); } byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes()); cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length); // use the multi-char delimiter to parse the lazy struct cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes()); return cachedLazyStruct; }
// Extract the requested field from the (lazily parsed) row struct.
// NOTE(review): `row` appears to be a lazy struct — confirm against the enclosing method.
Object rowField = row.getField(fieldID);
/**
 * Creates a hierarchical LazyObject matching the category of the given
 * ObjectInspector.
 *
 * @param oi the inspector describing the object to create
 * @return a freshly created lazy object of the corresponding kind
 */
public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) {
  switch (oi.getCategory()) {
  case PRIMITIVE:
    return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
  case MAP:
    return new LazyMap((LazyMapObjectInspector) oi);
  case LIST:
    return new LazyArray((LazyListObjectInspector) oi);
  case STRUCT:
    return new LazyStruct((LazySimpleStructObjectInspector) oi);
  case UNION:
    return new LazyUnion((LazyUnionObjectInspector) oi);
  default:
    throw new RuntimeException("Hive LazySerDe Internal error.");
  }
}
/**
 * Returns one field of the struct, triggering the lazy row parse on first
 * access.
 *
 * If the field is primitive the actual value object is returned; otherwise
 * the LazyObject wrapper is returned, because PrimitiveObjectInspector does
 * not control the object handed to the user.
 *
 * @param fieldID the field ID
 * @return the field as a LazyObject (or raw primitive value)
 */
public Object getField(int fieldID) {
  if (parsed) {
    return uncheckedGetField(fieldID);
  }
  parse();
  return uncheckedGetField(fieldID);
}
/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field the Writable that contains the data
 * @return the deserialized row Object (a lazily-parsed struct)
 * @throws SerDeException if the Writable is not binary-comparable
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  // Fail with a descriptive SerDeException instead of an opaque
  // ClassCastException when an unsupported Writable type is passed in.
  if (!(field instanceof BinaryComparable)) {
    throw new SerDeException(getClass()
        + ": expects a BinaryComparable object (e.g. BytesWritable or Text)!");
  }
  BinaryComparable b = (BinaryComparable) field;
  byteArrayRef.setData(b.getBytes());
  cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyStruct;
}
protected final void initLazyFields(List<? extends StructField> fieldRefs) { fields = new LazyObjectBase[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { try { fields[i] = createLazyField(i, fieldRefs.get(i)); } catch (Exception e) { throw new RuntimeException(e); } } fieldInited = new boolean[fields.length]; // Extra element to make sure we have the same formula to compute the // length of each element of the array. startPosition = new int[fields.length + 1]; }
/** * Returns the statistics after (de)serialization) */ @Override public SerDeStats getSerDeStats() { // must be different assert (lastOperationSerialize != lastOperationDeserialize); if (lastOperationSerialize) { stats.setRawDataSize(serializedSize); } else { stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize()); } return stats; }
/**
 * Parses a raw row that uses a multi-character field delimiter, recording the
 * start offset of every field in {@code startPosition}.
 *
 * Offsets are computed relative to the stored byte array in which each
 * multi-char delimiter has been collapsed to the single char '\1'; hence the
 * {@code i * diff} correction below.
 *
 * @param rawRow       the original (un-collapsed) row bytes
 * @param fieldDelimit the multi-character field delimiter bytes
 */
public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
  if (rawRow == null || fieldDelimit == null) {
    return;
  }
  if (fields == null) {
    // Lazily create one LazyObject per struct field on first use.
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    fields = new LazyObject[fieldRefs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
    }
    fieldInited = new boolean[fields.length];
    startPosition = new int[fields.length + 1];
  }
  // the indexes of the delimiters
  int[] delimitIndexes = findIndexes(rawRow, fieldDelimit);
  // Each delimiter shrinks by (delimiter length - 1) bytes once collapsed to '\1'.
  int diff = fieldDelimit.length - 1;
  // first field always starts from 0, even when missing
  startPosition[0] = 0;
  for (int i = 1; i < fields.length; i++) {
    if (delimitIndexes[i - 1] != -1) {
      // Position in rawRow just past the (i-1)-th delimiter, shifted left by
      // the cumulative shrinkage of the i preceding delimiters.
      int start = delimitIndexes[i - 1] + fieldDelimit.length;
      startPosition[i] = start - i * diff;
    } else {
      // Missing field: point one past the end so the field reads as absent.
      // NOTE(review): `length` is presumably the row byte length set by init() — confirm.
      startPosition[i] = length + 1;
    }
  }
  startPosition[fields.length] = length + 1;
  Arrays.fill(fieldInited, false);
  parsed = true;
}
// Allocate the lazy field objects and bookkeeping arrays for every struct field.
initLazyFields(oi.getAllStructFieldRefs());
// Grab the raw Avro-encoded bytes backing the lazy struct.
byte[] data = ((LazyStruct) struct).getBytes();
// NOTE(review): a new AvroDeserializer is built per call — confirm this is not a hot path.
AvroDeserializer deserializer = new AvroDeserializer();
/**
 * Returns the values of all fields as a List, triggering the lazy row parse
 * on first access.
 *
 * The returned list is cached and reused across calls, so callers must not
 * retain it between invocations.
 *
 * @return the field values, one entry per struct field
 */
public List<Object> getFieldsAsList() {
  if (!parsed) {
    parse();
  }
  if (cachedList == null) {
    // Presize to the known field count to avoid incremental growth.
    cachedList = new ArrayList<Object>(fields.length);
  } else {
    cachedList.clear();
  }
  for (int i = 0; i < fields.length; i++) {
    cachedList.add(uncheckedGetField(i));
  }
  return cachedList;
}
/** * Deserialize a row from the Writable to a LazyObject. * * @param field * the Writable that contains the data * @return The deserialized row Object. * @see SerDe#deserialize(Writable) */ public Object deserialize(Writable field) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; // For backward-compatibility with hadoop 0.17 byteArrayRef.setData(b.get()); cachedLazyStruct.init(byteArrayRef, 0, b.getSize()); } else if (field instanceof Text) { Text t = (Text) field; byteArrayRef.setData(t.getBytes()); cachedLazyStruct.init(byteArrayRef, 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!"); } return cachedLazyStruct; }
protected final void initLazyFields(List<? extends StructField> fieldRefs) { fields = new LazyObjectBase[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { try { fields[i] = createLazyField(i, fieldRefs.get(i)); } catch (Exception e) { throw new RuntimeException(e); } } fieldInited = new boolean[fields.length]; // Extra element to make sure we have the same formula to compute the // length of each element of the array. startPosition = new int[fields.length + 1]; }
/** * Returns the statistics after (de)serialization) */ @Override public SerDeStats getSerDeStats() { // must be different assert (lastOperationSerialize != lastOperationDeserialize); if (lastOperationSerialize) { stats.setRawDataSize(serializedSize); } else { stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize()); } return stats; }
/**
 * Parses a raw row that uses a multi-character field delimiter, recording the
 * start offset of every field in {@code startPosition}.
 *
 * Offsets are computed relative to the stored byte array in which each
 * multi-char delimiter has been collapsed to the single char '\1'; hence the
 * {@code i * diff} correction below.
 *
 * @param rawRow       the original (un-collapsed) row bytes
 * @param fieldDelimit the multi-character field delimiter bytes
 */
public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
  if (rawRow == null || fieldDelimit == null) {
    return;
  }
  if (fields == null) {
    // Lazily create one LazyObject per struct field on first use.
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    fields = new LazyObject[fieldRefs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
    }
    fieldInited = new boolean[fields.length];
    startPosition = new int[fields.length + 1];
  }
  // the indexes of the delimiters
  int[] delimitIndexes = findIndexes(rawRow, fieldDelimit);
  // Each delimiter shrinks by (delimiter length - 1) bytes once collapsed to '\1'.
  int diff = fieldDelimit.length - 1;
  // first field always starts from 0, even when missing
  startPosition[0] = 0;
  for (int i = 1; i < fields.length; i++) {
    if (delimitIndexes[i - 1] != -1) {
      // Position in rawRow just past the (i-1)-th delimiter, shifted left by
      // the cumulative shrinkage of the i preceding delimiters.
      int start = delimitIndexes[i - 1] + fieldDelimit.length;
      startPosition[i] = start - i * diff;
    } else {
      // Missing field: point one past the end so the field reads as absent.
      // NOTE(review): `length` is presumably the row byte length set by init() — confirm.
      startPosition[i] = length + 1;
    }
  }
  startPosition[fields.length] = length + 1;
  Arrays.fill(fieldInited, false);
  parsed = true;
}
// Allocate the lazy field objects and bookkeeping arrays for every struct field.
initLazyFields(oi.getAllStructFieldRefs());
// Grab the raw Avro-encoded bytes backing the lazy struct.
byte[] data = ((LazyStruct) struct).getBytes();
// NOTE(review): a new AvroDeserializer is built per call — confirm this is not a hot path.
AvroDeserializer deserializer = new AvroDeserializer();
/**
 * Returns one field of the struct, triggering the lazy row parse on first
 * access.
 *
 * If the field is primitive the actual value object is returned; otherwise
 * the LazyObject wrapper is returned, because PrimitiveObjectInspector does
 * not control the object handed to the user.
 *
 * @param fieldID the field ID
 * @return the field as a LazyObject (or raw primitive value)
 */
public Object getField(int fieldID) {
  if (parsed) {
    return uncheckedGetField(fieldID);
  }
  parse();
  return uncheckedGetField(fieldID);
}
@Override public Object deserialize(Writable blob) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } // we use the default field delimiter('\1') to replace the multiple-char field delimiter // but we cannot use it to parse the row since column data can contain '\1' as well String rowStr; if (blob instanceof BytesWritable) { BytesWritable b = (BytesWritable) blob; rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { Text rowText = (Text) blob; rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); } byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes()); cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length); // use the multi-char delimiter to parse the lazy struct cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes()); return cachedLazyStruct; }
/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field the Writable that contains the data
 * @return the deserialized row Object (a lazily-parsed struct)
 * @throws SerDeException if the Writable is not binary-comparable
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  // Fail with a descriptive SerDeException instead of an opaque
  // ClassCastException when an unsupported Writable type is passed in.
  if (!(field instanceof BinaryComparable)) {
    throw new SerDeException(getClass()
        + ": expects a BinaryComparable object (e.g. BytesWritable or Text)!");
  }
  BinaryComparable b = (BinaryComparable) field;
  byteArrayRef.setData(b.getBytes());
  cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyStruct;
}