@Override public Object doDeserialize(Writable blob) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } // we use the default field delimiter('\1') to replace the multiple-char field delimiter // but we cannot use it to parse the row since column data can contain '\1' as well String rowStr; if (blob instanceof BytesWritable) { BytesWritable b = (BytesWritable) blob; rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { Text rowText = (Text) blob; rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); } byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes()); cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length); // use the multi-char delimiter to parse the lazy struct cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes()); return cachedLazyStruct; }
// Extract the requested field from the (lazily parsed) row struct.
// NOTE(review): `row` appears to be a lazy struct — confirm against the enclosing method.
Object rowField = row.getField(fieldID);
/**
 * Creates a hierarchical LazyObject matching the category of the given
 * ObjectInspector.
 *
 * @param oi the inspector describing the object to create
 * @return a freshly created lazy object of the corresponding kind
 */
public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) {
  switch (oi.getCategory()) {
  case PRIMITIVE:
    return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
  case MAP:
    return new LazyMap((LazyMapObjectInspector) oi);
  case LIST:
    return new LazyArray((LazyListObjectInspector) oi);
  case STRUCT:
    return new LazyStruct((LazySimpleStructObjectInspector) oi);
  case UNION:
    return new LazyUnion((LazyUnionObjectInspector) oi);
  default:
    throw new RuntimeException("Hive LazySerDe Internal error.");
  }
}
/**
 * Returns one field of the struct, triggering the lazy row parse on first
 * access.
 *
 * If the field is primitive the actual value object is returned; otherwise
 * the LazyObject wrapper is returned, because PrimitiveObjectInspector does
 * not control the object handed to the user.
 *
 * @param fieldID the field ID
 * @return the field as a LazyObject (or raw primitive value)
 */
public Object getField(int fieldID) {
  if (parsed) {
    return uncheckedGetField(fieldID);
  }
  parse();
  return uncheckedGetField(fieldID);
}
/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field the Writable that contains the data
 * @return the deserialized row Object (a lazily-parsed struct)
 * @throws SerDeException if the Writable is not binary-comparable
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  // Fail with a descriptive SerDeException instead of an opaque
  // ClassCastException when an unsupported Writable type is passed in.
  if (!(field instanceof BinaryComparable)) {
    throw new SerDeException(getClass()
        + ": expects a BinaryComparable object (e.g. BytesWritable or Text)!");
  }
  BinaryComparable b = (BinaryComparable) field;
  byteArrayRef.setData(b.getBytes());
  cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyStruct;
}
protected final void initLazyFields(List<? extends StructField> fieldRefs) { fields = new LazyObjectBase[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { try { fields[i] = createLazyField(i, fieldRefs.get(i)); } catch (Exception e) { throw new RuntimeException(e); } } fieldInited = new boolean[fields.length]; // Extra element to make sure we have the same formula to compute the // length of each element of the array. startPosition = new int[fields.length + 1]; }
/** * Returns the statistics after (de)serialization) */ @Override public SerDeStats getSerDeStats() { // must be different assert (lastOperationSerialize != lastOperationDeserialize); if (lastOperationSerialize) { stats.setRawDataSize(serializedSize); } else { stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize()); } return stats; }
/**
 * Parses a raw row that uses a multi-character field delimiter, recording the
 * start offset of every field in {@code startPosition}.
 *
 * Offsets are computed relative to the stored byte array in which each
 * multi-char delimiter has been collapsed to the single char '\1'; hence the
 * {@code i * diff} correction below.
 *
 * @param rawRow       the original (un-collapsed) row bytes
 * @param fieldDelimit the multi-character field delimiter bytes
 */
public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
  if (rawRow == null || fieldDelimit == null) {
    return;
  }
  if (fields == null) {
    // Lazily create one LazyObject per struct field on first use.
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    fields = new LazyObject[fieldRefs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
    }
    fieldInited = new boolean[fields.length];
    startPosition = new int[fields.length + 1];
  }
  // the indexes of the delimiters
  int[] delimitIndexes = findIndexes(rawRow, fieldDelimit);
  // Each delimiter shrinks by (delimiter length - 1) bytes once collapsed to '\1'.
  int diff = fieldDelimit.length - 1;
  // first field always starts from 0, even when missing
  startPosition[0] = 0;
  for (int i = 1; i < fields.length; i++) {
    if (delimitIndexes[i - 1] != -1) {
      // Position in rawRow just past the (i-1)-th delimiter, shifted left by
      // the cumulative shrinkage of the i preceding delimiters.
      int start = delimitIndexes[i - 1] + fieldDelimit.length;
      startPosition[i] = start - i * diff;
    } else {
      // Missing field: point one past the end so the field reads as absent.
      // NOTE(review): `length` is presumably the row byte length set by init() — confirm.
      startPosition[i] = length + 1;
    }
  }
  startPosition[fields.length] = length + 1;
  Arrays.fill(fieldInited, false);
  parsed = true;
}
// Allocate the lazy field objects and bookkeeping arrays for every struct field.
initLazyFields(oi.getAllStructFieldRefs());
// Grab the raw Avro-encoded bytes backing the lazy struct.
byte[] data = ((LazyStruct) struct).getBytes();
// NOTE(review): a new AvroDeserializer is built per call — confirm this is not a hot path.
AvroDeserializer deserializer = new AvroDeserializer();
/**
 * Returns the values of all fields as a List, triggering the lazy row parse
 * on first access.
 *
 * The returned list is cached and reused across calls, so callers must not
 * retain it between invocations.
 *
 * @return the field values, one entry per struct field
 */
public List<Object> getFieldsAsList() {
  if (!parsed) {
    parse();
  }
  if (cachedList == null) {
    // Presize to the known field count to avoid incremental growth.
    cachedList = new ArrayList<Object>(fields.length);
  } else {
    cachedList.clear();
  }
  for (int i = 0; i < fields.length; i++) {
    cachedList.add(uncheckedGetField(i));
  }
  return cachedList;
}
/** * Deserialize a row from the Writable to a LazyObject. * * @param field * the Writable that contains the data * @return The deserialized row Object. * @see SerDe#deserialize(Writable) */ public Object deserialize(Writable field) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; // For backward-compatibility with hadoop 0.17 byteArrayRef.setData(b.get()); cachedLazyStruct.init(byteArrayRef, 0, b.getSize()); } else if (field instanceof Text) { Text t = (Text) field; byteArrayRef.setData(t.getBytes()); cachedLazyStruct.init(byteArrayRef, 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!"); } return cachedLazyStruct; }
protected final void initLazyFields(List<? extends StructField> fieldRefs) { fields = new LazyObjectBase[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { try { fields[i] = createLazyField(i, fieldRefs.get(i)); } catch (Exception e) { throw new RuntimeException(e); } } fieldInited = new boolean[fields.length]; // Extra element to make sure we have the same formula to compute the // length of each element of the array. startPosition = new int[fields.length + 1]; }
/** * Returns the statistics after (de)serialization) */ @Override public SerDeStats getSerDeStats() { // must be different assert (lastOperationSerialize != lastOperationDeserialize); if (lastOperationSerialize) { stats.setRawDataSize(serializedSize); } else { stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize()); } return stats; }
/**
 * Parses a raw row that uses a multi-character field delimiter, recording the
 * start offset of every field in {@code startPosition}.
 *
 * Offsets are computed relative to the stored byte array in which each
 * multi-char delimiter has been collapsed to the single char '\1'; hence the
 * {@code i * diff} correction below.
 *
 * @param rawRow       the original (un-collapsed) row bytes
 * @param fieldDelimit the multi-character field delimiter bytes
 */
public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
  if (rawRow == null || fieldDelimit == null) {
    return;
  }
  if (fields == null) {
    // Lazily create one LazyObject per struct field on first use.
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    fields = new LazyObject[fieldRefs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
    }
    fieldInited = new boolean[fields.length];
    startPosition = new int[fields.length + 1];
  }
  // the indexes of the delimiters
  int[] delimitIndexes = findIndexes(rawRow, fieldDelimit);
  // Each delimiter shrinks by (delimiter length - 1) bytes once collapsed to '\1'.
  int diff = fieldDelimit.length - 1;
  // first field always starts from 0, even when missing
  startPosition[0] = 0;
  for (int i = 1; i < fields.length; i++) {
    if (delimitIndexes[i - 1] != -1) {
      // Position in rawRow just past the (i-1)-th delimiter, shifted left by
      // the cumulative shrinkage of the i preceding delimiters.
      int start = delimitIndexes[i - 1] + fieldDelimit.length;
      startPosition[i] = start - i * diff;
    } else {
      // Missing field: point one past the end so the field reads as absent.
      // NOTE(review): `length` is presumably the row byte length set by init() — confirm.
      startPosition[i] = length + 1;
    }
  }
  startPosition[fields.length] = length + 1;
  Arrays.fill(fieldInited, false);
  parsed = true;
}
// Allocate the lazy field objects and bookkeeping arrays for every struct field.
initLazyFields(oi.getAllStructFieldRefs());
// Grab the raw Avro-encoded bytes backing the lazy struct.
byte[] data = ((LazyStruct) struct).getBytes();
// NOTE(review): a new AvroDeserializer is built per call — confirm this is not a hot path.
AvroDeserializer deserializer = new AvroDeserializer();
/**
 * Returns one field of the struct, triggering the lazy row parse on first
 * access.
 *
 * If the field is primitive the actual value object is returned; otherwise
 * the LazyObject wrapper is returned, because PrimitiveObjectInspector does
 * not control the object handed to the user.
 *
 * @param fieldID the field ID
 * @return the field as a LazyObject (or raw primitive value)
 */
public Object getField(int fieldID) {
  if (parsed) {
    return uncheckedGetField(fieldID);
  }
  parse();
  return uncheckedGetField(fieldID);
}
@Override public Object deserialize(Writable blob) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } // we use the default field delimiter('\1') to replace the multiple-char field delimiter // but we cannot use it to parse the row since column data can contain '\1' as well String rowStr; if (blob instanceof BytesWritable) { BytesWritable b = (BytesWritable) blob; rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { Text rowText = (Text) blob; rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); } byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes()); cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length); // use the multi-char delimiter to parse the lazy struct cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes()); return cachedLazyStruct; }
/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field the Writable that contains the data
 * @return the deserialized row Object (a lazily-parsed struct)
 * @throws SerDeException if the Writable is not binary-comparable
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  // Fail with a descriptive SerDeException instead of an opaque
  // ClassCastException when an unsupported Writable type is passed in.
  if (!(field instanceof BinaryComparable)) {
    throw new SerDeException(getClass()
        + ": expects a BinaryComparable object (e.g. BytesWritable or Text)!");
  }
  BinaryComparable b = (BinaryComparable) field;
  byteArrayRef.setData(b.getBytes());
  cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyStruct;
}