/**
 * Wraps the supplied Pig schema in a tuple-typed field schema and stores it in
 * {@code schemaField}.
 *
 * @param schema the resource schema to wrap; echoed in the error message on failure
 * @throws EsHadoopIllegalStateException if Pig rejects the schema (wraps the IOException)
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema wrapper = new ResourceFieldSchema();
    wrapper.setType(DataType.TUPLE);
    try {
        wrapper.setSchema(schema);
    }
    catch (IOException ex) {
        throw new EsHadoopIllegalStateException(String.format("Cannot use schema [%s]", schema), ex);
    }
    schemaField = wrapper;
}
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); if (arrayElementFieldSchema.getType() == Type.STRUCT) { bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { ResourceSchema s = new ResourceSchema(); List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()])); bagSubFieldSchemas[0].setSchema(s); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema)) .setSchema(null); // the element type is not a tuple - so no subschema bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
/**
 * Validates the schema Pig hands to this storer by wrapping it in a tuple field,
 * converting it to a Hive type description, and caching the serialized result in
 * the UDF context under this instance's signature so it survives into the backend.
 *
 * @param rs the resource schema supplied by Pig
 * @throws IOException if the schema cannot be converted or serialized
 */
@Override
public void checkSchema(ResourceSchema rs) throws IOException {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(rs);
    typeInfo = HiveUtils.getTypeInfo(tupleField);
    Properties props = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    props.setProperty(signature + SchemaSignatureSuffix, ObjectSerializer.serialize(typeInfo));
}
bagSchemaFields[0].setSchema(innerResourceSchema); bagSchema.setFields(bagSchemaFields); rf.setSchema(bagSchema); mapSchemaFields[0].setType(DataType.TUPLE); mapSchemaFields[0].setName(mapAvroSchema.getName()); mapSchemaFields[0].setSchema(innerResourceSchemaRecord); mapSchemaFields[0].setDescription(fieldSchema.getDoc()); mapSchema.setFields(mapSchemaFields); rf.setSchema(mapSchema); break; case MAP: avroSchemaToResourceSchema(mapAvroSchema, schemasInStack, alreadyDefinedSchemas, allowRecursiveSchema); rf.setSchema(innerResourceSchema); break; default: mapSchemaFields[0].setType(getPigType(mapAvroSchema)); mapSchema.setFields(mapSchemaFields); rf.setSchema(mapSchema); case DataType.TUPLE: if (alreadyDefinedSchemas.containsKey(fieldSchema.getFullName())) { rf.setSchema(alreadyDefinedSchemas.get(fieldSchema.getFullName())); } else { ResourceSchema innerResourceSchema = avroSchemaToResourceSchema(fieldSchema, schemasInStack,
bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR))); bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema }); bagTupleField.setSchema(bagTupleSchema); bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField }); bagField.setSchema(bagSchema); ResourceFieldSchema innerTupleField = new ResourceFieldSchema(); innerTupleField.setType(DataType.TUPLE); innerTupleField.setSchema(innerTupleSchema); innerTupleField.setName(new String(cdef.getName()));
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); if (arrayElementFieldSchema.getType() == Type.STRUCT) { bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { ResourceSchema s = new ResourceSchema(); getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()])); bagSubFieldSchemas[0].setSchema(s); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema)) .setSchema(null); // the element type is not a tuple - so no subschema bagSubFieldSchemas[0].setSchema( new ResourceSchema().setFields(innerTupleFieldSchemas));
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); if (arrayElementFieldSchema.getType() == Type.STRUCT) { bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { ResourceSchema s = new ResourceSchema(); List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()])); bagSubFieldSchemas[0].setSchema(s); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema)) .setSchema(null); // the element type is not a tuple - so no subschema bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); if (arrayElementFieldSchema.getType() == Type.STRUCT) { bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { ResourceSchema s = new ResourceSchema(); List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()])); bagSubFieldSchemas[0].setSchema(s); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema)) .setSchema(null); // the element type is not a tuple - so no subschema bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
tupleSchema.setSchema(elementSchema);
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
/**
 * Creates a new ResourceFieldSchema which reflects data from an input RequiredField,
 * recursively mirroring any sub-fields as a nested schema.
 *
 * @param field the required field to mirror
 * @return new ResourceFieldSchema which reflects {@code field}
 * @throws IOException if attaching the nested schema fails
 */
public static ResourceFieldSchema createResourceFieldSchema(RequiredField field) throws IOException {
    ResourceFieldSchema result = new ResourceFieldSchema()
            .setName(field.getAlias())
            .setType(field.getType());
    List<RequiredField> children = field.getSubFields();
    // Leaf fields carry no nested schema.
    if (children == null || children.isEmpty()) {
        return result;
    }
    ResourceFieldSchema[] childSchemas = new ResourceFieldSchema[children.size()];
    for (int i = 0; i < childSchemas.length; i++) {
        childSchemas[i] = createResourceFieldSchema(children.get(i));
    }
    ResourceSchema nested = new ResourceSchema();
    nested.setFields(childSchemas);
    result.setSchema(nested);
    return result;
}
/**
 * Prepares the Hive object inspectors needed to evaluate the wrapped UDF.
 * The Pig input schema is wrapped in a tuple field and converted to a Hive
 * TypeInfo, from which the struct input inspector is built. Each argument
 * inspector is then either the constant inspector supplied for that position
 * or the matching struct field's inspector.
 *
 * @param inputSchema   the Pig schema of the UDF's input tuple
 * @param evalUDF       the Hive UDF to initialize
 * @param constantsInfo optional per-position constant inspectors; may be null
 * @throws IOException if schema conversion or UDF initialization fails
 */
private void init(Schema inputSchema, GenericUDF evalUDF, ConstantObjectInspectInfo constantsInfo) throws IOException {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(new ResourceSchema(inputSchema));
    TypeInfo hiveType = HiveUtils.getTypeInfo(tupleField);
    inputObjectInspector = (StructObjectInspector) HiveUtils.createObjectInspector(hiveType);
    try {
        int argCount = inputSchema.size();
        ObjectInspector[] arguments = new ObjectInspector[argCount];
        for (int i = 0; i < argCount; i++) {
            // Prefer the constant inspector when one was captured for this position.
            boolean hasConstant = constantsInfo != null && !constantsInfo.isEmpty() && constantsInfo.get(i) != null;
            if (hasConstant) {
                arguments[i] = constantsInfo.get(i);
            }
            else {
                arguments[i] = inputObjectInspector.getAllStructFieldRefs().get(i).getFieldObjectInspector();
            }
        }
        outputObjectInspector = evalUDF.initialize(arguments);
    }
    catch (Exception e) {
        throw new IOException(e);
    }
}
}
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
/**
 * Reports this UDTF's output schema to Pig as a bag of tuples whose single field
 * mirrors the Hive UDTF's output object inspector. Inspector setup happens lazily
 * on the first call.
 *
 * @param input the input schema (unused beyond triggering lazy initialization)
 * @return the Pig schema: {bag{tuple(field)}}
 * @throws RuntimeException wrapping any initialization or conversion failure
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (!inited) {
            schemaInfo.init(getInputSchema(), udtf, constantsInfo);
            inited = true;
        }
        ResourceFieldSchema elementField = HiveUtils.getResourceFieldSchema(
                TypeInfoUtils.getTypeInfoFromObjectInspector(schemaInfo.outputObjectInspector));
        // Wrap the single output field: field -> tuple -> bag.
        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[] { elementField });
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setSchema(tupleSchema);
        ResourceSchema bagSchema = new ResourceSchema();
        bagSchema.setFields(new ResourceFieldSchema[] { bagField });
        return Schema.getPigSchema(bagSchema);
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Initializes the Hive object inspectors for the wrapped UDTF. The Pig input
 * schema is wrapped in a tuple field, converted to a Hive TypeInfo, and turned
 * into the struct input inspector; any captured constants are injected into it
 * before the UDTF is initialized.
 *
 * @param inputSchema   the Pig schema of the UDTF's input tuple
 * @param udtf          the Hive UDTF to initialize
 * @param constantsInfo optional constant-inspector info; may be null
 * @throws IOException if schema conversion or UDTF initialization fails
 */
private void init(Schema inputSchema, GenericUDTF udtf, ConstantObjectInspectInfo constantsInfo) throws IOException {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    tupleField.setSchema(new ResourceSchema(inputSchema));
    TypeInfo hiveType = HiveUtils.getTypeInfo(tupleField);
    inputObjectInspector = (StructObjectInspector) HiveUtils.createObjectInspector(hiveType);
    if (constantsInfo != null) {
        constantsInfo.injectConstantObjectInspector(inputObjectInspector);
    }
    try {
        outputObjectInspector = udtf.initialize(inputObjectInspector);
    }
    catch (Exception e) {
        throw new IOException(e);
    }
}
}
/**
 * Translates a {@code Type} into a Pig field schema, recursing into complex
 * (non-primitive) types to build the nested schema.
 *
 * @param type the type to translate
 * @return the equivalent Pig {@code ResourceFieldSchema}
 * @throws IOException if the complex sub-schema cannot be attached
 */
private static ResourceFieldSchema convert(Type type) throws IOException {
    ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
    fieldSchema.setType(convertType(type));
    if (type.isPrimitiveType()) {
        return fieldSchema; // primitives carry no nested schema
    }
    fieldSchema.setSchema(convertComplex(type));
    return fieldSchema;
}
/**
 * Stores the given schema as a tuple-typed field schema in {@code schemaField}.
 *
 * @param schema the resource schema to adopt
 * @throws EsHadoopIllegalStateException if applying the schema fails (wraps the IOException)
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setType(DataType.TUPLE);
    try {
        tupleField.setSchema(schema);
    }
    catch (IOException ex) {
        // Surface the failure with the offending schema in the message.
        throw new EsHadoopIllegalStateException(String.format("Cannot use schema [%s]", schema), ex);
    }
    schemaField = tupleField;
}
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }