bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) .setDescription("The tuple in the bag") .setType(DataType.TUPLE); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema))
// Look up the Pig type code for this column's Phoenix data type, build a field
// carrying that type and the column's display name, and store it in the next
// free slot of fields (i is advanced by the post-increment).
byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType); ResourceFieldSchema field = new ResourceFieldSchema(); field.setType(pigType).setName(cinfo.getDisplayName()); fields[i++] = field;
@Test public void testSchema() throws SQLException, IOException { final Configuration configuration = mock(Configuration.class); when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name()); final ResourceSchema actual = PhoenixPigSchemaUtil.getResourceSchema( configuration, new Dependencies() { List<ColumnInfo> getSelectColumnMetadataList( Configuration configuration) throws SQLException { return Lists.newArrayList(ID_COLUMN, NAME_COLUMN); } }); // expected schema. final ResourceFieldSchema[] fields = new ResourceFieldSchema[2]; fields[0] = new ResourceFieldSchema().setName("ID") .setType(DataType.LONG); fields[1] = new ResourceFieldSchema().setName("NAME") .setType(DataType.CHARARRAY); final ResourceSchema expected = new ResourceSchema().setFields(fields); assertEquals(expected.toString(), actual.toString()); }
keyFieldSchema.setName("key"); keyFieldSchema.setType(getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR))); ResourceFieldSchema bagField = new ResourceFieldSchema(); bagField.setType(DataType.BAG); bagField.setName("columns"); ResourceFieldSchema bagcolSchema = new ResourceFieldSchema(); ResourceFieldSchema bagvalSchema = new ResourceFieldSchema(); bagcolSchema.setName("name"); bagvalSchema.setName("value"); bagcolSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR))); bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR))); innerTupleField.setType(DataType.TUPLE); innerTupleField.setSchema(innerTupleSchema); innerTupleField.setName(new String(cdef.getName())); idxColSchema.setName("name"); idxColSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR))); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); valSchema.setName("value"); valSchema.setType(getPigType(validator)); idxSchema.setName("index_" + new String(cdef.getName())); AbstractType validator = validators.get(cdef.name); if (validator == null)
rf.setName(f.name()); Schema fieldSchema = f.schema(); if (isNullableUnion(fieldSchema)) { innerResourceSchema = avroSchemaToResourceSchema(elementSchema, schemasInStack, alreadyDefinedSchemas, allowRecursiveSchema); bagSchemaFields[0].setName(elementSchema.getName()); break; case UNION: mapSchemaFields[0] = new ResourceSchema.ResourceFieldSchema(); mapSchemaFields[0].setType(DataType.TUPLE); mapSchemaFields[0].setName(mapAvroSchema.getName()); mapSchemaFields[0].setSchema(innerResourceSchemaRecord); mapSchemaFields[0].setDescription(fieldSchema.getDoc());
for (int i=0;i<typeInfos.size();i++) { innerFs[i] = getResourceFieldSchema(typeInfos.get(i)); innerFs[i].setName(names.get(i));
bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) .setDescription("The tuple in the bag") .setType(DataType.TUPLE); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema))
bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) .setDescription("The tuple in the bag") .setType(DataType.TUPLE); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema))
bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) .setDescription("The tuple in the bag") .setType(DataType.TUPLE); } else { ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) .setDescription("The inner field in the tuple in the bag") .setType(getPigType(arrayElementFieldSchema))
// Look up the Pig type code for this column's Phoenix data type, build a field
// carrying that type and the column's display name, and store it in the next
// free slot of fields (i is advanced by the post-increment).
byte pigType = TypeUtil.getPigDataTypeForPhoenixType(phoenixDataType); ResourceFieldSchema field = new ResourceFieldSchema(); field.setType(pigType).setName(cinfo.getDisplayName()); fields[i++] = field;
// The same string is used for both the field's name and its description.
rfs.setName(fieldName); rfs.setDescription(fieldName);
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
// The same string is used for both the field's name and its description.
rfs.setName(fieldName); rfs.setDescription(fieldName);
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
/** schema: (value, value, value) where keys are in the front. */ public ResourceSchema getSchema(String location, Job job) throws IOException { setLocation(location, job); CfInfo cfInfo = getCfInfo(loadSignature); CfDef cfDef = cfInfo.cfDef; // top-level schema, no type ResourceSchema schema = new ResourceSchema(); // get default marshallers and validators Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef); Map<ByteBuffer, AbstractType> validators = getValidatorMap(cfDef); // will contain all fields for this schema List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>(); for (ColumnDef cdef : cfDef.column_metadata) { ResourceFieldSchema valSchema = new ResourceFieldSchema(); AbstractType validator = validators.get(cdef.name); if (validator == null) validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR); valSchema.setName(new String(cdef.getName())); valSchema.setType(getPigType(validator)); allSchemaFields.add(valSchema); } // top level schema contains everything schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()])); return schema; }
/**
 * Builds a {@code ResourceFieldSchema} mirroring the given {@code RequiredField}.
 *
 * <p>The alias becomes the field name and the type is copied as-is. When the
 * required field lists sub-fields, each one is converted recursively and the
 * results are attached as the nested schema. With no sub-fields (null or
 * empty list) no nested schema is set.
 *
 * @param field the required field to mirror
 * @return new ResourceFieldSchema which reflects {@code field}
 * @throws IOException propagated from attaching the nested schema or from a recursive call
 */
public static ResourceFieldSchema createResourceFieldSchema(RequiredField field)
    throws IOException {
  ResourceFieldSchema fieldSchema =
      new ResourceFieldSchema().setName(field.getAlias()).setType(field.getType());

  List<RequiredField> children = field.getSubFields();
  if (children == null || children.isEmpty()) {
    // Leaf field: nothing nested to describe.
    return fieldSchema;
  }

  // Convert every sub-field, keeping the original order.
  ResourceFieldSchema[] childSchemas = new ResourceFieldSchema[children.size()];
  for (int idx = 0; idx < childSchemas.length; idx++) {
    childSchemas[idx] = createResourceFieldSchema(children.get(idx));
  }

  fieldSchema.setSchema(new ResourceSchema().setFields(childSchemas));
  return fieldSchema;
}
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }
@Override public ResourceSchema getSchema(String location, Job job) throws IOException { // determine key field schema ResourceFieldSchema keySchema = config.keyConverter.getLoadSchema(); if (keySchema == null) { return null; } keySchema.setName("key"); // determine value field schema ResourceFieldSchema valueSchema = config.valueConverter.getLoadSchema(); if (valueSchema == null) { return null; } valueSchema.setName("value"); // return tuple schema ResourceSchema resourceSchema = new ResourceSchema(); resourceSchema.setFields(new ResourceFieldSchema[] { keySchema, valueSchema }); return resourceSchema; }
/**
 * Converts a nested field into a Pig field schema.
 *
 * <p>The schema is produced by converting the field's type. It is then given
 * the field's name and a description of the form {@code "FieldId: <id>"}.
 *
 * @param field the nested field to convert
 * @return the field schema for {@code field}, named and described
 * @throws IOException propagated from converting the field's type
 */
private static ResourceFieldSchema convert(Types.NestedField field) throws IOException {
  return convert(field.type())
      .setName(field.name())
      .setDescription(format("FieldId: %s", field.fieldId()));
}
private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) throws IOException { ResourceFieldSchema rfSchema; // if we are dealing with a bag or tuple column - need to worry about subschema if (hfs.getType() == Type.STRUCT) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getTupleSubSchema(hfs)); } else if (hfs.getType() == Type.ARRAY) { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(getBagSubSchema(hfs)); } else { rfSchema = new ResourceFieldSchema() .setName(hfs.getName()) .setDescription(hfs.getComment()) .setType(getPigType(hfs)) .setSchema(null); // no munging inner-schemas } return rfSchema; }