/** * Checks to see if the given field is a schema-less Map that has values. * @return true if Map has no schema but has values (mixed schema map). false if not a Map or if Map is just empty. */ private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) { if (schema.getType() != DataType.MAP) { // Can't be a mixed value map if it's not a map at all. return false; } try { Object fieldValue = object.get(field); Map<?, ?> map = (Map<?, ?>) fieldValue; return schema.getSchema() == null && !(map == null || map.isEmpty()); } catch (ExecException e) { throw new EsHadoopIllegalStateException(e); } }
ResourceSchema tupleSchema = schema.getSchema();
ArrayList<String> names = new ArrayList<String>(); ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(); for (ResourceFieldSchema subFs : fs.getSchema().getFields()) { TypeInfo info = getTypeInfo(subFs); names.add(subFs.getName()); case DataType.BAG: ti = new ListTypeInfo(); if (fs.getSchema()==null || fs.getSchema().getFields().length!=1) { throw new IOException("Wrong bag inner schema"); ResourceFieldSchema tupleSchema = fs.getSchema().getFields()[0]; ResourceFieldSchema itemSchema = tupleSchema; if (tupleSchema.getSchema().getFields().length == 1) { itemSchema = tupleSchema.getSchema().getFields()[0]; ti = new MapTypeInfo(); TypeInfo valueField; if (fs.getSchema() == null || fs.getSchema().getFields().length != 1) { valueField = TypeInfoFactory.binaryTypeInfo; } else { valueField = getTypeInfo(fs.getSchema().getFields()[0]);
rfs.getDescription().equals("autogenerated from Pig Field Schema") ? null : rfs.getDescription(), rfs.getSchema(), definedRecordNames, doubleColonsToDoubleUnderscores); fields.add(new Schema.Field((rfsName != null)
ResourceSchema tupleSchema = schema.getSchema();
/**
 * Reads one bag from the stream: skips input up to the opening '{', then parses elements with
 * {@code consumeTuple} until the closing '}' is reached. Elements are separated by ','.
 *
 * @param in          stream to read the bag text from
 * @param fieldSchema schema of the bag field; its nested schema must contain exactly one field,
 *                    which is handed to {@code consumeTuple} for every element
 * @return a new default bag holding every non-null tuple that was parsed
 * @throws IOException if {@code fieldSchema} or its nested schema is null, if the stream ends
 *                     before the bag is opened or closed, or if the nested schema does not
 *                     contain exactly one field
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    // A bag field without an inner schema used to fail with a NullPointerException here;
    // report it as an IOException like the other malformed-schema cases in this method.
    if (fieldSchema.getSchema() == null) {
        throw new IOException("Bag inner schema is null");
    }
    ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();

    // Skip everything in front of the opening brace.
    int buf;
    while ((buf = in.read()) != '{') {
        if (buf == -1) {
            throw new IOException("Unexpect end of bag");
        }
    }

    if (fss == null || fss.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    ResourceFieldSchema fs = fss[0];

    DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
    do {
        Tuple t = consumeTuple(in, fs);
        if (t != null) {
            db.add(t);
        }
        // Advance to the next delimiter: ',' means another element follows, '}' ends the bag.
        while ((buf = in.read()) != '}' && buf != ',') {
            if (buf == -1) {
                throw new IOException("Unexpect end of bag");
            }
        }
    } while (buf != '}');
    return db;
}
public static Schema getPigSchema(ResourceSchema rSchema) throws FrontendException { if(rSchema == null) { return null; } List<FieldSchema> fsList = new ArrayList<FieldSchema>(); for(ResourceFieldSchema rfs : rSchema.getFields()) { FieldSchema fs = new FieldSchema(rfs.getName(), rfs.getSchema() == null ? null : getPigSchema(rfs.getSchema()), rfs.getType()); if(rfs.getType() == DataType.BAG) { if (fs.schema != null) { // allow partial schema if (fs.schema.size() == 1) { FieldSchema innerFs = fs.schema.getField(0); if (innerFs.type != DataType.TUPLE) { ResourceFieldSchema.throwInvalidSchemaException(); } } else { ResourceFieldSchema.throwInvalidSchemaException(); } } } fsList.add(fs); } return new Schema(fsList); }
/** * Checks to see if the given field is a schema-less Map that has values. * @return true if Map has no schema but has values (mixed schema map). false if not a Map or if Map is just empty. */ private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) { if (schema.getType() != DataType.MAP) { // Can't be a mixed value map if it's not a map at all. return false; } try { Object fieldValue = object.get(field); Map<?, ?> map = (Map<?, ?>) fieldValue; return schema.getSchema() == null && !(map == null || map.isEmpty()); } catch (ExecException e) { throw new EsHadoopIllegalStateException(e); } }
/**
 * Determines the field schema of a {@code ResourceFieldSchema}.
 *
 * <p>For TUPLE and BAG types the nested fields are passed on as an iterator together with
 * their count; for every other type a {@code null} iterator and a count of zero are passed.
 *
 * @param rcFieldSchema the resource field schema to translate
 * @return the field schema produced by the delegate overload
 * @throws ExecException        declared by the delegate overload
 * @throws FrontendException    declared by the delegate overload
 * @throws SchemaMergeException declared by the delegate overload
 */
public static Schema.FieldSchema determineFieldSchema(ResourceSchema.ResourceFieldSchema rcFieldSchema)
        throws ExecException, FrontendException, SchemaMergeException {
    final byte type = rcFieldSchema.getType();

    Iterator<ResourceSchema.ResourceFieldSchema> nestedFields = null;
    long nestedCount = 0;
    boolean hasNestedFields = (type == TUPLE) || (type == BAG);
    if (hasNestedFields) {
        ResourceSchema.ResourceFieldSchema[] fields = rcFieldSchema.getSchema().getFields();
        nestedFields = Arrays.asList(fields).iterator();
        nestedCount = fields.length;
    }

    return determineFieldSchema(type, nestedFields, nestedCount, ResourceSchema.ResourceFieldSchema.class);
}
/**
 * Accepts a path when the schema derived from the ORC file's object inspector has at least
 * one field.
 *
 * @param path file to inspect
 * @return {@code true} if the derived schema contains one or more fields, {@code false} otherwise
 * @throws RuntimeException wrapping any IOException raised while opening or inspecting the file
 */
@Override
public boolean accept(Path path) {
    try {
        Reader orcReader = OrcFile.createReader(fs, path);
        ObjectInspector inspector = (ObjectInspector) orcReader.getObjectInspector();
        ResourceFieldSchema fileSchema =
                HiveUtils.getResourceFieldSchema(TypeInfoUtils.getTypeInfoFromObjectInspector(inspector));
        return fileSchema.getSchema().getFields().length != 0;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
} // closes the enclosing declaration, whose header lies outside this excerpt
@Override public ResourceSchema getSchema(String location, Job job) throws IOException { if (typeInfo == null) { typeInfo = getTypeInfo(location, job); // still null means case of multiple load store if (typeInfo == null) { return null; } } ResourceFieldSchema fs = HiveUtils.getResourceFieldSchema(typeInfo); return fs.getSchema(); }
/**
 * Validates that the schema being stored is a tuple whose nested schema equals
 * {@code expectedSchema}.
 *
 * @param schema field schema to validate; must be non-null, of type TUPLE, with a non-null
 *               nested schema
 * @throws IOException declared by the signature and by the schema conversion call
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");

    final byte actualType = schema.getType();
    Preconditions.checkArgument(
            DataType.TUPLE == actualType,
            "Expected schema type '%s' but found type '%s'",
            DataType.findTypeName(DataType.TUPLE),
            DataType.findTypeName(actualType));

    ResourceSchema childSchema = schema.getSchema();
    Preconditions.checkNotNull(childSchema, "Child schema is null");

    Schema actualSchema = Schema.getPigSchema(childSchema);
    boolean schemasMatch = Schema.equals(expectedSchema, actualSchema, false, true);
    Preconditions.checkArgument(
            schemasMatch,
            "Expected store schema '%s' but found schema '%s'",
            expectedSchema,
            actualSchema);
}
/**
 * Checks the schema handed to the store function: it must be a TUPLE whose nested schema
 * matches {@code expectedSchema}.
 *
 * @param schema field schema to check; must be non-null, typed TUPLE, and carry a non-null
 *               nested schema
 * @throws IOException declared by the signature and by the schema conversion call
 */
@Override
public void checkStoreSchema(ResourceFieldSchema schema) throws IOException {
    Preconditions.checkNotNull(schema, "Schema is null");

    final byte foundType = schema.getType();
    Preconditions.checkArgument(
            DataType.TUPLE == foundType,
            "Expected schema type '%s' but found type '%s'",
            DataType.findTypeName(DataType.TUPLE),
            DataType.findTypeName(foundType));

    ResourceSchema childSchema = schema.getSchema();
    Preconditions.checkNotNull(childSchema, "Child schema is null");

    Schema actualSchema = Schema.getPigSchema(childSchema);
    boolean sameSchema = Schema.equals(expectedSchema, actualSchema, false, true);
    Preconditions.checkArgument(
            sameSchema,
            "Expected store schema '%s' but found schema '%s'",
            expectedSchema,
            actualSchema);
}
private Result writeTuple(Object object, ResourceFieldSchema field, Generator generator, boolean writeTupleFieldNames, boolean isRoot) { ResourceSchema nestedSchema = field.getSchema(); Tuple currentTuple = (Tuple) object; for (ResourceFieldSchema nestedField : nestedSchema.getFields()) { allEmpty = (nestedField.getSchema() == null && !isPopulatedMixedValueMap(nestedField, currentField, currentTuple) && PigUtils.isComplexType(nestedField));