list.add(rf.getIndex());
private static void addField(RequiredField field, List<String> fields, FieldAlias fa, String currentNode) {
    if (field.getSubFields() != null && !field.getSubFields().isEmpty()) {
        for (RequiredField subField : field.getSubFields()) {
            addField(subField, fields, fa, currentNode + "." + fa.toES(subField.getAlias()));
        }
    } else {
        fields.add(fa.toES(field.getAlias()));
    }
}
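For reference, a small hypothetical driver for the helper above. The fieldAlias instance and the field names are illustrative assumptions, not part of the source; the RequiredField constructor matches the (alias, index, subFields, type) form used elsewhere in this collection.

// Hypothetical: a field "user" with sub-fields "name" and "age". addField()
// recurses into the sub-fields and emits the leaf aliases (mapped through
// FieldAlias.toES) into the flat field list; currentNode tracks the dotted path.
List<String> fields = new ArrayList<String>();
RequiredField name = new RequiredField("name", -1, null, DataType.BYTEARRAY);
RequiredField age = new RequiredField("age", -1, null, DataType.BYTEARRAY);
RequiredField user = new RequiredField("user", 0, Arrays.asList(name, age), DataType.MAP);

addField(user, fields, fieldAlias, "user"); // fieldAlias: an assumed FieldAlias instance
// fields now holds the ES-mapped leaf aliases, e.g. ["name", "age"]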
for (ColumnConfig columnConfig : super.columnConfigList) {
    if (columnConfig.isTarget()) {
        requiredFieldList.add(new RequiredField(columnConfig.getColumnName(),
                columnConfig.getColumnNum(), null, DataType.FLOAT));
    } else {
        requiredFieldList.add(new RequiredField(columnConfig.getColumnName(),
                columnConfig.getColumnNum(), null, DataType.FLOAT));
    }
}
// The weight column is appended once, after all configured columns.
requiredFieldList.add(new RequiredField("weight", columnConfigList.size(), null, DataType.DOUBLE));
int columnIndex = requiredFieldList.getFields().get(index).getIndex();
if (columnIndex >= super.columnConfigList.size()) {
    assert element != null;
    // ... (remainder of the original block not shown)
}
// Build a RequiredField that projects only specific map keys (sub-fields):
requiredField = new RequiredField();
requiredField.setIndex(i);
requiredField.setAlias(s.getField(i).alias);
requiredField.setType(s.getField(i).type);
List<RequiredField> subFields = new ArrayList<RequiredField>();
for (String key : required.first.get(i)) {
    RequiredField subField = new RequiredField(key, -1, null, DataType.BYTEARRAY);
    subFields.add(subField);
}
requiredField.setSubFields(subFields);
requiredFields.add(requiredField);

// Build a RequiredField that requires the whole column:
requiredField = new RequiredField();
requiredField.setIndex(i);
requiredField.setAlias(s.getField(i).alias);
requiredField.setType(s.getField(i).type);
requiredFields.add(requiredField);

// Mark the column as required and record which map keys were requested:
columnRequired[rf.getIndex()] = true;
List<RequiredField> sub = rf.getSubFields();
if (sub != null) {
    message.append("Map key required for " + load.getAlias()
            + ": $" + rf.getIndex() + "->" + sub + "\n");
}

// Wire an inner load for the pruned column into the foreach's inner plan:
LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex());
innerPlan.add(innerLoad);
innerPlan.connect(innerLoad, gen);

List<LoadPushDown.RequiredField> fieldList = requiredFields.getFields();
for (RequiredField rf : rfl) {
    Schema.Field f = oldSchema.getField(rf.getAlias());
    if (f == null) {
        return null;
    }
    try {
        if (getPigType(f.schema()) != rf.getType()) {
            return null;
        }
    } catch (ExecException e) {
        // Type lookup failed; give up on projection push-down.
        return null;
    }
    if (rf.getSubFields() == null) {
        fields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultValue()));
    } else {
        Schema innerSchema = newSchemaFromRequiredFieldList(f.schema(), rf.getSubFields());
        if (innerSchema == null) {
            return null;
        }
        // Use the pruned inner schema for this field.
        fields.add(new Schema.Field(f.name(), innerSchema, f.doc(), f.defaultValue()));
    }
}
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
    List<Type> newFields = new ArrayList<Type>();
    List<Pair<FieldSchema, Integer>> indexedFields = new ArrayList<Pair<FieldSchema, Integer>>();
    try {
        if (requiredFieldsList == null) {
            int index = 0;
            for (FieldSchema fs : pigSchema.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(fs, index++));
            }
        } else {
            for (RequiredField rf : requiredFieldsList.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(pigSchema.getField(rf.getAlias()), rf.getIndex()));
            }
        }
        for (Pair<FieldSchema, Integer> p : indexedFields) {
            FieldSchema fieldSchema = pigSchema.getField(p.first.alias);
            if (p.second < schemaToFilter.getFieldCount()) {
                Type type = schemaToFilter.getFields().get(p.second);
                newFields.add(filter(type, fieldSchema));
            }
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Failed to filter requested fields", e);
    }
    return newFields;
}
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        int lastColumn = -1;
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() > lastColumn) {
                lastColumn = rf.getIndex();
            }
        }
        mRequiredColumns = new boolean[lastColumn + 1];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
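For context, a hedged sketch of the read-side counterpart, modeled on the PigStorage pattern: the loader re-reads the projection bitmap that pushProjection() stored under the same signature. The reader field is an assumption; mRequiredColumns and signature mirror the snippet above.

// Sketch: deserialize the projection bitmap on the back end.
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = reader; // assumed loader field
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    String serialized = p.getProperty(signature);
    if (serialized != null) {
        // Restore the boolean[] written by pushProjection().
        mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(serialized);
    }
}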
@Override
public void setLocation(String location, Job job) throws IOException {
    // The location is assumed to be comma-separated paths.
    FileInputFormat.setInputPaths(job, location);
    requiredFieldList = (RequiredFieldList) getFromUDFContext(PRUNE_PROJECTION_INFO);
    // If a push-down projection was recorded, strip requestedFields down to only
    // the needed ones; this pushes the projection into the underlying parser.
    if (requiredFieldList != null && originalRequestedFields == null) { // avoid pruning twice
        Set<Integer> requestedFieldIndexes = new HashSet<>();
        for (RequiredField requiredField : requiredFieldList.getFields()) {
            requestedFieldIndexes.add(requiredField.getIndex());
        }
        List<String> prunedRequestedFields = new ArrayList<>(requestedFieldIndexes.size());
        int index = 0;
        for (String field : requestedFields) {
            if (requestedFieldIndexes.contains(index)) {
                prunedRequestedFields.add(field);
            }
            ++index;
        }
        originalRequestedFields = requestedFields;
        requestedFields = prunedRequestedFields;
    }
}
private Schema getSchemaFromRequiredFieldList(Schema schema, List<RequiredField> fieldList) throws FrontendException {
    Schema s = new Schema();
    for (RequiredField rf : fieldList) {
        FieldSchema f;
        try {
            f = schema.getField(rf.getAlias()).clone();
        } catch (CloneNotSupportedException e) {
            throw new FrontendException("Clone not supported for the fieldschema", e);
        }
        if (rf.getSubFields() == null) {
            s.add(f);
        } else {
            Schema innerSchema = getSchemaFromRequiredFieldList(f.schema, rf.getSubFields());
            if (innerSchema == null) {
                return null;
            } else {
                f.schema = innerSchema;
                s.add(f);
            }
        }
    }
    return s;
}
/**
 * Creates a new ResourceFieldSchema which reflects data from an input RequiredField.
 *
 * @param field
 * @return new ResourceFieldSchema which reflects {@code field}.
 * @throws IOException
 */
public static ResourceFieldSchema createResourceFieldSchema(RequiredField field) throws IOException {
    ResourceFieldSchema schema = new ResourceFieldSchema()
            .setName(field.getAlias())
            .setType(field.getType());
    List<RequiredField> subFields = field.getSubFields();
    if (subFields != null && !subFields.isEmpty()) {
        ResourceFieldSchema[] subSchemaFields = new ResourceFieldSchema[subFields.size()];
        int i = 0;
        for (RequiredField subField : subFields) {
            subSchemaFields[i++] = createResourceFieldSchema(subField);
        }
        ResourceSchema subSchema = new ResourceSchema();
        subSchema.setFields(subSchemaFields);
        schema.setSchema(subSchema);
    }
    return schema;
}
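A quick hypothetical usage of the converter above; the field names and indexes are made up, and the RequiredField constructor follows the (alias, index, subFields, type) form used in the other snippets.

// Hypothetical: a map field "location" projected down to its "zip" key.
RequiredField zip = new RequiredField("zip", -1, null, DataType.BYTEARRAY);
RequiredField location = new RequiredField("location", 2, Arrays.asList(zip), DataType.MAP);

ResourceFieldSchema rfs = createResourceFieldSchema(location);
// rfs.getName() -> "location", rfs.getType() -> DataType.MAP,
// rfs.getSchema() holds a single inner field named "zip".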
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
@Override
public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    if (requiredFieldList != null) {
        readKey = readValue = false;
        for (RequiredField field : requiredFieldList.getFields()) {
            int i = field.getIndex();
            switch (i) {
            case 0:
                readKey = true;
                break;
            case 1:
                readValue = true;
                break;
            default:
                // TODO fix Pig's silent ignorance of FrontendExceptions thrown from here
                throw new FrontendException("Expected field index in [0, 1] but found index " + i);
            }
        }
    }
}
public ProjectedThriftTupleFactory(TypeRef<T> typeRef, RequiredFieldList requiredFieldList) {
    tStructDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
    int numFields = tStructDesc.getFields().size();
    if (requiredFieldList != null) {
        List<RequiredField> tupleFields = requiredFieldList.getFields();
        requiredFields = new int[tupleFields.size()];
        // should we handle nested projections? not yet.
        int i = 0;
        for (RequiredField f : tupleFields) {
            Preconditions.checkState(f.getIndex() < numFields, "Projected index is out of range");
            requiredFields[i++] = f.getIndex();
        }
    } else {
        // all the fields are required
        requiredFields = new int[numFields];
        for (int i = 0; i < numFields; i++) {
            requiredFields[i] = i;
        }
    }
}
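A hedged construction example for the factory above. MyThriftRecord, the aliases, and the indexes are assumptions; RequiredFieldList(List<RequiredField>) and the anonymous TypeRef subclass follow the Pig and elephant-bird APIs as used elsewhere in this collection.

// Hypothetical: project fields 0 and 2 of a generated Thrift class.
TypeRef<MyThriftRecord> typeRef = new TypeRef<MyThriftRecord>() {};
RequiredFieldList projection = new RequiredFieldList(Arrays.asList(
        new RequiredField("id", 0, null, DataType.LONG),
        new RequiredField("name", 2, null, DataType.CHARARRAY)));
ProjectedThriftTupleFactory<MyThriftRecord> factory =
        new ProjectedThriftTupleFactory<MyThriftRecord>(typeRef, projection);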
public ProjectedProtobufTupleFactory(TypeRef<M> typeRef, RequiredFieldList requiredFieldList) {
    List<FieldDescriptor> protoFields = Protobufs.getMessageDescriptor(typeRef.getRawClass()).getFields();
    protoConv = new ProtobufToPig();
    if (requiredFieldList != null) {
        List<RequiredField> tupleFields = requiredFieldList.getFields();
        requiredFields = Lists.newArrayListWithCapacity(tupleFields.size());
        // should we handle nested projections?
        for (RequiredField f : tupleFields) {
            requiredFields.add(protoFields.get(f.getIndex()));
        }
    } else {
        requiredFields = protoFields;
    }
}
HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }
    Properties props = UDFContext.getUDFContext().getUDFProperties(
            classForUDFCLookup, new String[]{signature});
    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
    for (RequiredField rf : fields) {
        fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
    }
    return new HCatSchema(fcols);
}
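To show how this helper is typically wired up, here is a hedged sketch of a pushProjection() that resolves the pruned schema and stashes it for the back end. The property key and the error handling are assumptions, not the actual HCatLoader code; storing an object value in the Properties mirrors the HCAT_TABLE_SCHEMA lookup above.

// Sketch: use getHCatSchema() to compute and record the projected schema.
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return new RequiredFieldResponse(false);
    }
    try {
        HCatSchema projectedSchema = getHCatSchema(requiredFieldList.getFields(), signature, getClass());
        Properties props = UDFContext.getUDFContext().getUDFProperties(getClass(), new String[]{signature});
        props.put("hcat.pig.projected.schema", projectedSchema); // hypothetical property key
    } catch (IOException e) {
        throw new FrontendException("Could not compute projected schema", e);
    }
    return new RequiredFieldResponse(true);
}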