/**
 * Builds an HCatSchema restricted to the requested projection columns,
 * resolving each one against the table schema previously stashed in the
 * UDF context under the given (class, signature) scope.
 *
 * @param fields             columns requested by the projection; may be null
 * @param signature          UDF signature key used for the context property lookup
 * @param classForUDFCLookup class used to scope the UDF-context property lookup
 * @return a schema containing only the requested columns, or null when fields is null
 * @throws IOException declared for callers; not thrown directly here
 */
HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }

    // The full table schema was placed in the UDF context earlier in the load
    // pipeline; fetch it with the same (class, signature) scoping.
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(
        classForUDFCLookup, new String[]{signature});
    HCatSchema tableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);

    // Project the table schema down to the requested column indexes.
    ArrayList<HCatFieldSchema> projectedColumns = new ArrayList<HCatFieldSchema>(fields.size());
    for (RequiredField required : fields) {
        projectedColumns.add(tableSchema.getFields().get(required.getIndex()));
    }
    return new HCatSchema(projectedColumns);
}
list.add(rf.getIndex());
@Override public void setLocation(String location, Job job) throws IOException { // The location is assumed to be comma separated paths. FileInputFormat.setInputPaths(job, location); requiredFieldList = (RequiredFieldList) getFromUDFContext(PRUNE_PROJECTION_INFO); // If we encounter a PushDown Projection we strip the requestedFields to only the needed ones // This pruning will very effectively push the projection down into the actual parser system. if (requiredFieldList != null && originalRequestedFields == null) { // Avoid pruning twice !! Set<Integer> requestedFieldIndexes = new HashSet<>(); for (RequiredField requiredField : requiredFieldList.getFields()) { requestedFieldIndexes.add(requiredField.getIndex()); } List<String> prunedRequestedFields = new ArrayList<>(requestedFieldIndexes.size()); int index = 0; for (String field : requestedFields) { if (requestedFieldIndexes.contains(index)) { prunedRequestedFields.add(field); } ++index; } originalRequestedFields = requestedFields; requestedFields = prunedRequestedFields; } }
int columnIndex = requiredFieldList.getFields().get(index).getIndex(); if(columnIndex >= super.columnConfigList.size()) { assert element != null;
columnRequired[rf.getIndex()] = true; List<RequiredField> sub = rf.getSubFields(); if (sub != null) { message.append("Map key required for " + load.getAlias() + ": $" + rf.getIndex() + "->" + sub + "\n"); LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex()); innerPlan.add(innerLoad); innerPlan.connect(innerLoad, gen); List<LoadPushDown.RequiredField> fieldList = requiredFields.getFields(); for (int i=0; i<fieldList.size(); i++) { requiredIndexes.add(fieldList.get(i).getIndex()); newSchema.addField(s.getField(fieldList.get(i).getIndex()));
/**
 * Restricts a Parquet group type's field list to the columns present in the Pig
 * schema, honoring a push-down projection when one is supplied.
 *
 * @param schemaToFilter     the Parquet schema whose fields are being filtered
 * @param pigSchema          the Pig schema driving the selection
 * @param requiredFieldsList projection pushed down by Pig; null means all fields
 * @return the filtered Parquet field types, in projection order
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
    List<Type> newFields = new ArrayList<Type>();
    List<Pair<FieldSchema, Integer>> indexedFields = new ArrayList<Pair<FieldSchema, Integer>>();
    try {
        if (requiredFieldsList != null) {
            // Projection push-down: resolve each required field by alias, keeping
            // the column index it maps to.
            for (RequiredField required : requiredFieldsList.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(
                    pigSchema.getField(required.getAlias()), required.getIndex()));
            }
        } else {
            // No projection: keep every Pig field in declaration order.
            int position = 0;
            for (FieldSchema candidate : pigSchema.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(candidate, position++));
            }
        }

        for (Pair<FieldSchema, Integer> entry : indexedFields) {
            // Re-resolve by alias, matching the original lookup behavior.
            FieldSchema fieldSchema = pigSchema.getField(entry.first.alias);
            // Indexes beyond the Parquet schema's width are silently dropped.
            if (entry.second < schemaToFilter.getFieldCount()) {
                Type type = schemaToFilter.getFields().get(entry.second);
                newFields.add(filter(type, fieldSchema));
            }
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Failed to filter requested fields", e);
    }
    return newFields;
}
}
/**
 * Records which columns the script actually uses so the loader can skip
 * deserializing the rest. The column mask is sized from the highest requested
 * index and serialized into the UDF context under this loader's signature.
 *
 * @param requiredFieldList columns requested by the optimizer; null means no push-down
 * @return a response accepting the projection, or null when nothing was requested
 * @throws FrontendException declared by the LoadPushDown contract
 */
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        // Size the mask from the highest requested column index.
        int lastColumn = -1;
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() > lastColumn) {
                lastColumn = rf.getIndex();
            }
        }
        mRequiredColumns = new boolean[lastColumn + 1];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            // FIX: propagate the underlying cause instead of swallowing it, so
            // serialization failures remain diagnosable from the stack trace.
            throw new RuntimeException("Cannot serialize mRequiredColumns", e);
        }
    }
    return new RequiredFieldResponse(true);
}
@Override public void setLocation(String location, Job job) throws IOException { // The location is assumed to be comma separated paths. FileInputFormat.setInputPaths(job, location); requiredFieldList = (RequiredFieldList) getFromUDFContext(PRUNE_PROJECTION_INFO); // If we encounter a PushDown Projection we strip the requestedFields to only the needed ones // This pruning will very effectively push the projection down into the actual parser system. if (requiredFieldList != null && originalRequestedFields == null) { // Avoid pruning twice !! Set<Integer> requestedFieldIndexes = new HashSet<>(); for (RequiredField requiredField : requiredFieldList.getFields()) { requestedFieldIndexes.add(requiredField.getIndex()); } List<String> prunedRequestedFields = new ArrayList<>(requestedFieldIndexes.size()); int index = 0; for (String field : requestedFields) { if (requestedFieldIndexes.contains(index)) { prunedRequestedFields.add(field); } ++index; } originalRequestedFields = requestedFields; requestedFields = prunedRequestedFields; } }
/**
 * Records which columns the script actually uses so the ORC reader can skip
 * the rest. The column mask is sized from the full struct schema and serialized
 * into the UDF context under this loader's signature plus a suffix.
 *
 * @param requiredFieldList columns requested by the optimizer; null means no push-down
 * @return a response accepting the projection, or null when nothing was requested
 * @throws FrontendException declared by the LoadPushDown contract
 */
@Override
public RequiredFieldResponse pushProjection(
        RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        // Mask covers every column of the struct, not just the requested ones.
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            // FIX: propagate the underlying cause instead of swallowing it, so
            // serialization failures remain diagnosable from the stack trace.
            throw new RuntimeException("Cannot serialize mRequiredColumns", e);
        }
    }
    return new RequiredFieldResponse(true);
}
@Override public void setLocation(String location, Job job) throws IOException { super.setLocation(location, job); if (requiredFieldList != null) { readKey = readValue = false; for (RequiredField field : requiredFieldList.getFields()) { int i = field.getIndex(); switch (i) { case 0: readKey = true; break; case 1: readValue = true; break; default: // TODO fix Pig's silent ignorance of FrontendExceptions thrown from here throw new FrontendException("Expected field index in [0, 1] but found index " + i); } } } }
/**
 * Precomputes the Thrift field indexes needed to build projected tuples.
 * When no projection is supplied, every field of the struct is required.
 *
 * @param typeRef           the Thrift struct type being loaded
 * @param requiredFieldList projection pushed down by Pig; null means all fields
 */
public ProjectedThriftTupleFactory(TypeRef<T> typeRef, RequiredFieldList requiredFieldList) {
    tStructDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
    int numFields = tStructDesc.getFields().size();
    if (requiredFieldList != null) {
        List<RequiredField> tupleFields = requiredFieldList.getFields();
        requiredFields = new int[tupleFields.size()];
        // should we handle nested projections? not yet.
        int i = 0;
        for (RequiredField f : tupleFields) {
            // FIX: include the offending index and the struct width in the failure
            // message so a bad projection is diagnosable from the message alone.
            Preconditions.checkState(f.getIndex() < numFields,
                "Projected index %s is out of range (struct has %s fields)",
                f.getIndex(), numFields);
            requiredFields[i++] = f.getIndex();
        }
    } else {
        // all the fields are required
        requiredFields = new int[numFields];
        for (int i = 0; i < numFields; i++) {
            requiredFields[i] = i;
        }
    }
}
/**
 * Precomputes the protobuf field descriptors needed to build projected tuples.
 * When no projection is supplied, every field of the message is required.
 *
 * @param typeRef           the protobuf message type being loaded
 * @param requiredFieldList projection pushed down by Pig; null means all fields
 */
public ProjectedProtobufTupleFactory(TypeRef<M> typeRef, RequiredFieldList requiredFieldList) {
    List<FieldDescriptor> protoFields =
        Protobufs.getMessageDescriptor(typeRef.getRawClass()).getFields();
    protoConv = new ProtobufToPig();
    if (requiredFieldList == null) {
        // No projection pushed down: every message field is required.
        requiredFields = protoFields;
    } else {
        // should we handle nested projections?
        List<RequiredField> projected = requiredFieldList.getFields();
        requiredFields = Lists.newArrayListWithCapacity(projected.size());
        for (RequiredField wanted : projected) {
            requiredFields.add(protoFields.get(wanted.getIndex()));
        }
    }
}
/**
 * Projects the HCatalog table schema down to the given required fields.
 * The full table schema is expected to have been stored in the UDF context
 * (scoped by classForUDFCLookup + signature) earlier in the load pipeline.
 *
 * @param fields             projection columns; null yields a null schema
 * @param signature          UDF signature scoping the context property lookup
 * @param classForUDFCLookup class scoping the context property lookup
 * @return a schema containing only the requested columns, or null if fields is null
 * @throws IOException declared for callers; not thrown directly here
 */
HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }
    Properties props = UDFContext.getUDFContext().getUDFProperties(
        classForUDFCLookup, new String[]{signature});
    // NOTE(review): assumes HCAT_TABLE_SCHEMA was already stored in the context;
    // a missing entry would surface as an NPE below — confirm the setup path runs first.
    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
    for (RequiredField rf : fields) {
        // Resolve each required field by its column index in the table schema.
        fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
    }
    return new HCatSchema(fcols);
}
/**
 * Projects the HCatalog table schema down to the given required fields.
 * The full table schema is expected to have been stored in the UDF context
 * (scoped by classForUDFCLookup + signature) earlier in the load pipeline.
 *
 * @param fields             projection columns; null yields a null schema
 * @param signature          UDF signature scoping the context property lookup
 * @param classForUDFCLookup class scoping the context property lookup
 * @return a schema containing only the requested columns, or null if fields is null
 * @throws IOException declared for callers; not thrown directly here
 */
HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }
    Properties props = UDFContext.getUDFContext().getUDFProperties(
        classForUDFCLookup, new String[]{signature});
    // NOTE(review): assumes HCAT_TABLE_SCHEMA was already stored in the context;
    // a missing entry would surface as an NPE below — confirm the setup path runs first.
    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
    for (RequiredField rf : fields) {
        // Resolve each required field by its column index in the table schema.
        fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
    }
    return new HCatSchema(fcols);
}
/**
 * Projects the HCatalog table schema down to the given required fields.
 * The full table schema is expected to have been stored in the UDF context
 * (scoped by classForUDFCLookup + signature) earlier in the load pipeline.
 *
 * @param fields             projection columns; null yields a null schema
 * @param signature          UDF signature scoping the context property lookup
 * @param classForUDFCLookup class scoping the context property lookup
 * @return a schema containing only the requested columns, or null if fields is null
 * @throws IOException declared for callers; not thrown directly here
 */
HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
    if (fields == null) {
        return null;
    }
    Properties props = UDFContext.getUDFContext().getUDFProperties(
        classForUDFCLookup, new String[]{signature});
    // NOTE(review): assumes HCAT_TABLE_SCHEMA was already stored in the context;
    // a missing entry would surface as an NPE below — confirm the setup path runs first.
    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
    for (RequiredField rf : fields) {
        // Resolve each required field by its column index in the table schema.
        fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
    }
    return new HCatSchema(fcols);
}