ArrayList<Integer> list = new ArrayList<Integer>(requiredFieldsInfo.getFields().size());
for (RequiredField rf : requiredFieldsInfo.getFields()) {
    list.add(rf.getIndex());
}
try {
    // Convert the required fields to an HCatSchema and hand it to the input format.
    outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
    HCatInputFormat.setOutputSchema(job, outputSchema);
} catch (Exception e) {
    throw new IOException(e);
}
RequiredFieldList requiredFieldList = new RequiredFieldList();
boolean hasCandidates = CommonUtils.hasCandidateColumns(columnConfigList);
for (ColumnConfig columnConfig : super.columnConfigList) {
    if (columnConfig.isTarget()) {
        requiredFieldList.add(new RequiredField(columnConfig.getColumnName(),
                columnConfig.getColumnNum(), null, DataType.FLOAT));
    } else {
        // Non-target columns; the candidate-selection check (using hasCandidates)
        // is elided in this snippet.
        requiredFieldList.add(new RequiredField(columnConfig.getColumnName(),
                columnConfig.getColumnNum(), null, DataType.FLOAT));
    }
}
// The weight column is appended after the last configured column.
requiredFieldList.add(new RequiredField("weight", columnConfigList.size(), null, DataType.DOUBLE));
int columnIndex = requiredFieldList.getFields().get(index).getIndex();
if (columnIndex >= super.columnConfigList.size()) {
    // An index past the configured columns refers to the appended weight field.
    assert element != null;
}
outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass());
HCatRSInputFormat.setOutputSchema(job, outputSchema);
// Build a RequiredFieldList entry from the script's schema.
RequiredField requiredField = new RequiredField();
requiredField.setAlias(s.getField(i).alias);
requiredField.setType(s.getField(i).type);
RequiredFieldList requiredFields = new RequiredFieldList();
requiredFields.add(requiredField);

// Mark every projected column as required.
for (RequiredField rf : requiredFields.getFields()) {
    columnRequired[rf.getIndex()] = true;
}

// Sub-fields, when present, indicate a nested projection.
for (RequiredField rf : requiredFields.getFields()) {
    List<RequiredField> sub = rf.getSubFields();
    if (sub != null) {
        // ...
    }
}

// Rebuild the inner plan: one LOInnerLoad per required column, feeding LOGenerate.
LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[requiredFields.getFields().size()]);
innerPlan.add(gen);
for (int i = 0; i < requiredFields.getFields().size(); i++) {
    LoadPushDown.RequiredField rf = requiredFields.getFields().get(i);
    LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex());
    innerPlan.add(innerLoad);
}

// Collect the required column indexes.
List<LoadPushDown.RequiredField> fieldList = requiredFields.getFields();
for (int i = 0; i < fieldList.size(); i++) {
    requiredIndexes.add(fieldList.get(i).getIndex());
}
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    List<RequiredField> requiredFields = requiredFieldList.getFields();
    List<ColumnInfo> newColumns = Lists.newArrayListWithExpectedSize(requiredFields.size());
    // ...
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema,
        RequiredFieldList requiredFieldsList) {
    List<Type> newFields = new ArrayList<Type>();
    List<Pair<FieldSchema, Integer>> indexedFields = new ArrayList<Pair<FieldSchema, Integer>>();
    try {
        if (requiredFieldsList == null) {
            // No projection: keep every field, in schema order.
            int index = 0;
            for (FieldSchema fs : pigSchema.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(fs, index++));
            }
        } else {
            // Projection pushed down: keep only the requested fields.
            for (RequiredField rf : requiredFieldsList.getFields()) {
                indexedFields.add(new Pair<FieldSchema, Integer>(
                        pigSchema.getField(rf.getAlias()), rf.getIndex()));
            }
        }
        for (Pair<FieldSchema, Integer> p : indexedFields) {
            FieldSchema fieldSchema = pigSchema.getField(p.first.alias);
            if (p.second < schemaToFilter.getFieldCount()) {
                Type type = schemaToFilter.getFields().get(p.second);
                newFields.add(filter(type, fieldSchema));
            }
        }
    } catch (FrontendException e) {
        throw new RuntimeException("Failed to filter requested fields", e);
    }
    return newFields;
}
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        // Size the mask by the highest requested column index.
        int lastColumn = -1;
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() > lastColumn) {
                lastColumn = rf.getIndex();
            }
        }
        mRequiredColumns = new boolean[lastColumn + 1];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
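The column-mask computation in the pushProjection() above can be exercised outside a Pig job. Below is a minimal, hedged sketch that factors the same logic into a standalone helper; the class and method names are illustrative, not part of any loader, while the RequiredField constructor and RequiredFieldList.add() are the same Pig APIs used elsewhere in these snippets.

import java.util.Arrays;
import org.apache.pig.LoadPushDown.RequiredField;
import org.apache.pig.LoadPushDown.RequiredFieldList;
import org.apache.pig.data.DataType;

public class RequiredColumnsMaskDemo {
    // Same mask-building logic as the pushProjection() above:
    // size the array by the highest requested index, then flag each column.
    static boolean[] toMask(RequiredFieldList requiredFieldList) {
        int lastColumn = -1;
        for (RequiredField rf : requiredFieldList.getFields()) {
            lastColumn = Math.max(lastColumn, rf.getIndex());
        }
        boolean[] mask = new boolean[lastColumn + 1];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mask[rf.getIndex()] = true;
            }
        }
        return mask;
    }

    public static void main(String[] args) {
        RequiredFieldList rfl = new RequiredFieldList();
        rfl.add(new RequiredField("name", 0, null, DataType.CHARARRAY));
        rfl.add(new RequiredField("age", 2, null, DataType.INTEGER));
        // Columns 0 and 2 are kept: prints [true, false, true].
        System.out.println(Arrays.toString(toMask(rfl)));
    }
}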
@Override
public void setLocation(String location, Job job) throws IOException {
    // The location is assumed to be comma-separated paths.
    FileInputFormat.setInputPaths(job, location);
    requiredFieldList = (RequiredFieldList) getFromUDFContext(PRUNE_PROJECTION_INFO);
    // If a pushed-down projection is present, strip requestedFields to only the
    // needed ones, pushing the projection into the underlying parser.
    if (requiredFieldList != null && originalRequestedFields == null) { // avoid pruning twice
        Set<Integer> requestedFieldIndexes = new HashSet<>();
        for (RequiredField requiredField : requiredFieldList.getFields()) {
            requestedFieldIndexes.add(requiredField.getIndex());
        }
        List<String> prunedRequestedFields = new ArrayList<>(requestedFieldIndexes.size());
        int index = 0;
        for (String field : requestedFields) {
            if (requestedFieldIndexes.contains(index)) {
                prunedRequestedFields.add(field);
            }
            ++index;
        }
        originalRequestedFields = requestedFields;
        requestedFields = prunedRequestedFields;
    }
}
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    if (requiredFieldList == null) {
        return null;
    }
    if (requiredFieldList.getFields() != null) {
        // Size the mask by the ORC struct's field count.
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1) {
                mRequiredColumns[rf.getIndex()] = true;
            }
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
@Override
public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    if (requiredFieldList != null) {
        readKey = readValue = false;
        for (RequiredField field : requiredFieldList.getFields()) {
            int i = field.getIndex();
            switch (i) {
            case 0:
                readKey = true;
                break;
            case 1:
                readValue = true;
                break;
            default:
                // TODO fix Pig's silent ignorance of FrontendExceptions thrown from here
                throw new FrontendException("Expected field index in [0, 1] but found index " + i);
            }
        }
    }
}
public ProjectedThriftTupleFactory(TypeRef<T> typeRef, RequiredFieldList requiredFieldList) {
    tStructDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
    int numFields = tStructDesc.getFields().size();
    if (requiredFieldList != null) {
        List<RequiredField> tupleFields = requiredFieldList.getFields();
        requiredFields = new int[tupleFields.size()];
        // should we handle nested projections? not yet.
        int i = 0;
        for (RequiredField f : tupleFields) {
            Preconditions.checkState(f.getIndex() < numFields, "Projected index is out of range");
            requiredFields[i++] = f.getIndex();
        }
    } else {
        // all the fields are required
        requiredFields = new int[numFields];
        for (int i = 0; i < numFields; i++) {
            requiredFields[i] = i;
        }
    }
}
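The index-mapping step in the constructor above can be isolated so it is testable without a Thrift-generated class. A hedged sketch follows; resolveProjection is an illustrative name, not part of the factory.

// Hypothetical helper mirroring the factory's projection resolution: given a
// RequiredFieldList (or null for "all fields"), return the source-field
// indexes to read, in projection order.
static int[] resolveProjection(RequiredFieldList requiredFieldList, int numFields) {
    if (requiredFieldList == null) {
        int[] all = new int[numFields];   // all the fields are required
        for (int i = 0; i < numFields; i++) {
            all[i] = i;
        }
        return all;
    }
    List<RequiredField> tupleFields = requiredFieldList.getFields();
    int[] projected = new int[tupleFields.size()];
    int i = 0;
    for (RequiredField f : tupleFields) {
        // Same guard as the Preconditions.checkState(...) above.
        if (f.getIndex() >= numFields) {
            throw new IllegalStateException("Projected index is out of range: " + f.getIndex());
        }
        projected[i++] = f.getIndex();
    }
    return projected;
}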
public ProjectedProtobufTupleFactory(TypeRef<M> typeRef, RequiredFieldList requiredFieldList) {
    List<FieldDescriptor> protoFields = Protobufs.getMessageDescriptor(typeRef.getRawClass()).getFields();
    protoConv = new ProtobufToPig();
    if (requiredFieldList != null) {
        List<RequiredField> tupleFields = requiredFieldList.getFields();
        requiredFields = Lists.newArrayListWithCapacity(tupleFields.size());
        // should we handle nested projections?
        for (RequiredField f : tupleFields) {
            requiredFields.add(protoFields.get(f.getIndex()));
        }
    } else {
        requiredFields = protoFields;
    }
}
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    this.requiredFieldList = requiredFieldList;
    if (requiredFieldList == null) {
        return null;
    }
    schema = getSchemaFromRequiredFieldList(schema, requiredFieldList.getFields());
    storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
    return new RequiredFieldResponse(true);
}
/**
 * Takes an Avro Schema and a Pig RequiredFieldList and returns a new schema
 * with only the required fields, or null if the function can't extract only
 * those fields. Useful for push-down projections.
 * @param oldSchema the Avro schema from which to extract the new schema
 * @param rfl the Pig required field list
 * @return the new schema, or null
 */
public static Schema newSchemaFromRequiredFieldList(final Schema oldSchema,
        final RequiredFieldList rfl) {
    return newSchemaFromRequiredFieldList(oldSchema, rfl.getFields());
}
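A hedged usage sketch for the helper above: the record layout is invented for illustration, and newSchemaFromRequiredFieldList is assumed to be statically imported from the surrounding utility class.

Schema record = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
    + "{\"name\":\"id\",\"type\":\"long\"},"
    + "{\"name\":\"name\",\"type\":\"string\"},"
    + "{\"name\":\"email\",\"type\":\"string\"}]}");

RequiredFieldList rfl = new RequiredFieldList();
rfl.add(new RequiredField("id", 0, null, DataType.LONG));
rfl.add(new RequiredField("name", 1, null, DataType.CHARARRAY));

// Expected: a record schema containing only "id" and "name",
// or null if the projection cannot be honored.
Schema pruned = newSchemaFromRequiredFieldList(record, rfl);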
static String asProjection(RequiredFieldList list, Properties props) {
    List<String> fields = new ArrayList<String>();
    FieldAlias alias = alias(new PropertiesSettings(props));
    for (RequiredField field : list.getFields()) {
        addField(field, fields, alias, "");
    }
    return StringUtils.concatenate(fields, ",");
}
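A hedged usage sketch, assuming the package-private asProjection(...) above is accessible and no field aliases are configured in the Properties:

RequiredFieldList rfl = new RequiredFieldList();
rfl.add(new RequiredField("name", 0, null, DataType.CHARARRAY));
rfl.add(new RequiredField("age", 1, null, DataType.INTEGER));

// With an empty Properties (no alias overrides), this is expected
// to join the requested aliases into "name,age".
String projection = asProjection(rfl, new Properties());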
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) {
    LOG.info(format("[%s]: pushProjection() -> %s", signature, requiredFieldList));
    try {
        List<String> projection = requiredFieldList.getFields().stream()
                .map(RequiredField::getAlias)
                .collect(Collectors.toList());
        storeInUDFContext(ICEBERG_PROJECTED_FIELDS, (Serializable) projection);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return new RequiredFieldResponse(true);
}