/**
 * Selects, by name, the fields of a Parquet group that a requested Pig schema asks for.
 *
 * <p>Each requested Pig field is resolved to a column name via {@code name(alias, "field_" + position)};
 * if the group has a field with that name, the field is passed through {@code filter} together
 * with the Pig field and the result is kept. Requested fields without a match are skipped.
 *
 * @param schemaToFilter the Parquet group whose fields are looked up by name
 * @param requestedPigSchema the Pig schema listing the wanted fields
 * @param requiredFieldsList not consulted by this implementation
 * @return the filtered types, in the order of the requested Pig fields
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<Type> retained = new ArrayList<Type>();
  int position = 0;
  for (FieldSchema requested : requestedPigSchema.getFields()) {
    // the position is only used to build the fallback name handed to name(...)
    String columnName = name(requested.alias, "field_" + position);
    position++;
    if (schemaToFilter.containsField(columnName)) {
      Type matched = schemaToFilter.getType(columnName);
      retained.add(filter(matched, requested));
    }
  }
  return retained;
}
}
/**
 * Tells whether {@code path}, read from position {@code depth}, can be followed through this type.
 *
 * <p>If the path is already used up at {@code depth}, the answer is {@code false}. Otherwise the
 * segment at {@code depth} must be a field of this type, and the remainder of the path is
 * delegated to that field's own {@code containsPath}.
 *
 * @param path the field names to follow
 * @param depth index of the segment to resolve against this type
 * @return {@code true} only if the segment is a field here and the field accepts the rest of the path
 */
@Override
protected boolean containsPath(String[] path, int depth) {
  boolean pathExhausted = (depth == path.length);
  if (pathExhausted) {
    return false;
  }
  String segment = path[depth];
  if (!containsField(segment)) {
    return false;
  }
  return getType(segment).containsPath(path, depth + 1);
}
/**
 * Checks whether {@code path}, starting at {@code index}, leads from {@code group} to a primitive field.
 *
 * <p>The path is walked one segment at a time. The walk fails as soon as a segment is not a field
 * of the current group, or when the path runs out while still on a group. On reaching a primitive
 * field the result is {@code true} only if that segment is the last one in the path.
 *
 * @param group the group to start from
 * @param path the field names to follow
 * @param index position in {@code path} of the first segment to resolve
 * @return {@code true} if the path ends exactly on a primitive field
 */
private boolean contains(GroupType group, String[] path, int index) {
  GroupType cursor = group;
  int position = index;
  // iterative form of the descent: each pass resolves one path segment
  while (position != path.length) {
    String segment = path[position];
    if (!cursor.containsField(segment)) {
      return false;
    }
    Type child = cursor.getType(segment);
    if (child.isPrimitive()) {
      return position + 1 == path.length;
    }
    cursor = child.asGroupType();
    position++;
  }
  return false;
}
/**
 * Checks whether {@code path}, starting at {@code index}, leads from {@code group} to a primitive field.
 *
 * @param group the group in which the segment at {@code index} is looked up
 * @param path the field names to follow
 * @param index position in {@code path} of the segment to resolve
 * @return {@code true} if the remaining path ends exactly on a primitive field; {@code false} if
 *         the path is used up, a segment is missing, or a primitive is hit before the last segment
 */
private boolean contains(GroupType group, String[] path, int index) {
  if (index == path.length) {
    // path used up while still on a group
    return false;
  }
  final String segment = path[index];
  if (!group.containsField(segment)) {
    return false;
  }
  final Type child = group.getType(segment);
  final int next = index + 1;
  return child.isPrimitive()
      ? next == path.length
      : contains(child.asGroupType(), path, next);
}
public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephantBirdCompatible, boolean columnIndexAccess) { this.parquetSchema = parquetSchema; this.elephantBirdCompatible = elephantBirdCompatible; try { this.schemaSize = max(parquetSchema.getFieldCount(), pigSchema.getFields().size()); this.converters = new Converter[this.schemaSize]; for (int i = 0, c = 0; i < schemaSize; i++) { FieldSchema field = pigSchema.getField(i); if(parquetSchema.containsField(field.alias) || columnIndexAccess) { Type type = getType(columnIndexAccess, field.alias, i); if(type != null) { final int index = i; converters[c++] = newConverter(field, type, new ParentValueContainer() { @Override void add(Object value) { TupleConverter.this.set(index, value); } }, elephantBirdCompatible, columnIndexAccess); } } } } catch (FrontendException e) { throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e); } }
if (toMerge.containsField(type.getName())) { Type fieldToMerge = toMerge.getType(type.getName()); if (fieldToMerge.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { if (!this.containsField(type.getName())) { newFields.add(type);
private void visitChildren(GroupColumnIO newIO, GroupType groupType, GroupType requestedGroupType) { GroupColumnIO oldIO = current; current = newIO; for (Type type : groupType.getFields()) { // if the file schema does not contain the field it will just stay null if (requestedGroupType.containsField(type.getName())) { currentRequestedIndex = requestedGroupType.getFieldIndex(type.getName()); currentRequestedType = requestedGroupType.getType(currentRequestedIndex); if (currentRequestedType.getRepetition().isMoreRestrictiveThan(type.getRepetition())) { incompatibleSchema(type, currentRequestedType); } type.accept(this); } } current = oldIO; }
private boolean hasMissingRequiredFieldInGroupType(GroupType requested, GroupType fullSchema) { for (Type field : fullSchema.getFields()) { if (requested.containsField(field.getName())) { Type requestedType = requested.getType(field.getName()); // if a field is in requested schema and the type of it is a group type, then do recursive check if (!field.isPrimitive()) { if (hasMissingRequiredFieldInGroupType(requestedType.asGroupType(), field.asGroupType())) { return true; } else { continue;// check next field } } } else { if (field.getRepetition() == Type.Repetition.REQUIRED) { return true; // if a field is missing in requested schema and it's required } else { continue; // the missing field is not required, then continue checking next field } } } return false; }