for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
@Override public void setInputSchema(Schema input) { try { // relation.bag.tuple this.inputSchema = input.getField(0).schema.getField(0).schema; saveSchemaToUDFContext(); } catch (FrontendException e) { throw new RuntimeException("Usage: B = FOREACH (GROUP A ALL) GENERATE Summary(A); Can not get schema from " + input, e); } catch (RuntimeException e) { throw new RuntimeException("Usage: B = FOREACH (GROUP A ALL) GENERATE Summary(A); Can not get schema from "+input, e); } }
/**
 * Derives the Hive TypeInfo for the tuple nested inside the input bag.
 *
 * @param inputSchema Pig schema whose first field is a bag of tuples
 * @return the Hive type information for the inner tuple field
 * @throws IOException if the schema cannot be converted
 */
private static TypeInfo getInputTypeInfo(Schema inputSchema) throws IOException {
    // bag -> tuple: descend two levels to the field whose Hive type we need.
    FieldSchema tupleField = inputSchema.getField(0).schema.getField(0);
    return HiveUtils.getTypeInfo(new ResourceFieldSchema(tupleField));
}
/**
 * Looks up the i-th field of a schema, tolerating a missing schema or an
 * out-of-range index by returning null.
 *
 * @param schema the schema to inspect; may be null
 * @param i zero-based field index
 * @return the field schema, or null when schema is null or i is out of range
 * @throws RuntimeException wrapping any FrontendException from the lookup
 */
protected FieldSchema getField(Schema schema, int i) {
    if (schema == null || i >= schema.size()) {
        return null;
    }
    try {
        return schema.getField(i);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Output schema mirrors the inner schema of the first input field; any
 * failure to read it yields an unknown (null) output schema.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema inner = input.getField(0).schema;
        return new Schema(inner);
    } catch (Exception e) {
        // Unknown schema when the input cannot be inspected.
        return null;
    }
}
}
/**
 * Validates that the first input field is a MAP and declares a MAP output.
 *
 * @throws RuntimeException if the first field is not a map or the schema
 *         cannot be inspected
 */
@Override
public Schema outputSchema(Schema input) {
    byte firstType;
    try {
        firstType = input.getField(0).type;
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
    if (firstType != DataType.MAP) {
        throw new RuntimeException("Expected map, received schema " + DataType.findTypeName(firstType));
    }
    return new Schema(new Schema.FieldSchema(null, DataType.MAP));
}
/**
 * Requires at least three input fields; the output schema mirrors field 2.
 * Any failure to inspect the input yields an unknown (null) schema.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        return input.size() < 3 ? null : new Schema(input.getField(2));
    } catch (Exception e) {
        return null;
    }
}
/**
 * Declares a bag output, named after this UDF, that carries the inner schema
 * of the first input field.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema inner = input.getField(0).schema;
        String name = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(name, inner, DataType.BAG));
    } catch (FrontendException e) {
        // Unknown schema when the input cannot be inspected.
        return null;
    }
}
}
/**
 * Declares the output as a bag carrying the first input field's inner schema,
 * using this UDF's lower-cased class name for the schema name.
 */
@Override
public Schema outputSchema(Schema input) {
    String bagName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    try {
        Schema.FieldSchema bagField =
            new Schema.FieldSchema(bagName, input.getField(0).schema, DataType.BAG);
        return new Schema(bagField);
    } catch (FrontendException e) {
        // Unknown schema when the input cannot be inspected.
        return null;
    }
}
}
/**
 * Output schema mirrors the first input field. A failed lookup is logged and
 * treated as an unknown field (a schema wrapping null), matching the UDF's
 * best-effort behavior; a null input yields a null schema.
 */
public Schema outputSchema(Schema input) {
    if (input == null) {
        return null;
    }
    FieldSchema first;
    try {
        first = input.getField(0);
    } catch (FrontendException e) {
        pigLogger.warn(this, "Frontend exception getting field schema", Errors.FRONTEND_EXCEPTION);
        first = null;
    }
    return new Schema(first);
}
}
/**
 * Output is a tuple composed of the first three input fields, named after
 * this UDF. Any failure to inspect the input yields an unknown (null) schema.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema tupleSchema = new Schema();
        for (int i = 0; i < 3; i++) {
            tupleSchema.add(input.getField(i));
        }
        String name = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(name, tupleSchema, DataType.TUPLE));
    } catch (Exception e) {
        return null;
    }
}
}
/**
 * Output schema: a bag named after this UDF carrying the inner schema of the
 * first input field.
 *
 * @param input input schema; the first field must be a BAG
 * @return a bag-typed schema wrapping the input bag's inner schema
 * @throws RuntimeException if the first field is not a bag, or wrapping any
 *         FrontendException from schema inspection
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema inputFieldSchema = input.getField(0);
        if (inputFieldSchema.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }
        return new Schema(new Schema.FieldSchema(
            getSchemaName(this.getClass().getName().toLowerCase(), input),
            inputFieldSchema.schema, DataType.BAG));
    } catch (FrontendException e) {
        // The wrapped cause already carries the full stack trace; the old
        // e.printStackTrace() call duplicated it on stderr and is removed.
        throw new RuntimeException(e);
    }
}
MapConverter(GroupType parquetSchema, FieldSchema pigSchema, ParentValueContainer parent, boolean numbersDefaultToZero, boolean columnIndexAccess) throws FrontendException { if (parquetSchema.getFieldCount() != 1) { throw new IllegalArgumentException("maps have only one field. " + parquetSchema); } this.parent = parent; keyValue = new MapKeyValueConverter(parquetSchema.getType(0).asGroupType(), pigSchema.schema.getField(0), numbersDefaultToZero, columnIndexAccess); }
/**
 * Converts a JavaScript array into a Pig DataBag of tuples.
 *
 * @param array the JavaScript array to convert
 * @param schema bag schema; if it wraps its tuple schema in a single TUPLE
 *               field, that layer is unwrapped first
 * @param depth recursion depth, used for debug tracing
 * @return a bag with one tuple per array element
 * @throws FrontendException if the schema cannot be inspected
 * @throws ExecException if tuple conversion fails
 */
private DataBag jsToPigBag(Scriptable array, Schema schema, int depth)
        throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Bag", array, schema);
    // Unwrap the synthetic single-TUPLE wrapper Pig puts around bag contents.
    if (schema.size() == 1 && schema.getField(0).type == DataType.TUPLE) {
        schema = schema.getField(0).schema;
    }
    List<Tuple> tuples = new ArrayList<Tuple>();
    for (Object id : array.getIds()) {
        int index = ((Integer) id).intValue();
        Scriptable element = (Scriptable) array.get(index, null);
        tuples.add(jsToPigTuple(element, schema, depth + 1));
    }
    DataBag result = BagFactory.getInstance().newDefaultBag(tuples);
    debugReturn(depth, result);
    return result;
}
/**
 * Deserializes a delimited byte range from the streaming subprocess into a
 * DataBag of tuples.
 *
 * @param fs field schema of the bag; fs.schema.getField(0) describes the
 *           tuples it contains
 * @param buf raw bytes received from the streaming process
 * @param startIndex first byte of the bag's contents (inclusive)
 * @param endIndex last byte of the bag's contents (inclusive)
 * @return a bag with one tuple per delimited segment
 * @throws IOException if a contained tuple cannot be deserialized
 */
private DataBag deserializeBag(FieldSchema fs, byte[] buf, int startIndex, int endIndex) throws IOException {
    ArrayList<Tuple> protoBag = new ArrayList<Tuple>();
    // Nesting depth tracked so delimiters inside nested values are not split on.
    int depth = 0;
    int fieldStart = startIndex;
    for (int index = startIndex; index <= endIndex; index++) {
        depth = DELIMS.updateDepth(buf, depth, index);
        if ( StreamingDelimiters.isDelimiter(DELIMS.getFieldDelim(), buf, index, depth, endIndex)) {
            // Bytes from fieldStart up to (not including) the delimiter form one tuple.
            protoBag.add((Tuple)deserialize(fs.schema.getField(0), buf, fieldStart, index - 1));
            // NOTE(review): +3 presumably skips a 3-byte field-delimiter
            // sequence — confirm against the DELIMS definition.
            fieldStart = index + 3;
        }
    }
    return bagFactory.newDefaultBag(protoBag);
}
/**
 * Converts a Pig bag field into a Parquet list: an optional group containing
 * one repeated tuple group.
 *
 * @param name name of the resulting Parquet field
 * @param fieldSchema Pig bag field schema; its first inner field is the tuple
 * @return an optional group containing one repeated group field
 * @throws FrontendException if the inner field cannot be read
 */
private GroupType convertBag(String name, FieldSchema fieldSchema) throws FrontendException {
    FieldSchema tupleField = fieldSchema.schema.getField(0);
    Type repeatedTuple = convertTuple(name(tupleField.alias, "bag"), tupleField, Repetition.REPEATED);
    return ConversionPatterns.listType(Repetition.OPTIONAL, name, repeatedTuple);
}
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException { if (DEBUG) LOG.debug("filtering BAG schema:\n" + bagType + "\nwith:\n " + bagFieldSchema); if (bagType.getFieldCount() != 1) { throw new RuntimeException("not unwrapping the right type, this should be a Bag: " + bagType); } Type nested = bagType.getType(0); FieldSchema innerField = bagFieldSchema.schema.getField(0); if (nested.isPrimitive() || nested.getOriginalType() == OriginalType.MAP || nested.getOriginalType() == OriginalType.LIST) { // Bags always contain tuples => we skip the extra tuple that was inserted in that case. innerField = innerField.schema.getField(0); } return bagType.withNewFields(filter(nested, innerField)); } }
/**
 * Wires up the serializer/deserializer pair used to stream data to and from
 * the UDF subprocess. The output-side deserializer is bound to the schema of
 * this UDF's first field.
 *
 * @throws ExecException if handler construction fails
 * @throws FrontendException if the field schema cannot be read
 */
private void createInputHandlers() throws ExecException, FrontendException {
    this.inputHandler = new StreamingUDFInputHandler(new PigStreamingUDF());
    this.outputHandler = new StreamingUDFOutputHandler(new PigStreamingUDF(schema.getField(0)));
}
/**
 * Projects a Parquet map type down to the value fields requested by the given
 * Pig map field schema; the key type is kept as-is.
 *
 * @param mapType Parquet group for the map; must hold exactly one
 *                key/value group with two fields
 * @param mapFieldSchema Pig field schema describing the requested value columns
 * @return the map type with its value type filtered
 * @throws FrontendException if the Pig schema cannot be inspected
 * @throws RuntimeException if mapType does not look like a map
 */
private Type filterMap(GroupType mapType, FieldSchema mapFieldSchema) throws FrontendException {
    if (DEBUG) {
        LOG.debug("filtering MAP schema:\n" + mapType + "\nwith:\n " + mapFieldSchema);
    }
    if (mapType.getFieldCount() != 1) {
        throw new RuntimeException("not unwrapping the right type, this should be a Map: " + mapType);
    }
    GroupType keyValueType = mapType.getType(0).asGroupType();
    if (keyValueType.getFieldCount() != 2) {
        throw new RuntimeException("this should be a Map Key/Value: " + mapType);
    }
    FieldSchema valueField = mapFieldSchema.schema.getField(0);
    Type filteredValue = filter(keyValueType.getType(1), valueField);
    return mapType.withNewFields(keyValueType.withNewFields(keyValueType.getType(0), filteredValue));
}