public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing * schema of the table in metastore. */ protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { if(LOG.isDebugEnabled()) { LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")"); } List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size()); for (FieldSchema fSchema : pigSchema.getFields()) { try { HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); //if writing to a partitioned table, then pigSchema will have more columns than tableSchema //partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData() // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " + // "in target table schema", LOG); fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema)); } catch (HCatException he) { throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); } } HCatSchema s = new HCatSchema(fieldSchemas); LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")"); return s; }
/**
 * Returns the i-th field of the given schema, or null when the schema is null
 * or the index is out of range. A FrontendException from the schema lookup is
 * rethrown as an unchecked RuntimeException.
 */
protected FieldSchema getField(Schema schema, int i) {
    // size() throws no checked exception, so the bounds check can sit outside the try.
    if (schema == null || i >= schema.size()) {
        return null;
    }
    try {
        return schema.getField(i);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Output schema: for a single-field BAG input, the bag's inner schema;
 * null otherwise (Pig treats null as "unknown schema"). Any exception is
 * logged and converted to a null schema.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() != 1) {
            return null;
        }
        Schema.FieldSchema field = input.getField(0);
        return (field.type == DataType.BAG) ? field.schema : null;
    } catch (Exception e) {
        this.log.error("Caught exception in " + this.getClass().getSimpleName() + ".outputSchema", e);
        return null;
    }
}
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/**
 * Sets the alias of the FieldSchema of the encapsulated Schema. Only valid
 * when the Schema contains exactly one FieldSchema.
 *
 * @param arg a RubyString holding the new name
 * @return the new name
 */
@JRubyMethod(name = "name=")
public RubyString setName(IRubyObject arg) {
    if (!(arg instanceof RubyString)) {
        throw new RuntimeException("Improper argument passed to 'name=':" + arg);
    }
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only set name if there is one schema present");
    }
    try {
        internalSchema.getField(0).alias = arg.toString();
        return (RubyString) arg;
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
}
/**
 * Output schema: a schema wrapping the third input field, or null when fewer
 * than three fields are present or the lookup fails.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        return (input.size() < 3) ? null : new Schema(input.getField(2));
    } catch (Exception e) {
        return null;
    }
}
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/**
 * Inspects a Schema to see whether a SchemaTuple implementation can be
 * generated for the types present. Currently, bags and maps are not supported.
 *
 * @param s a Schema to inspect
 * @return true if a SchemaTuple can be generated for it
 */
public static boolean isGeneratable(Schema s) {
    if (s == null || s.size() == 0) {
        return false;
    }
    for (Schema.FieldSchema field : s.getFields()) {
        // Tuples are generatable only when their nested schema is too.
        if (field.type == DataType.TUPLE && !isGeneratable(field.schema)) {
            return false;
        }
    }
    return true;
}
/**
 * @param context the context the method is being executed in
 * @return the size of the encapsulated Schema as a RubyFixnum
 */
@JRubyMethod(name = {"size", "length"})
public RubyFixnum size(ThreadContext context) {
    int fieldCount = internalSchema.size();
    return new RubyFixnum(context.getRuntime(), fieldCount);
}
/**
 * Returns the alias of the FieldSchema of the encapsulated Schema. Only valid
 * when the Schema contains exactly one FieldSchema.
 *
 * @param context the context the method is being executed in
 * @return the name of the Schema
 */
@JRubyMethod(name = "name")
public RubyString getName(ThreadContext context) {
    // size() throws no FrontendException, so the guard can live outside the try.
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only get name if there is one schema present");
    }
    try {
        String alias = internalSchema.getField(0).alias;
        return RubyString.newString(context.getRuntime(), alias);
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
/**
 * Output schema: a single unnamed LONG field. Requires the input to be
 * exactly one BAG field; anything else is reported as a RuntimeException.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() != 1) {
            throw new RuntimeException("Expected input to have only a single field");
        }
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }
        return new Schema(new Schema.FieldSchema(null, DataType.LONG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
}
/**
 * Output schema: a single TUPLE field that contains a copy of every input
 * field, named after this UDF class. Returns null on any failure.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema innerTuple = new Schema();
        for (int fieldIdx = 0; fieldIdx < input.size(); ++fieldIdx) {
            innerTuple.add(input.getField(fieldIdx));
        }
        // NOTE(review): toLowerCase() here is locale-sensitive — presumably
        // intentional for schema naming; confirm if non-ROOT locales matter.
        String name = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(name, innerTuple, DataType.TUPLE));
    } catch (Exception e) {
        return null;
    }
}
/**
 * This method allows access into the Schema nested in the encapsulated Schema.
 * For example, if the encapsulated Schema is a bag Schema, this allows the
 * user to access the schema of the interior Tuple. Only valid when the Schema
 * contains exactly one FieldSchema.
 *
 * @param context the context the method is being executed in
 * @return a RubySchema encapsulating the nested Schema
 */
@JRubyMethod(name = {"get", "inner", "in"})
public RubySchema get(ThreadContext context) {
    if (internalSchema.size() != 1)
        throw new RuntimeException("Can only return nested schema if there is one schema to get");
    Ruby runtime = context.getRuntime();
    try {
        return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema.getField(0).schema, false);
    } catch (FrontendException e) {
        // Fixed typo in the error message: "FieldScema" -> "FieldSchema".
        throw new RuntimeException("Schema does not have a nested FieldSchema", e);
    }
}
public static Schema outputSchemaForThrift(TypeRef<? extends TBase<?,?>> typeRef) { Schema outSchema; try { outSchema = ThriftToPig.toSchema(typeRef.getRawClass()); // wrap the schema if size > 1 if(outSchema.size() > 1) { outSchema = new Schema(new Schema.FieldSchema(typeRef.getRawClass().getSimpleName(), outSchema, DataType.TUPLE)); } } catch (FrontendException e) { throw new RuntimeException(e); } return outSchema; } }
/**
 * Deserializes the byte range buf[startIndex..endIndex] into a Pig Tuple,
 * splitting on the streaming field delimiter and deserializing each piece
 * against the corresponding field of fs.schema.
 *
 * NOTE(review): fields are only emitted when a delimiter is seen inside the
 * range; presumably the input always carries a trailing delimiter for the
 * last field — TODO confirm against the serializer.
 */
private Tuple deserializeTuple(FieldSchema fs, byte[] buf, int startIndex, int endIndex) throws IOException {
    Schema tupleSchema = fs.schema;
    ArrayList<Object> protoTuple = new ArrayList<Object>(tupleSchema.size());
    int depth = 0;      // nesting depth, tracked so delimiters inside nested values are skipped
    int fieldNum = 0;   // index of the schema field currently being read
    int fieldStart = startIndex;
    for (int index = startIndex; index <= endIndex; index++) {
        depth = DELIMS.updateDepth(buf, depth, index);
        if (StreamingDelimiters.isDelimiter(DELIMS.getFieldDelim(), buf, index, depth, endIndex)) {
            // Field runs from fieldStart up to the byte before the delimiter.
            protoTuple.add(deserialize(tupleSchema.getField(fieldNum), buf, fieldStart, index - 1));
            fieldStart = index + 3; // assumes a 3-byte field delimiter — TODO confirm against DELIMS
            fieldNum++;
        }
    }
    return tupleFactory.newTupleNoCopy(protoTuple);
}
public static Schema outputSchemaForProtobuf(ProtobufToPig protoToPig, TypeRef<? extends Message> typeRef) { Schema outSchema; try { outSchema = protoToPig.toSchema(Protobufs.getMessageDescriptor(typeRef.getRawClass())); // wrap the schema if size > 1 if(outSchema.size() > 1) { outSchema = new Schema(new Schema.FieldSchema(typeRef.getRawClass().getSimpleName(), outSchema, DataType.TUPLE)); } } catch (FrontendException e) { throw new RuntimeException(e); } return outSchema; }
/**
 * Recursively walks a field schema and gives every empty TUPLE a single
 * unnamed BYTEARRAY field, so downstream code never sees a zero-field tuple.
 */
private static void convertEmptyTupleToBytearrayTuple(FieldSchema fs) {
    boolean isEmptyTuple =
        fs.type == DataType.TUPLE && fs.schema != null && fs.schema.size() == 0;
    if (isEmptyTuple) {
        fs.schema.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }
    if (fs.schema == null) {
        return;
    }
    for (FieldSchema child : fs.schema.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}
/**
 * Converts a JavaScript array into a Pig DataBag, converting each element to
 * a Tuple against the given schema. A schema that is a single TUPLE wrapper
 * is first unwrapped to the inner tuple schema.
 */
private DataBag jsToPigBag(Scriptable array, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Bag", array, schema);
    // Unwrap a single-tuple schema to the tuple's own schema.
    if (schema.size() == 1 && schema.getField(0).type == DataType.TUPLE) {
        schema = schema.getField(0).schema;
    }
    List<Tuple> tuples = new ArrayList<Tuple>();
    for (Object id : array.getIds()) {
        int index = ((Integer) id).intValue();
        Scriptable element = (Scriptable) array.get(index, null);
        tuples.add(jsToPigTuple(element, schema, depth + 1));
    }
    DataBag result = BagFactory.getInstance().newDefaultBag(tuples);
    debugReturn(depth, result);
    return result;
}
/**
 * Initializes the Hive UDF bridge: builds a struct ObjectInspector matching
 * the Pig input schema (wrapped in a single TUPLE field), then initializes
 * evalUDF with per-argument inspectors, substituting constant inspectors
 * where constantsInfo provides them.
 *
 * @param inputSchema   Pig schema of the UDF's input fields
 * @param evalUDF       the Hive GenericUDF being bridged
 * @param constantsInfo optional per-argument constant inspectors (may be null)
 * @throws IOException if inspector construction or UDF initialization fails
 */
private void init(Schema inputSchema, GenericUDF evalUDF, ConstantObjectInspectInfo constantsInfo) throws IOException {
    // Wrap the whole input schema in one TUPLE field so Hive sees a single struct.
    ResourceSchema rs = new ResourceSchema(inputSchema);
    ResourceFieldSchema wrappedTupleFieldSchema = new ResourceFieldSchema();
    wrappedTupleFieldSchema.setType(DataType.TUPLE);
    wrappedTupleFieldSchema.setSchema(rs);
    TypeInfo ti = HiveUtils.getTypeInfo(wrappedTupleFieldSchema);
    inputObjectInspector = (StructObjectInspector)HiveUtils.createObjectInspector(ti);
    try {
        ObjectInspector[] arguments = new ObjectInspector[inputSchema.size()];
        for (int i=0;i<inputSchema.size();i++) {
            // Prefer a constant inspector when one was supplied for this position.
            if (constantsInfo!=null && !constantsInfo.isEmpty() && constantsInfo.get(i)!=null) {
                arguments[i] = constantsInfo.get(i);
            } else {
                arguments[i] = inputObjectInspector.getAllStructFieldRefs().get(i).getFieldObjectInspector();
            }
        }
        // Any UDF initialization failure is surfaced as a checked IOException.
        outputObjectInspector = evalUDF.initialize(arguments);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
}