public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing * schema of the table in metastore. */ protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { if(LOG.isDebugEnabled()) { LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")"); } List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size()); for (FieldSchema fSchema : pigSchema.getFields()) { try { HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); //if writing to a partitioned table, then pigSchema will have more columns than tableSchema //partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData() // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " + // "in target table schema", LOG); fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema)); } catch (HCatException he) { throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); } } HCatSchema s = new HCatSchema(fieldSchemas); LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")"); return s; }
/**
 * Returns the i-th field of the given schema, or null when the schema is null
 * or the index is out of range. A FrontendException from the schema lookup is
 * rethrown as an unchecked RuntimeException.
 */
protected FieldSchema getField(Schema schema, int i) {
    // size() throws no checked exception, so the bounds check can sit outside the try.
    if (schema == null || i >= schema.size()) {
        return null;
    }
    try {
        return schema.getField(i);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Output schema: for a single-field BAG input, the bag's inner schema;
 * null otherwise (Pig treats null as "unknown schema"). Any exception is
 * logged and converted to a null schema.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() != 1) {
            return null;
        }
        Schema.FieldSchema field = input.getField(0);
        return (field.type == DataType.BAG) ? field.schema : null;
    } catch (Exception e) {
        this.log.error("Caught exception in " + this.getClass().getSimpleName() + ".outputSchema", e);
        return null;
    }
}
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/**
 * Sets the alias of the FieldSchema of the encapsulated Schema. Only valid
 * when the Schema contains exactly one FieldSchema.
 *
 * @param arg a RubyString holding the new name
 * @return the new name
 */
@JRubyMethod(name = "name=")
public RubyString setName(IRubyObject arg) {
    if (!(arg instanceof RubyString)) {
        throw new RuntimeException("Improper argument passed to 'name=':" + arg);
    }
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only set name if there is one schema present");
    }
    try {
        internalSchema.getField(0).alias = arg.toString();
        return (RubyString) arg;
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
}
/**
 * Output schema: a schema wrapping the third input field, or null when fewer
 * than three fields are present or the lookup fails.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        return (input.size() < 3) ? null : new Schema(input.getField(2));
    } catch (Exception e) {
        return null;
    }
}
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/**
 * Inspects a Schema to see whether a SchemaTuple implementation can be
 * generated for the types present. Currently, bags and maps are not supported.
 *
 * @param s a Schema to inspect
 * @return true if a SchemaTuple can be generated for it
 */
public static boolean isGeneratable(Schema s) {
    if (s == null || s.size() == 0) {
        return false;
    }
    for (Schema.FieldSchema field : s.getFields()) {
        // Tuples are generatable only when their nested schema is too.
        if (field.type == DataType.TUPLE && !isGeneratable(field.schema)) {
            return false;
        }
    }
    return true;
}
/**
 * @param context the context the method is being executed in
 * @return the size of the encapsulated Schema as a RubyFixnum
 */
@JRubyMethod(name = {"size", "length"})
public RubyFixnum size(ThreadContext context) {
    int fieldCount = internalSchema.size();
    return new RubyFixnum(context.getRuntime(), fieldCount);
}
/**
 * Returns the alias of the FieldSchema of the encapsulated Schema. Only valid
 * when the Schema contains exactly one FieldSchema.
 *
 * @param context the context the method is being executed in
 * @return the name of the Schema
 */
@JRubyMethod(name = "name")
public RubyString getName(ThreadContext context) {
    // size() throws no FrontendException, so the guard can live outside the try.
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only get name if there is one schema present");
    }
    try {
        String alias = internalSchema.getField(0).alias;
        return RubyString.newString(context.getRuntime(), alias);
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
/**
 * Output schema: a single unnamed LONG field. Requires the input to be
 * exactly one BAG field; anything else is reported as a RuntimeException.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() != 1) {
            throw new RuntimeException("Expected input to have only a single field");
        }
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException("Expected a BAG as input");
        }
        return new Schema(new Schema.FieldSchema(null, DataType.LONG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
}
/**
 * Output schema: a single TUPLE field that contains a copy of every input
 * field, named after this UDF class. Returns null on any failure.
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema innerTuple = new Schema();
        for (int fieldIdx = 0; fieldIdx < input.size(); ++fieldIdx) {
            innerTuple.add(input.getField(fieldIdx));
        }
        // NOTE(review): toLowerCase() here is locale-sensitive — presumably
        // intentional for schema naming; confirm if non-ROOT locales matter.
        String name = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(name, innerTuple, DataType.TUPLE));
    } catch (Exception e) {
        return null;
    }
}
/**
 * This method allows access into the Schema nested in the encapsulated Schema.
 * For example, if the encapsulated Schema is a bag Schema, this allows the
 * user to access the schema of the interior Tuple. Only valid when the Schema
 * contains exactly one FieldSchema.
 *
 * @param context the context the method is being executed in
 * @return a RubySchema encapsulating the nested Schema
 */
@JRubyMethod(name = {"get", "inner", "in"})
public RubySchema get(ThreadContext context) {
    if (internalSchema.size() != 1)
        throw new RuntimeException("Can only return nested schema if there is one schema to get");
    Ruby runtime = context.getRuntime();
    try {
        return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema.getField(0).schema, false);
    } catch (FrontendException e) {
        // Fixed typo in the error message: "FieldScema" -> "FieldSchema".
        throw new RuntimeException("Schema does not have a nested FieldSchema", e);
    }
}
public static Schema outputSchemaForThrift(TypeRef<? extends TBase<?,?>> typeRef) { Schema outSchema; try { outSchema = ThriftToPig.toSchema(typeRef.getRawClass()); // wrap the schema if size > 1 if(outSchema.size() > 1) { outSchema = new Schema(new Schema.FieldSchema(typeRef.getRawClass().getSimpleName(), outSchema, DataType.TUPLE)); } } catch (FrontendException e) { throw new RuntimeException(e); } return outSchema; } }
/**
 * Deserializes the byte range buf[startIndex..endIndex] into a Pig Tuple,
 * splitting on the streaming field delimiter and deserializing each piece
 * against the corresponding field of fs.schema.
 *
 * NOTE(review): fields are only emitted when a delimiter is seen inside the
 * range; presumably the input always carries a trailing delimiter for the
 * last field — TODO confirm against the serializer.
 */
private Tuple deserializeTuple(FieldSchema fs, byte[] buf, int startIndex, int endIndex) throws IOException {
    Schema tupleSchema = fs.schema;
    ArrayList<Object> protoTuple = new ArrayList<Object>(tupleSchema.size());
    int depth = 0;      // nesting depth, tracked so delimiters inside nested values are skipped
    int fieldNum = 0;   // index of the schema field currently being read
    int fieldStart = startIndex;
    for (int index = startIndex; index <= endIndex; index++) {
        depth = DELIMS.updateDepth(buf, depth, index);
        if (StreamingDelimiters.isDelimiter(DELIMS.getFieldDelim(), buf, index, depth, endIndex)) {
            // Field runs from fieldStart up to the byte before the delimiter.
            protoTuple.add(deserialize(tupleSchema.getField(fieldNum), buf, fieldStart, index - 1));
            fieldStart = index + 3; // assumes a 3-byte field delimiter — TODO confirm against DELIMS
            fieldNum++;
        }
    }
    return tupleFactory.newTupleNoCopy(protoTuple);
}
public static Schema outputSchemaForProtobuf(ProtobufToPig protoToPig, TypeRef<? extends Message> typeRef) { Schema outSchema; try { outSchema = protoToPig.toSchema(Protobufs.getMessageDescriptor(typeRef.getRawClass())); // wrap the schema if size > 1 if(outSchema.size() > 1) { outSchema = new Schema(new Schema.FieldSchema(typeRef.getRawClass().getSimpleName(), outSchema, DataType.TUPLE)); } } catch (FrontendException e) { throw new RuntimeException(e); } return outSchema; }
/**
 * Recursively walks a field schema and gives every empty TUPLE a single
 * unnamed BYTEARRAY field, so downstream code never sees a zero-field tuple.
 */
private static void convertEmptyTupleToBytearrayTuple(FieldSchema fs) {
    boolean isEmptyTuple =
        fs.type == DataType.TUPLE && fs.schema != null && fs.schema.size() == 0;
    if (isEmptyTuple) {
        fs.schema.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }
    if (fs.schema == null) {
        return;
    }
    for (FieldSchema child : fs.schema.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}
/**
 * Converts a JavaScript array into a Pig DataBag, converting each element to
 * a Tuple against the given schema. A schema that is a single TUPLE wrapper
 * is first unwrapped to the inner tuple schema.
 */
private DataBag jsToPigBag(Scriptable array, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Bag", array, schema);
    // Unwrap a single-tuple schema to the tuple's own schema.
    if (schema.size() == 1 && schema.getField(0).type == DataType.TUPLE) {
        schema = schema.getField(0).schema;
    }
    List<Tuple> tuples = new ArrayList<Tuple>();
    for (Object id : array.getIds()) {
        int index = ((Integer) id).intValue();
        Scriptable element = (Scriptable) array.get(index, null);
        tuples.add(jsToPigTuple(element, schema, depth + 1));
    }
    DataBag result = BagFactory.getInstance().newDefaultBag(tuples);
    debugReturn(depth, result);
    return result;
}
/**
 * Initializes the Hive UDF bridge: builds a struct ObjectInspector matching
 * the Pig input schema (wrapped in a single TUPLE field), then initializes
 * evalUDF with per-argument inspectors, substituting constant inspectors
 * where constantsInfo provides them.
 *
 * @param inputSchema   Pig schema of the UDF's input fields
 * @param evalUDF       the Hive GenericUDF being bridged
 * @param constantsInfo optional per-argument constant inspectors (may be null)
 * @throws IOException if inspector construction or UDF initialization fails
 */
private void init(Schema inputSchema, GenericUDF evalUDF, ConstantObjectInspectInfo constantsInfo) throws IOException {
    // Wrap the whole input schema in one TUPLE field so Hive sees a single struct.
    ResourceSchema rs = new ResourceSchema(inputSchema);
    ResourceFieldSchema wrappedTupleFieldSchema = new ResourceFieldSchema();
    wrappedTupleFieldSchema.setType(DataType.TUPLE);
    wrappedTupleFieldSchema.setSchema(rs);
    TypeInfo ti = HiveUtils.getTypeInfo(wrappedTupleFieldSchema);
    inputObjectInspector = (StructObjectInspector)HiveUtils.createObjectInspector(ti);
    try {
        ObjectInspector[] arguments = new ObjectInspector[inputSchema.size()];
        for (int i=0;i<inputSchema.size();i++) {
            // Prefer a constant inspector when one was supplied for this position.
            if (constantsInfo!=null && !constantsInfo.isEmpty() && constantsInfo.get(i)!=null) {
                arguments[i] = constantsInfo.get(i);
            } else {
                arguments[i] = inputObjectInspector.getAllStructFieldRefs().get(i).getFieldObjectInspector();
            }
        }
        // Any UDF initialization failure is surfaced as a checked IOException.
        outputObjectInspector = evalUDF.initialize(arguments);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
}