public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { // Iterate through all the elements in Pig Schema and do validations as // dictated by semantics, consult HCatSchema of table when need be. int columnPos = 0;//helps with debug messages for (FieldSchema pigField : pigSchema.getFields()) { HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++); } try { PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); } catch (IOException e) { throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); } }
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) { for (FieldSchema field : schema.getFields()) { String node; if (field.alias != null) { // if no field node = fa.toES(field.alias); node = (currentNode != null ? currentNode + "." + node : node); } else { node = currentNode; } // && field.type != DataType.TUPLE if (field.schema != null) { addField(field.schema, fields, fa, node); } else { if (!StringUtils.hasText(node)) { LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema); } if (node != null) { fields.add(fa.toES(node)); } } } }
/** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing * schema of the table in metastore. */ protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { if(LOG.isDebugEnabled()) { LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")"); } List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size()); for (FieldSchema fSchema : pigSchema.getFields()) { try { HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); //if writing to a partitioned table, then pigSchema will have more columns than tableSchema //partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData() // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " + // "in target table schema", LOG); fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema)); } catch (HCatException he) { throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); } } HCatSchema s = new HCatSchema(fieldSchemas); LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")"); return s; }
for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos); for (FieldSchema innerField : pigField.schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>(); HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); List<FieldSchema> fields = fSchema.schema.getFields(); for (int i = 0; i < fields.size(); i++) { FieldSchema fieldSchema = fields.get(i);
/** * Validates the schema returned when specific columns of a table are given as part of LOAD . * @throws Exception */ @Test public void testSchemaForTableWithSpecificColumns() throws Exception { //create the table final String TABLE = "TABLE2"; final String ddl = "CREATE TABLE " + TABLE + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) "; conn.createStatement().execute(ddl); final String selectColumns = "ID,NAME"; pigServer.registerQuery(String.format( "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE, selectColumns, zkQuorum)); Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID")); assertTrue(fields.get(0).type == DataType.INTEGER); assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME")); assertTrue(fields.get(1).type == DataType.CHARARRAY); }
/** * Validates the schema returned when a SQL SELECT query is given as part of LOAD . * @throws Exception */ @Test public void testSchemaForQuery() throws Exception { //create the table. final String TABLE = "TABLE3"; String ddl = String.format("CREATE TABLE " + TABLE + " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" + " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))\n", TABLE); conn.createStatement().execute(ddl); //sql query for LOAD final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE; pigServer.registerQuery(String.format( "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", sqlQuery, zkQuorum)); //assert the schema. Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(3, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string")); assertTrue(fields.get(0).type == DataType.CHARARRAY); assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer")); assertTrue(fields.get(1).type == DataType.INTEGER); assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double")); assertTrue(fields.get(2).type == DataType.DOUBLE); }
/**
 * Validates the schema returned for a table with Pig data types.
 *
 * <p>Creates a four-column table, loads the whole table through {@code PhoenixHBaseLoader},
 * and checks the alias and type of every field of the dumped Pig schema.
 *
 * @throws Exception if table creation, query registration or schema retrieval fails
 */
@Test
public void testSchemaForTable() throws Exception {
    final String TABLE = "TABLE1";
    final String ddl = String.format("CREATE TABLE %s "
            + "  (a_string varchar not null, a_binary varbinary not null, a_integer integer, cf1.a_float float"
            + "  CONSTRAINT pk PRIMARY KEY (a_string, a_binary))\n", TABLE);
    conn.createStatement().execute(ddl);
    conn.commit();

    pigServer.registerQuery(String.format(
            "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE, zkQuorum));

    final Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(4, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
    // assertEquals reports expected vs. actual type codes on failure, unlike assertTrue(a == b).
    assertEquals(DataType.CHARARRAY, fields.get(0).type);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("a_binary"));
    assertEquals(DataType.BYTEARRAY, fields.get(1).type);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("a_integer"));
    assertEquals(DataType.INTEGER, fields.get(2).type);
    assertTrue(fields.get(3).alias.equalsIgnoreCase("a_float"));
    assertEquals(DataType.FLOAT, fields.get(3).type);
}
List<FieldSchema> fields = schema.getFields(); assertEquals(4, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("FOO"));
List<FieldSchema> fields = schema.getFields(); assertEquals(4, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
/** * Construct a ResourceSchema from a {@link Schema} * @param pigSchema Schema to use */ public ResourceSchema(Schema pigSchema) { List<FieldSchema> pigSchemaFields = pigSchema.getFields(); fields = new ResourceFieldSchema[pigSchemaFields.size()]; for (int i=0; i<fields.length; i++) { fields[i] = new ResourceFieldSchema(pigSchemaFields.get(i)); } }
/**
 * Clears the alias of every field in the schema, descending into nested schemas.
 * The schema is modified in place.
 *
 * @param s schema whose field aliases are set to {@code null}
 */
private static void stripAliases(Schema s) {
    for (Schema.FieldSchema field : s.getFields()) {
        field.alias = null;

        Schema nested = field.schema;
        if (nested == null) {
            continue;
        }
        stripAliases(nested);
    }
}
/**
 * Rewrites the schema in place so that every BOOLEAN field becomes an INTEGER field,
 * recursing into nested schemas. A {@code null} schema is ignored.
 *
 * @param schema schema to convert; may be {@code null}
 */
private void convertToElephantBirdCompatibleSchema(Schema schema) {
    if (schema != null) {
        for (FieldSchema field : schema.getFields()) {
            boolean isBoolean = (field.type == DataType.BOOLEAN);
            if (isBoolean) {
                field.type = DataType.INTEGER;
            }
            // The recursive call handles a null nested schema itself.
            convertToElephantBirdCompatibleSchema(field.schema);
        }
    }
}
/**
 * Produces an output schema in which the first field of the input schema is moved to the end.
 *
 * <p>The rotation is done on a copy of the input's field list, so the caller's schema is left
 * untouched (the field schema objects themselves are shared, not cloned). An empty input
 * yields an empty schema, and a {@code null} input yields {@code null}.
 *
 * @param p_input the input schema; may be {@code null}
 * @return a new schema with the fields rotated left by one, or {@code null} for null input
 */
@Override
public Schema outputSchema(Schema p_input) {
    if (p_input == null) {
        return null;
    }
    // Copy first: getFields() returns a list that would otherwise be reordered for the caller.
    List<FieldSchema> rotated = new java.util.ArrayList<FieldSchema>(p_input.getFields());
    if (!rotated.isEmpty()) {
        rotated.add(rotated.remove(0));
    }
    return new Schema(rotated);
}
/**
 * Recursively set NULL type to the specified type in a schema. Each field of the schema is
 * handed to {@code FieldSchema.setFieldSchemaDefaultType}; a {@code null} schema is a no-op.
 *
 * @param s the schema whose NULL type has to be set
 * @param t the specified type
 */
public static void setSchemaDefaultType(Schema s, byte t) {
    if (s != null) {
        for (Schema.FieldSchema field : s.getFields()) {
            FieldSchema.setFieldSchemaDefaultType(field, t);
        }
    }
}
/**
 * Computes a hash code for a schema as the sum of each field's hash multiplied by an entry
 * of {@code primeList}, chosen by the field's position (wrapping around the list).
 *
 * @param s the schema to hash; may be {@code null}
 * @return the combined hash, or {@code 0} when {@code s} is {@code null}
 */
public static int hashCode(Schema s) {
    if (s == null) {
        return 0;
    }

    int result = 0;
    int position = 0;
    for (FieldSchema fs : s.getFields()) {
        int fieldHash = hashCode(fs);
        result += fieldHash * primeList[position % primeList.length];
        position++;
    }
    return result;
}
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**