public static Schema getSchemaFromCols(Properties properties, List<String> columnNames, List<TypeInfo> columnTypes, String columnCommentProperty) { List<String> columnComments; if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { columnComments = new ArrayList<String>(); } else { //Comments are separated by "\0" in columnCommentProperty, see method getSchema //in MetaStoreUtils where this string columns.comments is generated columnComments = Arrays.asList(columnCommentProperty.split("\0")); if (LOG.isDebugEnabled()) { LOG.debug("columnComments is " + columnCommentProperty); } } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("AvroSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes); } final String tableName = properties.getProperty(TABLE_NAME); final String tableComment = properties.getProperty(TABLE_COMMENT); TypeInfoToSchema typeInfoToSchema = new TypeInfoToSchema(); return typeInfoToSchema.convert(columnNames, columnTypes, columnComments, properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAMESPACE.getPropName()), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_NAME.getPropName(), tableName), properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment)); }
/**
 * Tells whether the given table parameters contain the Avro schema literal key or the
 * Avro schema URL key.
 *
 * @param tableParams table parameters to inspect
 * @return true if either key is present
 */
private boolean hasExternalSchema(Map<String, String> tableParams) {
  final String literalKey = AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName();
  if (tableParams.containsKey(literalKey)) {
    return true;
  }
  final String urlKey = AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName();
  return tableParams.containsKey(urlKey);
}
/**
 * Tells whether the given properties have a value for the Avro schema literal property or
 * the Avro schema URL property.
 *
 * @param properties properties to inspect
 * @return true if either property has a non-null value
 */
private boolean hasExternalSchema(Properties properties) {
  final String literalKey = AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName();
  if (properties.getProperty(literalKey) != null) {
    return true;
  }
  final String urlKey = AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName();
  return properties.getProperty(urlKey) != null;
}
+ AvroTableProperties.SCHEMA_LITERAL.getPropName()); schemaUrl = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + AvroTableProperties.SCHEMA_URL.getPropName()); + AvroTableProperties.SCHEMA_LITERAL.getPropName() + ", " + AvroTableProperties.SCHEMA_URL.getPropName() + " or " + serdeConstants.SERIALIZATION_CLASS + " property must be set."); tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_LITERAL.getPropName()); schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_URL.getPropName()); + AvroTableProperties.SCHEMA_LITERAL.getPropName() + " property or " + serdeConstants.SERIALIZATION_CLASS + " property must be set."); + AvroTableProperties.SCHEMA_LITERAL.getPropName()); schemaUrl = tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroTableProperties.SCHEMA_URL.getPropName()); + AvroTableProperties.SCHEMA_LITERAL.getPropName() + ", " + AvroTableProperties.SCHEMA_URL.getPropName() + " or " + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+ AvroTableProperties.SCHEMA_LITERAL.getPropName()); + AvroTableProperties.SCHEMA_URL.getPropName()); } else { serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE); tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS); schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_LITERAL.getPropName()); schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroTableProperties.SCHEMA_URL.getPropName()); + AvroTableProperties.SCHEMA_LITERAL.getPropName()); tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroTableProperties.SCHEMA_URL.getPropName()); String avroSchemaRetClass = tbl.getProperty(AvroTableProperties.SCHEMA_RETRIEVER.getPropName()); + "] but neither " + AvroTableProperties.SCHEMA_LITERAL.getPropName() + ", " + AvroTableProperties.SCHEMA_URL.getPropName() + ", serialization.class or " + AvroTableProperties.SCHEMA_RETRIEVER.getPropName() + " property was set");
String propName = property.getPropName(); if (tableProps.containsKey(propName)){ String propVal = tableProps.get(propName); if (jobProperties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName())==null || jobProperties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()).isEmpty()) { jobProperties.put(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null).toString());
if(props.containsKey(AvroTableProperties.SCHEMA_LITERAL.getPropName()) || props.containsKey(AvroTableProperties.SCHEMA_URL.getPropName())) { return AvroSerdeUtils.determineSchemaOrThrowException(job, props); String s = job.get(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName()); if(s != null) { LOG.info("Found the avro schema in the job: " + s);
+ properties.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName())); properties.setProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString()); } else { configuration.set( AvroSerdeUtils.AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
bytesConverter = new TextBytesConverter(); } else if (delegateSerDe.getSerializedClass() == AvroGenericRecordWritable.class) { String schemaFromProperty = tbl.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), ""); Preconditions.checkArgument(!schemaFromProperty.isEmpty(), "Avro Schema is empty Can not go further"); Schema schema = AvroSerdeUtils.getSchemaFor(schemaFromProperty);
String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); if(schemaString != null && !schemaString.equals(SCHEMA_NONE)) return AvroSerdeUtils.getSchemaFor(schemaString); schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()); if (schemaString == null) { final String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString()); if (conf != null) conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false)); return schema; } else if(schemaString.equals(SCHEMA_NONE)) {
if(props.containsKey(AvroTableProperties.SCHEMA_LITERAL.getPropName()) || props.containsKey(AvroTableProperties.SCHEMA_URL.getPropName())) { return AvroSerdeUtils.determineSchemaOrThrowException(job, props); String s = job.get(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName()); if(s != null) { LOG.info("Found the avro schema in the job: " + s);
private FileSinkOperator.RecordWriter getHiveWriter() throws IOException { if (this.hiveWriter == null){ Properties properties = new Properties(); for (AvroSerdeUtils.AvroTableProperties tableProperty : AvroSerdeUtils.AvroTableProperties.values()){ String propVal; if((propVal = jobConf.get(tableProperty.getPropName())) != null){ properties.put(tableProperty.getPropName(),propVal); } } Boolean isCompressed = jobConf.getBoolean("mapreduce.output.fileoutputformat.compress", false); Path path = new Path(this.fileName); if(path.getFileSystem(jobConf).isDirectory(path)){ // This path is only potentially encountered during setup // Otherwise, a specific part_xxxx file name is generated and passed in. path = new Path(path,"_dummy"); } this.hiveWriter = getHiveRecordWriter(jobConf,path,null,isCompressed, properties, progressable); } return this.hiveWriter; }
private FileSinkOperator.RecordWriter getHiveWriter() throws IOException { if (this.hiveWriter == null){ Properties properties = new Properties(); for (AvroSerdeUtils.AvroTableProperties tableProperty : AvroSerdeUtils.AvroTableProperties.values()){ String propVal; if((propVal = jobConf.get(tableProperty.getPropName())) != null){ properties.put(tableProperty.getPropName(),propVal); } } Boolean isCompressed = jobConf.getBoolean("mapreduce.output.fileoutputformat.compress", false); Path path = new Path(this.fileName); if(path.getFileSystem(jobConf).isDirectory(path)){ // This path is only potentially encountered during setup // Otherwise, a specific part_xxxx file name is generated and passed in. path = new Path(path,"_dummy"); } this.hiveWriter = getHiveRecordWriter(jobConf,path,null,isCompressed, properties, progressable); } return this.hiveWriter; }
/**
 * Called on specific alter table events. When the serialization library is
 * {@code AvroSerDe} and the given table properties contain the schema literal or the
 * schema url property, the alteration is rejected with an exception.
 *
 * @param conf not read by this method
 * @param serializationLib class name of the table's serialization library
 * @param parameters table properties to inspect
 * @throws RuntimeException if the table uses {@code AvroSerDe} and {@code parameters}
 *         contains the schema literal or schema url property
 */
public static void handleAlterTableForAvro(HiveConf conf, String serializationLib,
    Map<String, String> parameters) {
  if (!AvroSerDe.class.getName().equals(serializationLib)) {
    return;
  }

  final String literalPropName = AvroTableProperties.SCHEMA_LITERAL.getPropName();
  final String urlPropName = AvroTableProperties.SCHEMA_URL.getPropName();

  final boolean hasExternalSchema =
      parameters.containsKey(literalPropName) || parameters.containsKey(urlPropName);
  if (hasExternalSchema) {
    throw new RuntimeException(
        "Not allowed to alter schema of Avro stored table having external schema."
        + " Consider removing " + literalPropName + " or " + urlPropName
        + " from table properties.");
  }
}
}
/**
 * Runs the superclass initialization and then, if the schema retriever property is set,
 * loads the named class through {@code conf} and passes it to
 * {@code initAvroSchemaRetriever}.
 *
 * @throws SerDeException wrapping the {@link ClassNotFoundException} if the configured
 *         retriever class cannot be loaded
 */
@Override
public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
    throws SerDeException {
  super.init(hbaseParams, conf, properties);

  final String retrieverClassName =
      properties.getProperty(AvroTableProperties.SCHEMA_RETRIEVER.getPropName());
  if (retrieverClassName == null) {
    // No retriever configured; nothing more to set up.
    return;
  }

  final Class<?> retrieverClass;
  try {
    retrieverClass = conf.getClassByName(retrieverClassName);
  } catch (ClassNotFoundException e) {
    throw new SerDeException(e);
  }

  initAvroSchemaRetriever(retrieverClass, conf, properties);
}
/**
 * Creates table properties that map the column {@code cola:avro} as an Avro column whose
 * schema literal is {@code RECORD_SCHEMA}, with struct auto-generation set to "true".
 */
private Properties createPropertiesForHiveAvroSchemaInline() {
  final String schemaLiteralKey = "cola.avro." + AvroTableProperties.SCHEMA_LITERAL.getPropName();

  final Properties props = new Properties();
  props.setProperty("cola.avro.serialization.type", "avro");
  props.setProperty(schemaLiteralKey, RECORD_SCHEMA);
  props.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
  props.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
  return props;
}
/**
 * Creates table properties for the backward-evolved-schema case: the column
 * {@code cola:avro} is an Avro column whose schema literal is {@code RECORD_SCHEMA}, with
 * struct auto-generation set to "true".
 */
private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
  final String schemaLiteralKey = "cola.avro." + AvroTableProperties.SCHEMA_LITERAL.getPropName();

  final Properties props = new Properties();
  props.setProperty("cola.avro.serialization.type", "avro");
  props.setProperty(schemaLiteralKey, RECORD_SCHEMA);
  props.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
  props.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
  return props;
}
/**
 * Creates table properties for the external-schema case: the column {@code cola:avro} is an
 * Avro column, the schema retriever property names the test retriever class, and the
 * column's serialization class is the {@code Employee} class.
 */
private Properties createPropertiesForHiveAvroExternalSchema() {
  final String retrieverKey = AvroTableProperties.SCHEMA_RETRIEVER.getPropName();
  final String serializationClassKey = "cola.avro." + serdeConstants.SERIALIZATION_CLASS;

  final Properties props = new Properties();
  props.setProperty("cola.avro.serialization.type", "avro");
  props.setProperty(retrieverKey, "org.apache.hadoop.hive.hbase.HBaseTestAvroSchemaRetriever");
  props.setProperty(serializationClassKey, "org.apache.hadoop.hive.hbase.avro.Employee");
  props.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
  props.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
  return props;
}
/**
 * Creates table properties that map the column {@code cola:avro} as an Avro column whose
 * schema URL property is set to the given value, with struct auto-generation set to "true".
 *
 * @param schemaUrl value stored under the column's schema URL property
 */
private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
  final String schemaUrlKey = "cola.avro." + AvroTableProperties.SCHEMA_URL.getPropName();

  final Properties props = new Properties();
  props.setProperty("cola.avro.serialization.type", "avro");
  props.setProperty(schemaUrlKey, schemaUrl);
  props.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
  props.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
  return props;
}
/**
 * Creates table properties for the forward-evolved-schema case: the column
 * {@code cola:avro} is an Avro column whose schema literal is {@code RECORD_SCHEMA_EVOLVED},
 * with struct auto-generation set to "true".
 */
private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
  final String schemaLiteralKey = "cola.avro." + AvroTableProperties.SCHEMA_LITERAL.getPropName();

  final Properties props = new Properties();
  props.setProperty("cola.avro.serialization.type", "avro");
  props.setProperty(schemaLiteralKey, RECORD_SCHEMA_EVOLVED);
  props.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
  props.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
  return props;
}