/**
 * Setter for column schemas.
 *
 * <p>Replaces the cached column list and pushes the new columns down to the
 * underlying storage descriptor, but only when the supplied list actually
 * differs from the current one.
 *
 * @param cols the new column schemas
 * @return this table, to allow call chaining
 */
public HCatTable cols(List<HCatFieldSchema> cols) {
  if (this.cols.equals(cols)) {
    return this; // unchanged; skip the clear/repopulate cycle
  }
  this.cols.clear();
  this.cols.addAll(cols);
  this.sd.setCols(HCatSchemaUtils.getFieldSchemas(cols));
  return this;
}
/**
 * Replaces the column definitions held by this table's storage descriptor.
 *
 * @param fields the new column definitions to store
 */
public void setFields(List<FieldSchema> fields) {
  tTable.getSd().setCols(fields);
}
/**
 * Replaces the column definitions held by this table's storage descriptor.
 *
 * @param fields the new column definitions to store
 */
public void setFields(List<FieldSchema> fields) {
  tTable.getSd().setCols(fields);
}
/**
 * Builds a Thrift {@link StorageDescriptor} for the given table from its
 * column list and storage settings.
 *
 * <p>Skewed storage is rejected because writing to skewed tables/partitions
 * is not supported.
 *
 * @param tableName name used for the serde info
 * @param columns   table columns, converted to metastore field schemas
 * @param storage   source of location, formats, serde and bucketing info
 * @return a fully populated storage descriptor
 * @throws IllegalArgumentException if the storage is skewed
 */
private static StorageDescriptor makeStorageDescriptor(String tableName, List<Column> columns, Storage storage) {
    if (storage.isSkewed()) {
        throw new IllegalArgumentException("Writing to skewed table/partition is not supported");
    }
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(storage.getStorageFormat().getSerDeNullable());
    serdeInfo.setParameters(storage.getSerdeParameters());

    StorageDescriptor sd = new StorageDescriptor();
    sd.setLocation(emptyToNull(storage.getLocation()));
    sd.setCols(columns.stream()
            .map(ThriftMetastoreUtil::toMetastoreApiFieldSchema)
            .collect(toList()));
    sd.setSerdeInfo(serdeInfo);
    sd.setInputFormat(storage.getStorageFormat().getInputFormatNullable());
    sd.setOutputFormat(storage.getStorageFormat().getOutputFormatNullable());
    sd.setParameters(ImmutableMap.of());

    // Bind the optional bucket property once instead of calling get()
    // repeatedly after an isPresent() check.
    storage.getBucketProperty().ifPresent(bucketProperty -> {
        sd.setNumBuckets(bucketProperty.getBucketCount());
        sd.setBucketCols(bucketProperty.getBucketedBy());
        if (!bucketProperty.getSortedBy().isEmpty()) {
            sd.setSortCols(bucketProperty.getSortedBy().stream()
                    .map(column -> new Order(column.getColumnName(), column.getOrder().getHiveOrder()))
                    .collect(toList()));
        }
    });
    return sd;
}
/** * Update table schema, adding new columns as added for the partition. * @param client the client * @param table the table * @param partitionSchema the schema of the partition * @throws java.io.IOException Signals that an I/O exception has occurred. * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception * @throws org.apache.thrift.TException the t exception */ private void updateTableSchema(IMetaStoreClient client, Table table, HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { List<FieldSchema> newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); if (newColumns.size() != 0) { List<FieldSchema> tableColumns = new ArrayList<FieldSchema>(table.getTTable().getSd().getCols()); tableColumns.addAll(newColumns); //Update table schema to add the newly added columns table.getTTable().getSd().setCols(tableColumns); client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); } }
/**
 * Refreshes the metastore column list of a table (or one of its partitions)
 * from the schema reported by its deserializer.
 *
 * @param tbl  the table whose columns are refreshed
 * @param part the partition to update, or null to update the table itself
 * @return 0 on success
 * @throws HiveException if the serde's schema is managed by HMS itself, or
 *         if reading the schema from the deserializer fails
 */
private int updateColumns(Table tbl, Partition part) throws HiveException {
  String serializationLib = tbl.getSd().getSerdeInfo().getSerializationLib();
  // Serdes in this list have their schema owned by the metastore; rewriting
  // the columns from the deserializer would be redundant or wrong.
  if (MetastoreConf.getStringCollection(conf,
      MetastoreConf.ConfVars.SERDES_USING_METASTORE_FOR_SCHEMA).contains(serializationLib)) {
    throw new HiveException(tbl.getTableName() + " has serde " + serializationLib + " for which schema " +
        "is already handled by HMS.");
  }
  Deserializer deserializer = tbl.getDeserializer(true);
  try {
    LOG.info("Updating metastore columns for table: {}", tbl.getTableName());
    final List<FieldSchema> fields = HiveMetaStoreUtils.getFieldsFromDeserializer(
        tbl.getTableName(), deserializer);
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    sd.setCols(fields);
  } catch (org.apache.hadoop.hive.serde2.SerDeException | MetaException e) {
    // Pass the throwable as the last argument (no placeholder) so SLF4J logs
    // the full stack trace instead of just the exception's toString().
    LOG.error("alter table update columns", e);
    throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
  }
  return 0;
}
/** * Sets columns from table cache to table and partition. * * @param table the source of column lists cache * @param partition partition which will set column list */ public static void restoreColumns(HiveTableWithColumnCache table, HivePartition partition) { // exactly the same column lists for partitions or table // stored only one time to reduce physical plan serialization if (partition != null && partition.getSd().getCols() == null) { partition.getSd().setCols(table.getColumnListsCache().getColumns(partition.getColumnListIndex())); } if (table.getSd().getCols() == null) { table.getSd().setCols(table.getColumnListsCache().getColumns(0)); } }
/**
 * Builds a single-key test partition whose storage descriptor uses the Avro
 * serde and a single int column named "foo".
 *
 * @param table the table the partition belongs to
 * @return the constructed test partition
 * @throws HiveException if partition construction fails
 */
private Partition getTestPartition(Table table) throws HiveException {
  Partition testPartition = new Partition(table, ImmutableMap.of("partition_key", "1"), null);
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setSerdeInfo(new SerDeInfo("avro", AvroSerDe.class.getName(), null));
  descriptor.setCols(Lists.newArrayList(new FieldSchema("foo", "int", null)));
  testPartition.getTPartition().setSd(descriptor);
  return testPartition;
}
}
/**
 * Creating a table whose storage descriptor has a null column list must be
 * rejected by the metastore with a MetaException.
 */
@Test(expected = MetaException.class)
public void testCreateTableInvalidStorageDescriptorNullColumns() throws Exception {
  Table tbl = getNewTable();
  tbl.getSd().setCols(null);
  client.createTable(tbl);
}
@Test public void testAddPartitionsEmptyColsInSd() throws Exception { createTable(); Partition partition = buildPartition(DB_NAME, TABLE_NAME, DEFAULT_YEAR_VALUE); partition.getSd().setCols(new ArrayList<>()); client.add_partitions(Lists.newArrayList(partition)); // TODO: Not sure that this is the correct behavior. It doesn't make sense to create the // partition without column info. This should be investigated later. Partition part = client.getPartition(DB_NAME, TABLE_NAME, Lists.newArrayList(DEFAULT_YEAR_VALUE)); Assert.assertNotNull(part); Assert.assertTrue(part.getSd().getCols().isEmpty()); }
@Test public void testAddPartitionEmptyColsInSd() throws Exception { createTable(); Partition partition = buildPartition(DB_NAME, TABLE_NAME, DEFAULT_YEAR_VALUE); partition.getSd().setCols(new ArrayList<>()); client.add_partition(partition); // TODO: Not sure that this is the correct behavior. It doesn't make sense to create the // partition without column info. This should be investigated later. Partition part = client.getPartition(DB_NAME, TABLE_NAME, Lists.newArrayList(DEFAULT_YEAR_VALUE)); Assert.assertNotNull(part); Assert.assertTrue(part.getSd().getCols().isEmpty()); }
/**
 * Attaches a freshly built storage descriptor to {@code tbl}, covering the
 * given columns and configured with LazySimpleSerDe plus the default Hive
 * input/output formats. The descriptor is uncompressed and single-bucketed.
 *
 * @param cols columns to store in the descriptor
 * @param tbl  the table that receives the descriptor
 */
private void addSd(ArrayList<FieldSchema> cols, Table tbl) {
  // Build the serde info first, then assemble the descriptor around it.
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName(tbl.getTableName());
  serdeInfo.setParameters(new HashMap<String, String>());
  serdeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
  serdeInfo.setSerializationLib(LazySimpleSerDe.class.getName());

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(cols);
  sd.setCompressed(false);
  sd.setNumBuckets(1);
  sd.setParameters(new HashMap<String, String>());
  sd.setBucketCols(new ArrayList<String>());
  sd.setSortCols(new ArrayList<Order>());
  sd.setSerdeInfo(serdeInfo);
  sd.setInputFormat(HiveInputFormat.class.getName());
  sd.setOutputFormat(HiveOutputFormat.class.getName());
  tbl.setSd(sd);
}
@Test public void testAddPartitionsNullColsInSd() throws Exception { createTable(); Partition partition = buildPartition(DB_NAME, TABLE_NAME, DEFAULT_YEAR_VALUE); partition.getSd().setCols(null); client.add_partitions(Lists.newArrayList(partition)); // TODO: Not sure that this is the correct behavior. It doesn't make sense to create the // partition without column info. This should be investigated later. Partition part = client.getPartition(DB_NAME, TABLE_NAME, Lists.newArrayList(DEFAULT_YEAR_VALUE)); Assert.assertNotNull(part); Assert.assertNull(part.getSd().getCols()); }
/**
 * Builds a storage descriptor at the given location, using the year
 * partition column, test input/output format names, and a shared serde.
 *
 * @param location filesystem location for the descriptor
 * @return the populated storage descriptor
 */
private StorageDescriptor buildSD(String location) {
  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName("sharedSDPartSerde");

  Map<String, String> parameters = new HashMap<>();
  parameters.put("testSDParamKey", "testSDParamValue");

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setInputFormat("TestInputFormat");
  descriptor.setOutputFormat("TestOutputFormat");
  descriptor.setCols(getYearPartCol());
  descriptor.setCompressed(false);
  descriptor.setParameters(parameters);
  descriptor.setLocation(location);
  descriptor.setSerdeInfo(serdeInfo);
  return descriptor;
}
/**
 * Altering a table so that its storage descriptor has a null column list
 * must be rejected by the metastore with a MetaException.
 */
@Test(expected = MetaException.class)
public void testAlterTableInvalidStorageDescriptorNullCols() throws Exception {
  Table originalTable = testTables[0];
  Table alteredTable = originalTable.deepCopy();
  alteredTable.getSd().setCols(null);
  client.alter_table(originalTable.getDbName(), originalTable.getTableName(), alteredTable);
}
/**
 * Creates a partitioned RCFile-backed table in the test database and applies
 * the requested permission string to its warehouse directory.
 *
 * <p>Columns come from {@code ColumnHolder.colMapping} keyed by table name;
 * partition keys come from {@code ColumnHolder.partitionCols}.
 *
 * @param tableName name of the table to create
 * @param tablePerm octal permission string applied to the table directory
 * @throws Exception if table creation or permission change fails
 */
private static void createTable(String tableName, String tablePerm) throws Exception {
  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(ColumnHolder.colMapping.get(tableName));
  descriptor.setParameters(new HashMap<String, String>());
  descriptor.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
  descriptor.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());

  SerDeInfo serdeInfo = new SerDeInfo();
  serdeInfo.setName(tableName);
  serdeInfo.setParameters(new HashMap<String, String>());
  serdeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
  serdeInfo.setSerializationLib(
      org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
  descriptor.setSerdeInfo(serdeInfo);

  Table tbl = new Table();
  tbl.setDbName(DATABASE);
  tbl.setTableName(tableName);
  tbl.setSd(descriptor);
  tbl.setPartitionKeys(ColumnHolder.partitionCols);
  hmsc.createTable(tbl);

  // Apply the requested permissions to the freshly created table directory.
  Path path = new Path(warehousedir, tableName);
  FileSystem fs = path.getFileSystem(hiveConf);
  fs.setPermission(path, new FsPermission(tablePerm));
}
@Test public void testAddPartitionNullColsInSd() throws Exception { createTable(); Partition partition = buildPartition(DB_NAME, TABLE_NAME, DEFAULT_YEAR_VALUE); partition.getSd().setCols(null); client.add_partition(partition); // TODO: Not sure that this is the correct behavior. It doesn't make sense to create the // partition without column info. This should be investigated later. Partition part = client.getPartition(DB_NAME, TABLE_NAME, Lists.newArrayList(DEFAULT_YEAR_VALUE)); Assert.assertNotNull(part); Assert.assertNull(part.getSd().getCols()); }
/**
 * Registers a test partition for the given table with the local metastore
 * client, deriving the partition location from the table location when one
 * is set.
 *
 * @param tbl        the owning table
 * @param values     partition key values
 * @param createTime creation time to stamp on the partition
 * @return the partition as returned by the metastore
 * @throws Exception if the metastore call fails
 */
public Partition addTestPartition(Table tbl, List<String> values, int createTime) throws Exception {
  StorageDescriptor sd = new StorageDescriptor();
  if (StringUtils.isNotBlank(tbl.getSd().getLocation())) {
    // NOTE(review): this concatenates the List's toString() (e.g. "[a, b]")
    // onto the location, matching long-standing behavior — confirm intended.
    sd.setLocation(tbl.getSd().getLocation() + values);
  } else {
    sd.setLocation("/tmp/" + tbl.getTableName() + "/part1");
  }
  sd.setSerdeInfo(
      new SerDeInfo("name", "serializationLib", ImmutableMap.of(HiveAvroSerDeManager.SCHEMA_URL, "/tmp/dummy")));
  sd.setCols(tbl.getPartitionKeys());

  Partition newPartition =
      new Partition(values, tbl.getDbName(), tbl.getTableName(), 1, 1, sd, new HashMap<String, String>());
  newPartition.setCreateTime(createTime);
  return this.getLocalMetastoreClient().add_partition(newPartition);
}
@Test public void testCreateTableDefaultValuesView() throws Exception { Table table = new Table(); StorageDescriptor sd = new StorageDescriptor(); List<FieldSchema> cols = new ArrayList<>(); table.setDbName(DEFAULT_DATABASE); table.setTableName("test_table_2"); table.setTableType("VIRTUAL_VIEW"); cols.add(new FieldSchema("column_name", "int", null)); sd.setCols(cols); sd.setSerdeInfo(new SerDeInfo()); table.setSd(sd); client.createTable(table); Table createdTable = client.getTable(table.getDbName(), table.getTableName()); // No location should be created for views Assert.assertNull("Storage descriptor location should be null", createdTable.getSd().getLocation()); }
/**
 * A table created in a non-default database with no explicit location must
 * be placed under {@code <warehouseRoot>/<db>.db/<table>} by the metastore.
 */
@Test
public void testCreateTableDefaultLocationInSpecificDatabase() throws Exception {
  List<FieldSchema> columns = new ArrayList<>();
  columns.add(new FieldSchema("column_name", "int", null));

  StorageDescriptor descriptor = new StorageDescriptor();
  descriptor.setCols(columns);
  descriptor.setSerdeInfo(new SerDeInfo());

  Table table = new Table();
  table.setDbName(OTHER_DATABASE);
  table.setTableName("test_table_2");
  table.setSd(descriptor);
  client.createTable(table);

  Table createdTable = client.getTable(table.getDbName(), table.getTableName());
  Assert.assertEquals("Storage descriptor location",
      metaStore.getWarehouseRoot() + "/" + table.getDbName() + ".db/" + table.getTableName(),
      createdTable.getSd().getLocation());
}