/**
 * Builds an {@link ImportTableDesc} targeting the given database from a
 * metastore-level table object.
 *
 * @param dbName destination database name
 * @param tblObj thrift table object read from the dump/metastore
 * @return the import descriptor wrapping the table
 */
private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName,
    org.apache.hadoop.hive.metastore.api.Table tblObj) throws Exception {
  // Wrap the thrift-level table in the ql-level Table before building the descriptor.
  return new ImportTableDesc(dbName, new Table(tblObj));
}
/**
 * Creates a task that re-creates the table definition in "replace" mode, i.e. an
 * alter of an already-existing table. The replication spec is attached only when
 * the operation happens within replication scope.
 *
 * @param tableDesc descriptor of the table to alter
 * @param x wrapper context providing conf/tasks plumbing
 * @param replicationSpec replication metadata; may be null outside replication
 * @return the create-table task configured for replace mode
 */
private static Task<? extends Serializable> alterTableTask(ImportTableDesc tableDesc,
    EximUtil.SemanticAnalyzerWrapperContext x, ReplicationSpec replicationSpec) {
  tableDesc.setReplaceMode(true);
  boolean inReplicationScope = replicationSpec != null && replicationSpec.isInReplicationScope();
  if (inReplicationScope) {
    tableDesc.setReplicationSpec(replicationSpec);
  }
  return tableDesc.getCreateTableTask(x);
}
static TableLocationTuple tableLocation(ImportTableDesc tblDesc, Database parentDb, TableContext tableContext, Context context) throws MetaException, SemanticException { Warehouse wh = context.warehouse; Path defaultTablePath; if (parentDb == null) { defaultTablePath = wh.getDefaultTablePath(tblDesc.getDatabaseName(), tblDesc.getTableName(), tblDesc.isExternal()); } else { defaultTablePath = wh.getDefaultTablePath( parentDb, tblDesc.getTableName(), tblDesc.isExternal() ); } // dont use TableType.EXTERNAL_TABLE.equals(tblDesc.tableType()) since this comes in as managed always for tables. if (tblDesc.isExternal()) { if (tblDesc.getLocation() == null) { // this is the use case when the table got converted to external table as part of migration // related rules to be applied to replicated tables across different versions of hive. return new TableLocationTuple(wh.getDnsPath(defaultTablePath).toString(), true); } String currentLocation = new Path(tblDesc.getLocation()).toUri().getPath(); String newLocation = ReplExternalTables.externalTableLocation(context.hiveConf, currentLocation); LOG.debug("external table {} data location is: {}", tblDesc.getTableName(), newLocation); return new TableLocationTuple(newLocation, false); } Path path = tableContext.waitOnPrecursor() ? wh.getDnsPath(defaultTablePath) : wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()); return new TableLocationTuple(path.toString(), false); }
/**
 * Utility method that returns a table if one corresponding to the destination
 * tblDesc is found. Returns null if no such table is found.
 *
 * @param tblDesc descriptor naming the destination database and table
 * @param db Hive client to look the table up with
 * @return the existing table, or null when it does not exist
 * @throws HiveException on metastore errors other than a missing table
 */
public static Table tableIfExists(ImportTableDesc tblDesc, Hive db) throws HiveException {
  String destDbName = tblDesc.getDatabaseName();
  String destTableName = tblDesc.getTableName();
  try {
    return db.getTable(destDbName, destTableName);
  } catch (InvalidTableException absent) {
    // Missing table is an expected outcome here, not an error.
    return null;
  }
}
private static Table createNewTableMetadataObject(ImportTableDesc tblDesc, boolean isRepl) throws SemanticException { Table newTable = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName()); //so that we know the type of table we are creating: acid/MM to match what was exported newTable.setParameters(tblDesc.getTblProps()); if(tblDesc.isExternal() && AcidUtils.isTransactionalTable(newTable)) { if (isRepl) { throw new SemanticException("External tables may not be transactional: " + Warehouse.getQualifiedName(tblDesc.getDatabaseName(), tblDesc.getTableName())); } else { throw new AssertionError("Internal error: transactional properties not set properly" + tblDesc.getTblProps()); } } return newTable; }
if ((tableDesc.isExternal()) // IMPORT statement specified EXTERNAL && (!table.isPartitioned() || !table.getTableType().equals(TableType.EXTERNAL_TABLE)) ){ if ((tableDesc.getLocation() != null) && (!table.isPartitioned()) && (!table.getDataLocation().equals(new Path(tableDesc.getLocation()))) ){ throw new SemanticException( ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg(" Location does not match")); List<FieldSchema> importedTableCols = tableDesc.getCols(); if (!EximUtil.schemaCompare(importedTableCols, existingTableCols)) { throw new SemanticException( List<FieldSchema> importedTablePartCols = tableDesc.getPartCols(); if (!EximUtil.schemaCompare(importedTablePartCols, existingTablePartCols)) { throw new SemanticException( Map<String, String> importedTableParams = tableDesc.getTblProps(); String error = checkParams(existingTableParams, importedTableParams, new String[] { "howl.isd", String importedifc = tableDesc.getInputFormat(); String existingofc = table.getOutputFormatClass().getName(); String importedofc = tableDesc.getOutputFormat(); String importedSerde = tableDesc.getSerName(); if (!existingSerde.equals(importedSerde)) { throw new SemanticException(
if (TableType.valueOf(tblObj.getTableType()) == TableType.EXTERNAL_TABLE) { replicationSpec.setMigratingToExternalTable(); tblDesc.setExternal(true); tblDesc.setLocation(null); tblDesc.setReplicationSpec(replicationSpec); if (TxnUtils.isTransactionalTable(tblDesc.getTblProps())) { StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE); tblDesc.setReplWriteId(writeId); tblDesc.setExternal(isExternalSet); STATIC_LOG.debug("table {} location is {}", tblDesc.getTableName(), parsedLocation); tblDesc.setLocation(parsedLocation); x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf())); tblDesc.setTableName(parsedTableName); if (TxnUtils.isTransactionalTable(tblDesc.getTblProps())) { StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE); if (tblDesc.getTableName() == null) { x.getConf().set("import.destination.table", tblDesc.getTableName()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setTableName(tblDesc.getTableName());
private void newTableTasks(ImportTableDesc tblDesc, Task<?> tblRootTask, TableLocationTuple tuple) throws Exception { Table table = tblDesc.toTable(context.hiveConf); ReplicationSpec replicationSpec = event.replicationSpec(); Task<?> createTableTask = tblDesc.getCreateTableTask(new HashSet<>(), new HashSet<>(), context.hiveConf); if (tblRootTask == null) { tblRootTask = createTableTask; if (replicationSpec.isTransactionalTableDump()) { List<String> partNames = isPartitioned(tblDesc) ? event.partitions(tblDesc) : null; ReplTxnWork replTxnWork = new ReplTxnWork(tblDesc.getDatabaseName(), tblDesc.getTableName(), partNames, replicationSpec.getValidWriteIdList(), ReplTxnWork.OperationType.REPL_WRITEID_STATE); Task<?> replTxnTask = TaskFactory.get(replTxnWork, context.hiveConf); AcidUtils.getFullTableName(tblDesc.getDatabaseName(), tblDesc.getTableName()), new long[0], new BitSet(), ReplUtils.REPL_BOOTSTRAP_MIGRATION_BASE_WRITE_ID); ReplTxnWork replTxnWork = new ReplTxnWork(tblDesc.getDatabaseName(), tblDesc.getTableName(), null, validWriteIdList.writeToString(), ReplTxnWork.OperationType.REPL_WRITEID_STATE); Task<?> replTxnTask = TaskFactory.get(replTxnWork, context.hiveConf); if (shouldCreateLoadTableTask) { LOG.debug("adding dependent ReplTxnTask/CopyWork/MoveWork for table"); Task<?> loadTableTask = loadTableTask(table, replicationSpec, new Path(tblDesc.getLocation()), event.metadataPath()); parentTask.addDependentTask(loadTableTask);
= new ImportTableDesc(StringUtils.isBlank(dbName) ? table.getDbName() : dbName, table); if (TableType.EXTERNAL_TABLE.equals(table.getTableType())) { tableDesc.setLocation( table.getDataLocation() == null ? null : table.getDataLocation().toString()); tableDesc.setExternal(true); tableDesc.setReplicationSpec(replicationSpec()); if (table.getTableType() == TableType.EXTERNAL_TABLE) { tableDesc.setExternal(true);
try { AddPartitionDesc partsDesc = new AddPartitionDesc(tblDesc.getDatabaseName(), tblDesc.getTableName(), EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()), partition.getSd().getLocation(), partition.getParameters()); AddPartitionDesc.OnePartitionDesc partDesc = partsDesc.getPartition(0); partDesc.setBucketCols(partition.getSd().getBucketCols()); partDesc.setSortCols(partition.getSd().getSortCols()); if (tblDesc.isExternal() && !replicationSpec().isMigratingToExternalTable()) { Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString()); ColumnStatistics colStats = partition.getColStats(); ColumnStatisticsDesc colStatsDesc = new ColumnStatisticsDesc(colStats.getStatsDesc()); colStatsDesc.setTableName(tblDesc.getTableName()); colStatsDesc.setDbName(tblDesc.getDatabaseName()); partDesc.setColStats(new ColumnStatistics(colStatsDesc, colStats.getStatsObj()));
/**
 * Helper method to set location properly in partSpec.
 *
 * Precedence for the partition's target path: explicit table location from the
 * descriptor, then the existing table's data location, then the warehouse default
 * path for the table; in every case the partition sub-path is appended.
 *
 * @throws MetaException if the partition path cannot be built
 * @throws HiveException on metastore access failure
 * @throws IOException if the target filesystem cannot be reached
 */
private static void fixLocationInPartSpec(FileSystem fs, ImportTableDesc tblDesc, Table table,
    Warehouse wh, ReplicationSpec replicationSpec, AddPartitionDesc.OnePartitionDesc partSpec,
    EximUtil.SemanticAnalyzerWrapperContext x) throws MetaException, HiveException, IOException {
  Path tgtPath;
  if (tblDesc.getLocation() != null) {
    // Explicit table location from the descriptor wins.
    tgtPath = new Path(tblDesc.getLocation(), Warehouse.makePartPath(partSpec.getPartSpec()));
  } else if (table.getDataLocation() != null) {
    // Fall back to the existing table's data location.
    tgtPath = new Path(table.getDataLocation().toString(),
        Warehouse.makePartPath(partSpec.getPartSpec()));
  } else {
    // Neither available: derive the default warehouse path for the table.
    Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
    tgtPath = new Path(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()),
        Warehouse.makePartPath(partSpec.getPartSpec()));
  }
  FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
  checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x);
  partSpec.setLocation(tgtPath.toString());
}
/**
 * Builds the base AddPartitionDesc for one exported partition, mirroring the source
 * partition's storage descriptor and choosing the partition data location.
 *
 * @param fromPath root of the dumped table data
 * @param dbName destination database name
 * @param tblDesc descriptor of the destination table
 * @param partition source partition from the dump
 * @param replicationSpec replication metadata for this load
 * @param conf Hive configuration used to map external table locations
 * @return populated AddPartitionDesc for the partition
 */
private static AddPartitionDesc getBaseAddPartitionDescFromPartition(
    Path fromPath, String dbName, ImportTableDesc tblDesc, Partition partition,
    ReplicationSpec replicationSpec, HiveConf conf) throws MetaException, SemanticException {
  AddPartitionDesc addPartitionDesc = new AddPartitionDesc(dbName, tblDesc.getTableName(),
      EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()),
      partition.getSd().getLocation(), partition.getParameters());
  AddPartitionDesc.OnePartitionDesc onePartDesc = addPartitionDesc.getPartition(0);
  // Copy the storage descriptor details of the source partition.
  onePartDesc.setInputFormat(partition.getSd().getInputFormat());
  onePartDesc.setOutputFormat(partition.getSd().getOutputFormat());
  onePartDesc.setNumBuckets(partition.getSd().getNumBuckets());
  onePartDesc.setCols(partition.getSd().getCols());
  onePartDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
  onePartDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
  onePartDesc.setBucketCols(partition.getSd().getBucketCols());
  onePartDesc.setSortCols(partition.getSd().getSortCols());
  // Replicated external tables (not being migrated to external) keep data at the
  // externally-mapped location; everything else points under fromPath.
  boolean useExternalLocation = replicationSpec.isInReplicationScope()
      && tblDesc.isExternal()
      && !replicationSpec.isMigratingToExternalTable();
  if (useExternalLocation) {
    String externalLocation =
        ReplExternalTables.externalTableLocation(conf, partition.getSd().getLocation());
    LOG.debug("partition {} has data location: {}", partition, externalLocation);
    onePartDesc.setLocation(externalLocation);
  } else {
    onePartDesc.setLocation(new Path(fromPath,
        Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
  }
  return addPartitionDesc;
}
boolean copyToMigratedTxnTable = false; if (tblDesc.isExternal() && tblDesc.getLocation() == null) { x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec())); if (replicationSpec.isInReplicationScope() && AcidUtils.isTransactionalTable(tblDesc.getTblProps()) && !replicationSpec.isMigratingToTxnTable()) { LoadMultiFilesDesc loadFilesWork = new LoadMultiFilesDesc(
Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); LoadTable.TableLocationTuple tableLocationTuple = LoadTable.tableLocation(tableDesc, parentDb, tableContext, context); tableDesc.setLocation(tableLocationTuple.location); table = tableDesc.toTable(context.hiveConf); if (isPartitioned(tableDesc)) { updateReplicationState(initialReplicationState());
Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); if (parentDb == null) { if (!tableContext.waitOnPrecursor()) { throw new SemanticException( ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tableDesc.getDatabaseName())); tableDesc.setLocation(tableLocationTuple.location);
/**
 * Builds the base AddPartitionDesc for one exported partition (non-replication
 * variant), mirroring the source partition's storage descriptor and pointing the
 * partition data under fromPath.
 *
 * @param fromPath root of the dumped table data
 * @param dbname destination database name
 * @param tblDesc descriptor of the destination table
 * @param partition source partition from the dump
 * @return populated AddPartitionDesc for the partition
 */
private static AddPartitionDesc getBaseAddPartitionDescFromPartition(
    Path fromPath, String dbname, ImportTableDesc tblDesc, Partition partition)
    throws MetaException, SemanticException {
  AddPartitionDesc addPartitionDesc = new AddPartitionDesc(dbname, tblDesc.getTableName(),
      EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()),
      partition.getSd().getLocation(), partition.getParameters());
  AddPartitionDesc.OnePartitionDesc onePartDesc = addPartitionDesc.getPartition(0);
  // Copy the storage descriptor details of the source partition.
  onePartDesc.setInputFormat(partition.getSd().getInputFormat());
  onePartDesc.setOutputFormat(partition.getSd().getOutputFormat());
  onePartDesc.setNumBuckets(partition.getSd().getNumBuckets());
  onePartDesc.setCols(partition.getSd().getCols());
  onePartDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
  onePartDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
  onePartDesc.setBucketCols(partition.getSd().getBucketCols());
  onePartDesc.setSortCols(partition.getSd().getSortCols());
  // Partition data lives under the dump root, keyed by the partition name.
  onePartDesc.setLocation(new Path(fromPath,
      Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
  return addPartitionDesc;
}
throws MetaException, IOException, HiveException { AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0); if (tblDesc.isExternal() && tblDesc.getLocation() == null) { x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec()));
/**
 * Returns true when the table described by tblDesc declares at least one
 * partition column.
 */
public static boolean isPartitioned(ImportTableDesc tblDesc) {
  return tblDesc.getPartCols() != null && !tblDesc.getPartCols().isEmpty();
}
/**
 * Creates the task that will create the table described by tableDesc.
 *
 * @param tableDesc descriptor of the table to create
 * @param x wrapper context providing conf/tasks plumbing
 * @return the create-table task
 */
private static Task<?> createTableTask(ImportTableDesc tableDesc,
    EximUtil.SemanticAnalyzerWrapperContext x) {
  return tableDesc.getCreateTableTask(x);
}
private TaskTracker forNewTable() throws Exception { Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); // If table doesn't exist, allow creating a new one only if the database state is older than the update. // This in-turn applicable for partitions creation as well. if ((parentDb != null) && (!event.replicationSpec().allowReplacementInto(parentDb.getParameters()))) { return tracker; } Iterator<AddPartitionDesc> iterator = event.partitionDescriptions(tableDesc).iterator(); while (iterator.hasNext() && tracker.canAddMoreTasks()) { AddPartitionDesc currentPartitionDesc = iterator.next(); /* the currentPartitionDesc cannot be inlined as we need the hasNext() to be evaluated post the current retrieved lastReplicatedPartition */ addPartition(iterator.hasNext(), currentPartitionDesc, null); } return tracker; }