/**
 * Logs a human-readable "Loading data to table ..." message for the given load
 * descriptor, including the partition spec (if any) and the source path detail.
 *
 * @param tbd descriptor of the table (and optional static partition) being loaded
 */
public void logMessage(LoadTableDesc tbd) {
  StringBuilder mesg = new StringBuilder("Loading data to table ")
      .append(tbd.getTable().getTableName());
  Map<String, String> partSpec = tbd.getPartitionSpec();
  if (partSpec.size() > 0) {
    mesg.append(" partition (");
    // Iterate entries directly instead of keySet()+get(key) — one map lookup per pair.
    for (Map.Entry<String, String> e : partSpec.entrySet()) {
      mesg.append(e.getKey()).append('=').append(e.getValue()).append(", ");
    }
    // Trim the trailing ", " (the size > 0 guard guarantees at least one entry was appended).
    mesg.setLength(mesg.length() - 2);
    mesg.append(')');
  }
  String mesgDetail = " from " + tbd.getSourcePath();
  if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
    Utilities.FILE_OP_LOGGER.trace(mesg.toString() + " " + mesgDetail);
  }
  console.printInfo(mesg.toString(), mesgDetail);
}
public List<Task<? extends Serializable>> generateUpdateTasks() throws HiveException { hive = Hive.get(this.conf); for (LoadTableDesc ltd : loadTableWork) { TableDesc td = ltd.getTable(); Table srcTable = hive.getTable(td.getTableName()); List<Index> tblIndexes = IndexUtils.getAllIndexes(srcTable, (short)-1); Map<String, String> partSpec = ltd.getPartitionSpec(); if (partSpec == null || partSpec.size() == 0) { //unpartitioned table, update whole index doIndexUpdate(tblIndexes); } else { doIndexUpdate(tblIndexes, partSpec); } } return tasks; }
table = db.getTable(tbd.getTable().getTableName()); if (!table.isPartitioned()) { return null;
table = db.getTable(tbd.getTable().getTableName()); if (!table.isPartitioned()) { return null;
+ ltd.getTable().getTableName() + " is an insert_only table"); return null;
try { if (work.getLoadTableDesc() != null) { tableName = work.getLoadTableDesc().getTable().getTableName(); } else if (work.getTableSpecs() != null){ tableName = work.getTableSpecs().tableName;
if (tbd != null) { logMessage(tbd); Table table = db.getTable(tbd.getTable().getTableName()); if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("loadTable called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getLoadFileType(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, resetStatisticsProps(table), tbd.getWriteId(), tbd.getStmtId(),
/**
 * Resolves the name of the table or view this stats work applies to, checking the
 * possible descriptors in priority order: load-table work, explicit table specs,
 * the CTAS target table, and finally a created view.
 *
 * @return the resolved table or view name
 */
public String getTableName() {
  if (getLoadTableDesc() != null) {
    return getLoadTableDesc().getTable().getTableName();
  }
  if (getTableSpecs() != null) {
    return getTableSpecs().tableName;
  }
  if (getLoadFileDesc().getCtasCreateTableDesc() != null) {
    return getLoadFileDesc().getCtasCreateTableDesc().getTableName();
  }
  return getLoadFileDesc().getCreateViewDesc().getViewName();
}
db.loadDynamicPartitions( tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getLoadFileType(),
if (tbd != null) { StringBuilder mesg = new StringBuilder("Loading data to table ") .append( tbd.getTable().getTableName()); if (tbd.getPartitionSpec().size() > 0) { mesg.append(" partition ("); Table table = db.getTable(tbd.getTable().getTableName()); srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); } else { srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); } else { flag = HiveFileFormatUtils.checkInputFormat( if (tbd.getPartitionSpec().size() == 0) { dc = new DataContainer(table.getTTable()); db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, db.loadDynamicPartitions( tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getPartitionSpec()); db.validatePartitionNameCharacters(partVals);
/**
 * Checks whether the given input/output paths and a linked MoveWork should be merged into one only MoveWork.
 * This is an optimization for BlobStore systems to avoid doing two renames/copies that are not necessary.
 *
 * @param conf A HiveConf object to check if BlobStorage optimizations are enabled.
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath A path that the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return True if both Conditional input/output paths and the linked MoveWork should be merged.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath,
    Path condOutputPath, MoveWork linkedMoveWork) {
  if (linkedMoveWork == null || !BlobStorageUtils.areOptimizationsEnabled(conf)) {
    return false;
  }
  // The linked work must be exactly one of: file-load or table-load.
  boolean fileOnly =
      linkedMoveWork.getLoadFileWork() != null && linkedMoveWork.getLoadTableWork() == null;
  boolean tableOnly =
      linkedMoveWork.getLoadTableWork() != null && linkedMoveWork.getLoadFileWork() == null;
  Path linkedSource;
  Path linkedTarget;
  if (fileOnly) {
    linkedSource = linkedMoveWork.getLoadFileWork().getSourcePath();
    linkedTarget = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else if (tableOnly) {
    linkedSource = linkedMoveWork.getLoadTableWork().getSourcePath();
    linkedTarget = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  } else {
    // Ambiguous work (both or neither descriptor set): never merge.
    return false;
  }
  // Merge only when the conditional output feeds the linked work and both ends live on blob storage.
  return condOutputPath.equals(linkedSource)
      && BlobStorageUtils.isBlobStoragePath(conf, condInputPath)
      && BlobStorageUtils.isBlobStoragePath(conf, linkedTarget);
}
@Test public void testMergePathValidMoveWorkReturnsNewMoveWork() { final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000"); final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002"); final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); final LineageState lineageState = new LineageState(); MoveWork newWork; // test using loadFileWork when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath()); assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir()); // test using loadTableWork TableDesc tableDesc = new TableDesc(); reset(mockWork); when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( condOutputPath, tableDesc, null)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath()); assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc)); }
/**
 * Loads data into a single static partition of {@code table} and returns a
 * DataContainer describing the loaded partition. Also registers the partition as a
 * write entity for post-execution hooks when outputs are tracked.
 *
 * @param db    the Hive session/metastore handle used for the load
 * @param table the (partitioned) destination table
 * @param tbd   load descriptor: source path, partition spec, load flags, write/stmt ids
 * @param ti    bucket/sort column info used to refresh partition metadata after the load
 * @return a DataContainer wrapping the table and the freshly loaded partition
 * @throws HiveException on metastore/load failures
 * @throws IOException on filesystem failures
 * @throws InvalidOperationException on invalid metastore operations
 */
private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd,
    TaskInformation ti) throws HiveException, IOException, InvalidOperationException {
  // Validate the partition column values before touching the metastore or filesystem.
  List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec());
  db.validatePartitionNameCharacters(partVals);
  if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
    Utilities.FILE_OP_LOGGER.trace("loadPartition called from " + tbd.getSourcePath()
        + " into " + tbd.getTable().getTableName());
  }
  // NOTE(review): the table is re-fetched by name here rather than reusing the
  // `table` parameter — presumably to get a fresh metastore snapshot; confirm.
  db.loadPartition(tbd.getSourcePath(), db.getTable(tbd.getTable().getTableName()),
      tbd.getPartitionSpec(), tbd.getLoadFileType(), tbd.getInheritTableSpecs(),
      tbd.getInheritLocation(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(),
      work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID
          && !tbd.isMmTable(),
      resetStatisticsProps(table), tbd.getWriteId(), tbd.getStmtId(),
      tbd.isInsertOverwrite());
  Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
  // See the comment inside updatePartitionBucketSortColumns.
  if (!tbd.isMmTable() && (ti.bucketCols != null || ti.sortCols != null)) {
    updatePartitionBucketSortColumns(db, table, partn, ti.bucketCols, ti.numBuckets,
        ti.sortCols);
  }
  DataContainer dc = new DataContainer(table.getTTable(), partn.getTPartition());
  // add this partition to post-execution hook
  if (work.getOutputs() != null) {
    DDLTask.addIfAbsentByName(new WriteEntity(partn,
        getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
  }
  return dc;
}
/**
 * Checks whether the given input/output paths and a linked MoveWork should be merged into one only MoveWork.
 * This is an optimization for BlobStore systems to avoid doing two renames/copies that are not necessary.
 *
 * @param conf A HiveConf object to check if BlobStorage optimizations are enabled.
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath A path that the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return True if both Conditional input/output paths and the linked MoveWork should be merged.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath,
    Path condOutputPath, MoveWork linkedMoveWork) {
  Path linkedSourcePath, linkedTargetPath;
  // No linked work, or blob-store optimizations disabled: nothing to merge.
  if (linkedMoveWork == null || !BlobStorageUtils.areOptimizationsEnabled(conf)) {
    return false;
  }
  // The linked work must carry exactly one kind of load descriptor; pick its
  // source and final target accordingly.
  if (linkedMoveWork.getLoadFileWork() != null && linkedMoveWork.getLoadTableWork() == null) {
    linkedSourcePath = linkedMoveWork.getLoadFileWork().getSourcePath();
    linkedTargetPath = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else if (linkedMoveWork.getLoadTableWork() != null
      && linkedMoveWork.getLoadFileWork() == null) {
    linkedSourcePath = linkedMoveWork.getLoadTableWork().getSourcePath();
    linkedTargetPath = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  } else {
    // Both or neither descriptor set: ambiguous, do not merge.
    return false;
  }
  // Merge only when the conditional output feeds the linked work and both the
  // conditional input and the final target live on blob storage.
  return condOutputPath.equals(linkedSourcePath)
      && BlobStorageUtils.isBlobStoragePath(conf, condInputPath)
      && BlobStorageUtils.isBlobStoragePath(conf, linkedTargetPath);
}
statsWork = new BasicStatsWork(mvWork.getLoadTableWork()); truncate = mvWork.getLoadTableWork().getReplace(); String tableName = mvWork.getLoadTableWork().getTable().getTableName(); try {
/**
 * Builds the list of tasks that refresh the indexes of every table touched by the
 * load work: all indexes for unpartitioned tables, or just the loaded partition's
 * indexes for partitioned tables.
 *
 * @return the accumulated index-update tasks
 * @throws HiveException if a table or its indexes cannot be looked up
 */
public List<Task<? extends Serializable>> generateUpdateTasks() throws HiveException {
  hive = Hive.get(this.conf);
  for (LoadTableDesc ltd : loadTableWork) {
    TableDesc td = ltd.getTable();
    Table srcTable = hive.getTable(td.getTableName());
    // -1 fetches all indexes for the table.
    List<Index> tblIndexes = IndexUtils.getAllIndexes(srcTable, (short) -1);
    Map<String, String> partSpec = ltd.getPartitionSpec();
    if (partSpec == null || partSpec.size() == 0) {
      //unpartitioned table, update whole index
      doIndexUpdate(tblIndexes);
    } else {
      doIndexUpdate(tblIndexes, partSpec);
    }
  }
  return tasks;
}
/**
 * Executes the stats task: resolves the target table from either the load-table
 * descriptor (INSERT OVERWRITE) or the explicit table specs (ANALYZE), then
 * delegates to aggregateStats().
 *
 * @param driverContext the driver context for this task
 * @return the result of aggregateStats()
 */
@Override
public int execute(DriverContext driverContext) {
  // Make sure that it is either an ANALYZE command or an INSERT OVERWRITE command
  assert (work.getLoadTableDesc() != null && work.getTableSpecs() == null
      || work.getLoadTableDesc() == null && work.getTableSpecs() != null);
  String tableName = "";
  try {
    if (work.getLoadTableDesc() != null) {
      tableName = work.getLoadTableDesc().getTable().getTableName();
    } else {
      tableName = work.getTableSpecs().tableName;
    }
    table = db.getTable(tableName);
  } catch (HiveException e) {
    LOG.error("Cannot get table " + tableName, e);
    console.printError("Cannot get table " + tableName, e.toString());
    // NOTE(review): execution deliberately continues after a failed lookup, leaving
    // `table` unset — presumably aggregateStats() tolerates that (stats collection
    // is best-effort). Confirm; otherwise an early error return belongs here.
  }
  return aggregateStats();
}
private String getTablePrefix(StatsWork work) throws HiveException { String tableName; if (work.getLoadTableDesc() != null) { tableName = work.getLoadTableDesc().getTable().getTableName(); } else if (work.getTableSpecs() != null) { tableName = work.getTableSpecs().tableName; } else { tableName = work.getLoadFileDesc().getDestinationCreateTable(); } Table table; try { table = db.getTable(tableName); } catch (HiveException e) { LOG.warn("Failed to get table:" + tableName); // For CTAS query, table does not exist in this period, just use table name as prefix. return tableName.toLowerCase(); } return table.getDbName() + "." + table.getTableName(); }
try { if (work.getLoadTableDesc() != null) { tableName = work.getLoadTableDesc().getTable().getTableName(); } else if (work.getTableSpecs() != null){ tableName = work.getTableSpecs().tableName;