@Override
public Table idempotentTask() throws TException {
  // Fetch the output table's metadata from the Hive metastore.
  ThriftHiveMetastore.Iface metastore = outputDesc.metastoreClient(conf);
  String database = outputDesc.getTableDesc().getDatabaseName();
  String table = outputDesc.getTableDesc().getTableName();
  return metastore.get_table(database, table);
} };
@Override
public Boolean idempotentTask() throws TException {
  // Probe the metastore: the table exists iff get_table() does not throw
  // NoSuchObjectException. Any other TException propagates to the caller.
  ThriftHiveMetastore.Iface metastore = description.metastoreClient(conf);
  String database = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();
  try {
    metastore.get_table(database, table);
    return true;
  } catch (NoSuchObjectException e) {
    return false;
  }
} };
/**
 * Verify that the given table holds no data, i.e. its root directory
 * contains no non-hidden files.
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @throws IOException if the table already has data, or on filesystem errors
 */
private void checkTableIsEmpty(Configuration conf,
    HiveOutputDescription description, OutputInfo oti) throws IOException {
  Path root = new Path(oti.getTableRoot());
  FileSystem fs = root.getFileSystem(conf);
  // A missing root directory trivially counts as empty.
  boolean hasData = fs.exists(root) && FileSystems.dirHasNonHiddenFiles(fs, root);
  if (hasData) {
    throw new IOException("Table " +
        description.getTableDesc().getTableName() + " has existing data");
  }
}
/**
 * Initialize this output format
 *
 * @param outputDescription Output description
 * @param profileId Profile id
 * @param conf Configuration
 * @throws IllegalStateException if schema initialization fails with an IOException
 */
public void initialize(HiveOutputDescription outputDescription,
    String profileId, Configuration conf) {
  // Fixed: the precondition message previously said "inputDescription",
  // which did not match the parameter being checked.
  checkNotNull(outputDescription, "outputDescription is null");
  checkNotNull(profileId, "profileId is null");
  checkNotNull(conf, "conf is null");
  try {
    setMyProfileId(profileId);
    initProfile(conf, outputDescription, profileId);
    // Persist the table schema into the Configuration under this profile so
    // downstream tasks can look it up.
    HiveTableSchemas.initTableSchema(conf, profileId,
        outputDescription.getTableDesc());
  } catch (IOException e) {
    throw new IllegalStateException("initialize: IOException occurred", e);
  }
}
@Override
public Boolean idempotentTask() throws TException {
  // Returns true iff the target partition already exists in the metastore.
  ThriftHiveMetastore.Iface metastore = description.metastoreClient(conf);
  String database = description.getTableDesc().getDatabaseName();
  String tableName = description.getTableDesc().getTableName();
  // Unpartitioned tables can never collide on a partition.
  if (!oti.hasPartitionInfo()) {
    return false;
  }
  Map<String, String> spec = description.getPartitionValues();
  List<String> values = listOfPartitionValues(spec, oti.getPartitionInfo());
  boolean exists = partitionExists(metastore, database, tableName, values);
  if (exists) {
    LOG.error("Table " + database + ":" + tableName + " partition " +
        spec + " already exists");
  }
  return exists;
} };
/**
 * Drop the partition which we will be writing to, if it already exists.
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @return True iff partition was dropped
 * @throws IOException if the metastore client cannot be created
 */
private boolean dropPartitionIfExists(Configuration conf,
    HiveOutputDescription description, OutputInfo oti) throws IOException {
  ThriftHiveMetastore.Iface metastore;
  try {
    metastore = description.metastoreClient(conf);
  } catch (TException e) {
    // Surface metastore connection problems as IOException per our contract.
    throw new IOException(e);
  }
  // Nothing to drop for unpartitioned output.
  if (!oti.hasPartitionInfo()) {
    return false;
  }
  String database = description.getTableDesc().getDatabaseName();
  String tableName = description.getTableDesc().getTableName();
  Map<String, String> spec = description.getPartitionValues();
  List<String> values = listOfPartitionValues(spec, oti.getPartitionInfo());
  if (!partitionExists(metastore, database, tableName, values)) {
    return false;
  }
  LOG.info("Dropping partition {} from table {}:{}", spec, database, tableName);
  return dropPartition(metastore, database, tableName, values);
}
@Override
public Void idempotentTask() throws TException {
  // Register the freshly written output directory as a partition of the
  // target table in the Hive metastore.
  String dbName = outputDesc.getTableDesc().getDatabaseName();
  String tableName = outputDesc.getTableDesc().getTableName();
  ThriftHiveMetastore.Iface client = outputDesc.metastoreClient(conf);
  Table hiveTable = client.get_table(dbName, tableName);
  Partition partition = new Partition();
  partition.setDbName(dbName);
  partition.setTableName(tableName);
  partition.setParameters(outputInfo.getTableParams());
  // Partition values must be ordered to match the table's partition keys.
  List<String> partitionValues = HiveUtils.orderedPartitionValues(
      hiveTable.getPartitionKeys(), outputDesc.getPartitionValues());
  partition.setValues(partitionValues);
  // Clone the table's storage descriptor, then point it at our output.
  StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd());
  sd.setParameters(outputInfo.getSerializerParams());
  sd.setLocation(outputInfo.getFinalOutputPath());
  sd.setCols(outputInfo.getColumnInfo());
  partition.setSd(sd);
  // Fixed: previously logged getSerializerParams() here, which did not match
  // the message — log the actual partition values being registered.
  LOG.info("Registering partition with values {} located at {}",
      partitionValues, outputInfo.getFinalOutputPath());
  try {
    client.add_partition(partition);
  } catch (AlreadyExistsException e) {
    // A concurrent task (e.g. Giraph) may have registered it first; benign.
    LOG.info("Partition already exists; Giraph must have just created it");
  } catch (InvalidObjectException e) {
    throw new IllegalStateException(e);
  }
  return null;
} };
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  // Point the output description at a table that is not in the metastore.
  Configuration configuration = new Configuration();
  HiveOutputDescription description = new HiveOutputDescription();
  description.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(configuration, PROFILE_ID);
  outputConf.writeOutputDescription(description);
  HiveApiOutputFormat format = new HiveApiOutputFormat();
  format.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(configuration);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  // Expect checkOutputSpecs to throw IOException for the missing table.
  format.checkOutputSpecs(jobContext);
  fail();
} }
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  Configuration conf = new Configuration();
  // Describe output against a table name that the metastore does not know.
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptID attemptId = new TaskAttemptID();
  TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, attemptId);
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  // Must throw IOException; fail() guards against silent success.
  outputFormat.checkOutputSpecs(jobContext);
  fail();
} }
// Copy the table coordinates and metastore endpoint from the parsed
// command-line args into the output description.
outputDesc.getTableDesc().setDatabaseName(args.table.database);
outputDesc.getTableDesc().setTableName(args.table.table);
outputDesc.getMetastoreDesc().setHost(args.metastore.host);
outputDesc.getMetastoreDesc().setPort(args.metastore.port);
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist()
    throws Exception {
  // Metastore that fails as many times as the retry budget allows, so the
  // final attempt still fails and the IOException escapes.
  HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue()));
  Configuration configuration = new Configuration();
  // Shrink the backoff so the test stays fast.
  configuration.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription description = new HiveOutputDescription();
  description.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(configuration, PROFILE_ID);
  outputConf.writeOutputDescription(description);
  HiveApiOutputFormat format = new HiveApiOutputFormat();
  format.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(configuration);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  format.checkOutputSpecs(jobContext);
  fail();
}
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist()
    throws Exception {
  // Install a metastore stub that keeps failing for the whole retry budget.
  FaultyThriftHiveMetastore faulty = new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue());
  HiveMetastores.setTestClient(faulty);
  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptID attemptId = new TaskAttemptID();
  TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, attemptId);
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  // All retries are exhausted, so this must throw IOException.
  outputFormat.checkOutputSpecs(jobContext);
  fail();
}
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
  // Metastore fails one time fewer than the retry budget, so the last
  // attempt succeeds and no exception escapes.
  FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);
  Configuration configuration = new Configuration();
  configuration.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription description = new HiveOutputDescription();
  description.getTableDesc().setTableName("foo");
  OutputConf outputConf = new OutputConf(configuration, PROFILE_ID);
  outputConf.writeOutputDescription(description);
  HiveApiOutputFormat format = new HiveApiOutputFormat();
  format.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(configuration);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  HiveMetastores.setTestClient(metastore);
  format.checkOutputSpecs(jobContext);
  // Every try should have hit the metastore exactly once.
  assertEquals(
      BackoffRetryTask.NUM_TRIES.getDefaultValue(),
      metastore.getNumCalls());
} }
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
  int failures = BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1;
  // Stub metastore recovers on the final retry attempt.
  FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(failures);
  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("foo");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  HiveMetastores.setTestClient(metastore);
  // Should succeed without throwing, having retried the full budget.
  outputFormat.checkOutputSpecs(jobContext);
  assertEquals(
      BackoffRetryTask.NUM_TRIES.getDefaultValue(),
      metastore.getNumCalls());
} }