/**
 * Set up the job: build the output description for the sample table
 * (fixed partition ds=2013-04-01) and register it under the sample profile.
 *
 * @param conf Configuration
 * @throws IOException if profile initialization fails
 */
public static void setupJob(Configuration conf) throws IOException {
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.setTableDesc(getHiveTableName());
  outputDesc.setPartitionValues(ImmutableMap.of("ds", "2013-04-01"));
  LOG.info("Writing to {}", outputDesc);
  try {
    HiveApiOutputFormat.initProfile(conf, outputDesc,
        SampleOutputFormat.SAMPLE_PROFILE_ID);
  } catch (IOException e) {
    LOG.error("Failed to initialize profile {}", outputDesc);
    throw e;
  }
}
// Anonymous retry-task body: returns true iff the target partition already
// exists in the metastore. Read-only against the metastore, so it is safe
// to re-run on Thrift failures (hence "idempotent").
@Override
public Boolean idempotentTask() throws TException {
  ThriftHiveMetastore.Iface client = description.metastoreClient(conf);
  String db = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();
  if (oti.hasPartitionInfo()) {
    Map<String, String> partitionSpec = description.getPartitionValues();
    // Order user-supplied values according to the table's partition columns.
    List<String> partitionValues = listOfPartitionValues(
        partitionSpec, oti.getPartitionInfo());
    if (partitionExists(client, db, table, partitionValues)) {
      // Existing partition is an error for this caller (it expects to
      // create the partition), hence LOG.error before reporting true.
      LOG.error("Table " + db + ":" + table +
          " partition " + partitionSpec + " already exists");
      return true;
    }
  }
  return false;
}
};  // closes the enclosing anonymous class (declared outside this view)
/**
 * Check that the table's partition info and the user's input agree, then
 * run the matching pre-write check (drop/verify partition, or verify the
 * unpartitioned table is empty).
 *
 * @param conf Configuration
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @param outputConf OutputConf
 * @throws IOException if table partitioning and user input disagree
 */
private void checkPartitionInfo(Configuration conf,
    HiveOutputDescription description, OutputInfo oti, OutputConf outputConf)
    throws IOException {
  boolean tablePartitioned = oti.hasPartitionInfo();
  boolean userPartitioned = description.hasPartitionValues();
  if (tablePartitioned) {
    if (!userPartitioned) {
      throw new IOException("table is partitioned but user input isn't");
    }
    if (outputConf.shouldDropPartitionIfExists()) {
      dropPartitionIfExists(conf, description, oti);
    } else {
      checkPartitionDoesntExist(conf, description, oti);
    }
  } else if (userPartitioned) {
    throw new IOException("table is not partitioned but user input is");
  } else {
    checkTableIsEmpty(conf, description, oti);
  }
}
// Anonymous retry-task body: fetch the Table object from the metastore.
// Pure read — safe to retry on transient Thrift failures.
@Override
public Table idempotentTask() throws TException {
  String dbName = outputDesc.getTableDesc().getDatabaseName();
  String tableName = outputDesc.getTableDesc().getTableName();
  ThriftHiveMetastore.Iface client = outputDesc.metastoreClient(conf);
  return client.get_table(dbName, tableName);
}
};  // closes the enclosing anonymous class (declared outside this view)
/**
 * checkOutputSpecs must throw IOException when the configured table does
 * not exist in the metastore.
 */
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  Configuration conf = new Configuration();
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  // Hack* wrappers adapt between mapred/mapreduce context APIs for tests.
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext =
      new HackJobContext(jobConf, taskContext.getJobID());
  outputFormat.checkOutputSpecs(jobContext);  // expected to throw
  fail();  // reached only if checkOutputSpecs silently succeeded
}
}  // closes the enclosing test class (declared outside this view)
/**
 * End-to-end round trip on an unpartitioned table: create it, write rows
 * through the output format, then read them back and verify.
 */
@Test
public void testOutput() throws Exception {
  String createQuery =
      "CREATE TABLE " + hiveTableDesc.getTableName() +
      " (i1 INT, d1 DOUBLE) " +
      " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'";
  hiveServer.createTable(createQuery);

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.setTableDesc(hiveTableDesc);
  HiveTableSchema schema =
      HiveTableSchemas.lookup(hiveServer.getClient(), null, hiveTableDesc);
  writeData(outputDesc, schema);

  HiveInputDescription inputDesc = new HiveInputDescription();
  inputDesc.setTableDesc(hiveTableDesc);
  verifyData(inputDesc);
}
// Fragment: build an output description from command-line args
// (the enclosing method continues beyond this view).
context.conf.setInt("mapred.task.partition", 1);
HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setDatabaseName(args.table.database);
outputDesc.getTableDesc().setTableName(args.table.table);
outputDesc.getMetastoreDesc().setHost(args.metastore.host);
outputDesc.getMetastoreDesc().setPort(args.metastore.port);
if (args.table.partitioned) {
  // Fixed sample partition value; if-block closes outside this view.
  outputDesc.putPartitionValue("ds", "2013-04-01");
/**
 * End-to-end round trip on a partitioned table: create it, write into
 * partition ds='foobar', then read that partition back and verify.
 */
@Test
public void testOutputWithPartitions() throws Exception {
  String createQuery =
      "CREATE TABLE " + hiveTableDesc.getTableName() +
      " (i1 INT, d1 DOUBLE) " +
      " PARTITIONED BY (ds STRING) " +
      " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'";
  hiveServer.createTable(createQuery);

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.putPartitionValue("ds", "foobar");
  outputDesc.setTableDesc(hiveTableDesc);
  HiveTableSchema schema =
      HiveTableSchemas.lookup(hiveServer.getClient(), null, hiveTableDesc);
  writeData(outputDesc, schema);

  HiveInputDescription inputDesc = new HiveInputDescription();
  inputDesc.setPartitionFilter("ds='foobar'");
  inputDesc.setTableDesc(hiveTableDesc);
  verifyData(inputDesc);
}
/**
 * Read the user's output description back out of the Configuration.
 *
 * @return deserialized HiveOutputDescription, or null if none was stored
 */
public HiveOutputDescription readOutputDescription() {
  String encoded = conf.get(getOutputDescriptionKey());
  if (encoded == null) {
    return null;
  }
  HiveOutputDescription hod = new HiveOutputDescription();
  Writables.readFieldsFromEncodedStr(encoded, hod);
  return hod;
}
/**
 * Verify the table directory contains no non-hidden files, i.e. the table
 * is empty and safe to write into.
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @throws IOException if the table has data, or on filesystem errors
 */
private void checkTableIsEmpty(Configuration conf,
    HiveOutputDescription description, OutputInfo oti) throws IOException {
  Path tableRoot = new Path(oti.getTableRoot());
  FileSystem fs = tableRoot.getFileSystem(conf);
  boolean hasData =
      fs.exists(tableRoot) && FileSystems.dirHasNonHiddenFiles(fs, tableRoot);
  if (hasData) {
    throw new IOException("Table " +
        description.getTableDesc().getTableName() + " has existing data");
  }
}
// Fragment: compute the partition sub-path for this write; Hive's checked
// MetaException is rewrapped as IOException for Hadoop-facing callers
// (catch block closes outside this view).
try {
  partitionPiece = HiveUtils.computePartitionPath(
      outputInfo.getPartitionInfo(), outputDesc.getPartitionValues());
} catch (MetaException e) {
  throw new IOException(e);
/**
 * checkOutputSpecs must throw IOException when the configured table does
 * not exist in the metastore.
 */
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  Configuration conf = new Configuration();
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  // Hack* wrappers adapt between mapred/mapreduce context APIs for tests.
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext =
      new HackJobContext(jobConf, taskContext.getJobID());
  outputFormat.checkOutputSpecs(jobContext);  // expected to throw
  fail();  // reached only if checkOutputSpecs silently succeeded
}
}  // closes the enclosing test class (declared outside this view)
/**
 * Round-trip test on an unpartitioned table: create, write via the output
 * format, read back, verify.
 */
@Test
public void testOutput() throws Exception {
  StringBuilder ddl = new StringBuilder();
  ddl.append("CREATE TABLE ").append(hiveTableDesc.getTableName());
  ddl.append(" (i1 INT, d1 DOUBLE) ");
  ddl.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'");
  hiveServer.createTable(ddl.toString());

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.setTableDesc(hiveTableDesc);
  HiveTableSchema tableSchema =
      HiveTableSchemas.lookup(hiveServer.getClient(), null, hiveTableDesc);
  writeData(outputDesc, tableSchema);

  HiveInputDescription inputDesc = new HiveInputDescription();
  inputDesc.setTableDesc(hiveTableDesc);
  verifyData(inputDesc);
}
/**
 * Round-trip test on a partitioned table: create, write into partition
 * ds='foobar', read that partition back, verify.
 */
@Test
public void testOutputWithPartitions() throws Exception {
  StringBuilder ddl = new StringBuilder();
  ddl.append("CREATE TABLE ").append(hiveTableDesc.getTableName());
  ddl.append(" (i1 INT, d1 DOUBLE) ");
  ddl.append(" PARTITIONED BY (ds STRING) ");
  ddl.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'");
  hiveServer.createTable(ddl.toString());

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.putPartitionValue("ds", "foobar");
  outputDesc.setTableDesc(hiveTableDesc);
  HiveTableSchema tableSchema =
      HiveTableSchemas.lookup(hiveServer.getClient(), null, hiveTableDesc);
  writeData(outputDesc, tableSchema);

  HiveInputDescription inputDesc = new HiveInputDescription();
  inputDesc.setPartitionFilter("ds='foobar'");
  inputDesc.setTableDesc(hiveTableDesc);
  verifyData(inputDesc);
}
// Anonymous retry-task body: probe whether the table exists by fetching it;
// NoSuchObjectException is the "missing" signal, not a failure.
@Override
public Boolean idempotentTask() throws TException {
  ThriftHiveMetastore.Iface client = description.metastoreClient(conf);
  try {
    client.get_table(description.getTableDesc().getDatabaseName(),
        description.getTableDesc().getTableName());
  } catch (NoSuchObjectException e) {
    // Table not found — report "doesn't exist" rather than propagating.
    return false;
  }
  return true;
}
};  // closes the enclosing anonymous class (declared outside this view)
/**
 * Initialize this output format: register the profile and table schema in
 * the Configuration under the given profile id.
 *
 * @param outputDescription Output description
 * @param profileId Profile id
 * @param conf Configuration
 * @throws IllegalStateException if profile/schema initialization throws
 *         IOException (wrapped as the cause)
 */
public void initialize(HiveOutputDescription outputDescription,
    String profileId, Configuration conf) {
  // Fixed: message previously said "inputDescription is null", which
  // misnamed the parameter being checked.
  checkNotNull(outputDescription, "outputDescription is null");
  checkNotNull(profileId, "profileId is null");
  checkNotNull(conf, "conf is null");
  try {
    setMyProfileId(profileId);
    initProfile(conf, outputDescription, profileId);
    HiveTableSchemas.initTableSchema(conf, profileId,
        outputDescription.getTableDesc());
  } catch (IOException e) {
    throw new IllegalStateException("initialize: IOException occurred", e);
  }
}
/**
 * With a metastore client that keeps failing, checkOutputSpecs must give up
 * after the configured number of retries and surface an IOException.
 */
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist()
    throws Exception {
  HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue()));

  Configuration configuration = new Configuration();
  // Short retry delay keeps the test fast across all attempts.
  configuration.setLong(
      BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

  HiveOutputDescription description = new HiveOutputDescription();
  description.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(configuration, PROFILE_ID);
  outputConf.writeOutputDescription(description);

  HiveApiOutputFormat format = new HiveApiOutputFormat();
  format.setMyProfileId(PROFILE_ID);

  JobConf jobConf = new JobConf(configuration);
  TaskAttemptContext attemptContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext =
      new HackJobContext(jobConf, attemptContext.getJobID());

  format.checkOutputSpecs(jobContext);  // expected to throw after retries
  fail();
}
// Fragment: create the test table and describe output against it
// (the enclosing method continues beyond this view).
createTestTable();
HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.setTableDesc(hiveTableDesc);
/**
 * Drop the partition we are about to write, if it already exists.
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @return true iff an existing partition was dropped
 * @throws IOException if the metastore client cannot be created
 */
private boolean dropPartitionIfExists(Configuration conf,
    HiveOutputDescription description, OutputInfo oti) throws IOException {
  ThriftHiveMetastore.Iface client;
  try {
    client = description.metastoreClient(conf);
  } catch (TException e) {
    throw new IOException(e);
  }
  if (!oti.hasPartitionInfo()) {
    return false;
  }
  String db = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();
  Map<String, String> partitionSpec = description.getPartitionValues();
  List<String> partitionValues =
      listOfPartitionValues(partitionSpec, oti.getPartitionInfo());
  if (!partitionExists(client, db, table, partitionValues)) {
    return false;
  }
  LOG.info("Dropping partition {} from table {}:{}",
      partitionSpec, db, table);
  return dropPartition(client, db, table, partitionValues);
}
/**
 * Verifies the backoff-retry path: a persistently faulty metastore client
 * must exhaust NUM_TRIES and cause checkOutputSpecs to throw IOException.
 */
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist()
    throws Exception {
  int tries = BackoffRetryTask.NUM_TRIES.getDefaultValue();
  HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(tries));

  Configuration hadoopConf = new Configuration();
  // Keep the per-attempt delay tiny so retries finish quickly.
  hadoopConf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

  HiveOutputDescription missingTableDesc = new HiveOutputDescription();
  missingTableDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(hadoopConf, PROFILE_ID);
  outputConf.writeOutputDescription(missingTableDesc);

  HiveApiOutputFormat apiFormat = new HiveApiOutputFormat();
  apiFormat.setMyProfileId(PROFILE_ID);

  JobConf jobConf = new JobConf(hadoopConf);
  TaskAttemptContext taskCtx =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobCtx = new HackJobContext(jobConf, taskCtx.getJobID());

  apiFormat.checkOutputSpecs(jobCtx);  // expected to throw after retries
  fail();
}