/**
 * Initialize this output format
 *
 * @param outputDescription Output description
 * @param profileId Profile id
 * @param conf Configuration
 */
public void initialize(HiveOutputDescription outputDescription, String profileId,
    Configuration conf) {
  checkNotNull(outputDescription, "outputDescription is null");
  checkNotNull(profileId, "profileId is null");
  checkNotNull(conf, "conf is null");
  try {
    setMyProfileId(profileId);
    initProfile(conf, outputDescription, profileId);
    HiveTableSchemas.initTableSchema(conf, profileId, outputDescription.getTableDesc());
  } catch (TException | IOException e) {
    throw new IllegalStateException("initialize: exception occurred", e);
  }
}
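// A minimal usage sketch (not from the source) of wiring up this output
// format before running a job. The table name "my_table" and profile id
// "my-profile" are hypothetical placeholders; conf is assumed in scope.
HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setTableName("my_table"); // hypothetical table
HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
outputFormat.initialize(outputDesc, "my-profile", conf);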
/**
 * Check that the table's partition info and the user's match.
 *
 * @param conf Configuration
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @param outputConf OutputConf
 * @throws IOException if the table's partitioning and the user's input disagree
 */
private void checkPartitionInfo(Configuration conf, HiveOutputDescription description,
    OutputInfo oti, OutputConf outputConf) throws IOException {
  if (oti.hasPartitionInfo()) {
    if (!description.hasPartitionValues()) {
      throw new IOException("table is partitioned but user input isn't");
    }
    if (outputConf.shouldDropPartitionIfExists()) {
      dropPartitionIfExists(conf, description, oti);
    } else {
      checkPartitionDoesntExist(conf, description, oti);
    }
  } else {
    if (description.hasPartitionValues()) {
      throw new IOException("table is not partitioned but user input is");
    } else {
      checkTableIsEmpty(conf, description, oti);
    }
  }
}
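// A hedged sketch of an output description that would take the partitioned
// branch above. The table name is hypothetical; the "ds" partition key/value
// mirror the values used in the setupJob example later in this section.
HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setTableName("partitioned_table"); // hypothetical
outputDesc.setPartitionValues(ImmutableMap.of("ds", "2013-04-01"));
// If the table has partition info but no partition values are set here,
// checkPartitionInfo throws "table is partitioned but user input isn't".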
@Override
public void checkOutputSpecs(JobContext jobContext)
    throws IOException, InterruptedException {
  Configuration conf = jobContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, myProfileId);
  HiveOutputDescription description = outputConf.readOutputDescription();
  OutputInfo oti = outputConf.readOutputTableInfo();
  if (description == null) {
    LOG.error("HiveOutputDescription is null in Configuration, nothing to check");
    return;
  }
  LOG.info("Check output specs of " + description);
  checkTableExists(conf, description);
  if (oti == null) {
    LOG.error("OutputInfo is null in Configuration, nothing to check");
    return;
  }
  checkPartitionInfo(conf, description, oti, outputConf);
}
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  Configuration conf = new Configuration();

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");

  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);

  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);

  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

  outputFormat.checkOutputSpecs(jobContext);

  fail();
}
conf.set("mapred.task.id", taskAttemptIdStr);

HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
outputFormat.setMyProfileId(profile);

JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
committer.setupJob(jobContext);
context.outputFormat = new HiveApiOutputFormat();
context.outputFormat.init(context.conf, outputDesc);
context.outputFormat.checkOutputSpecs(threadLocal.jobContext());
context.outputFormat.getOutputCommitter(threadLocal.taskContext());
context.schema = context.outputFormat.getTableSchema(context.conf);
context.outputFormat.getOutputCommitter(threadLocal.taskContext());
context.outputFormat.getRecordWriter(threadLocal.taskContext());
/**
 * Initialize with default profile ID using Configuration and output
 * description passed in.
 *
 * @param conf Configuration to use
 * @param outputDesc HiveOutputDescription
 * @throws TException Hive Metastore issues
 * @throws IOException Hadoop issues
 */
public static void initDefaultProfile(Configuration conf,
    HiveOutputDescription outputDesc) throws TException, IOException {
  initProfile(conf, outputDesc, DEFAULT_PROFILE_ID);
}
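// A minimal sketch of default-profile initialization, assuming a
// hypothetical "my_table" table; conf is assumed in scope. This is the
// one-call alternative to picking a profile id explicitly via initProfile.
HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setTableName("my_table"); // hypothetical table
HiveApiOutputFormat.initDefaultProfile(conf, outputDesc);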
/**
 * Initialize using this object's profile ID with the Configuration and output
 * description passed in.
 *
 * @param conf Configuration to use
 * @param outputDesc HiveOutputDescription
 * @throws TException Hive Metastore issues
 * @throws IOException Hadoop issues
 */
public void init(Configuration conf, HiveOutputDescription outputDesc)
    throws TException, IOException {
  initProfile(conf, outputDesc, myProfileId);
}
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
  HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue()));

  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");

  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);

  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);

  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

  outputFormat.checkOutputSpecs(jobContext);

  fail();
}
/**
 * Setup the job
 *
 * @param conf Configuration
 * @throws IOException if initializing the output profile fails
 */
public static void setupJob(Configuration conf) throws IOException {
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.setTableDesc(getHiveTableName());

  Map<String, String> partitionValues = ImmutableMap.of("ds", "2013-04-01");
  outputDesc.setPartitionValues(partitionValues);

  LOG.info("Writing to {}", outputDesc);
  try {
    HiveApiOutputFormat.initProfile(conf, outputDesc, SampleOutputFormat.SAMPLE_PROFILE_ID);
  } catch (IOException e) {
    LOG.error("Failed to initialize profile {}", outputDesc);
    throw e;
  }
}
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
  FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("foo");

  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);

  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);

  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

  HiveMetastores.setTestClient(metastore);

  outputFormat.checkOutputSpecs(jobContext);

  assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}