@Override
public void checkOutputSpecs(JobContext jobContext)
    throws IOException, InterruptedException {
  Configuration conf = jobContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, myProfileId);
  HiveOutputDescription description = outputConf.readOutputDescription();
  OutputInfo oti = outputConf.readOutputTableInfo();
  LOG.info("Check output specs of " + description);
  if (description == null) {
    LOG.error("HiveOutputDescription is null in Configuration, nothing to check");
    return;
  }
  checkTableExists(conf, description);
  if (oti == null) {
    LOG.error("OutputInfo is null in Configuration, nothing to check");
    return;
  }
  checkPartitionInfo(conf, description, oti, outputConf);
}
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Let the wrapped committer finalize task output first, then register the
  // results with the Hive metastore.
  baseCommitter.commitJob(jobContext);
  Configuration conf = jobContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, profileId);
  HiveOutputDescription outputDesc = outputConf.readOutputDescription();
  OutputInfo outputInfo = outputConf.readOutputTableInfo();
  if (outputInfo.hasPartitionInfo()) {
    registerPartitions(conf, outputDesc, outputInfo);
  } else {
    noPartitionsCopyData(conf, outputInfo);
  }
  writeSuccessFile(conf);
}
@Override
public RecordWriterImpl getRecordWriter(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  HadoopUtils.setWorkOutputDir(taskAttemptContext);
  Configuration conf = taskAttemptContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, myProfileId);
  OutputInfo oti = outputConf.readOutputTableInfo();
  HiveUtils.setRCFileNumColumns(conf, oti.getColumnInfo().size());
  HadoopUtils.setOutputKeyWritableClass(conf, NullWritable.class);
  Serializer serializer = oti.createSerializer(conf);
  HadoopUtils.setOutputValueWritableClass(conf, serializer.getSerializedClass());
  org.apache.hadoop.mapred.OutputFormat baseOutputFormat =
      ReflectionUtils.newInstance(oti.getOutputFormatClass(), conf);
  // CHECKSTYLE: stop LineLength
  org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
      getBaseRecordWriter(taskAttemptContext, baseOutputFormat);
  // CHECKSTYLE: resume LineLength
  StructObjectInspector soi = Inspectors.createFor(oti.getColumnInfo());
  if (!outputConf.shouldResetSlowWrites()) {
    return new RecordWriterImpl(baseWriter, serializer, soi);
  } else {
    // Wrap the writer so that writes exceeding the timeout reset the
    // underlying base writer.
    long writeTimeout = outputConf.getWriteResetTimeout();
    return new ResettableRecordWriterImpl(baseWriter, serializer, soi,
        taskAttemptContext, baseOutputFormat, writeTimeout);
  }
}
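// Hedged usage sketch, not from the library source: one way a caller might
// wire up HiveApiOutputFormat and obtain a writer, reusing the Hack* test
// contexts shown in the tests below. It assumes "conf" already carries the
// OutputConf entries written as in the configuration snippet further down,
// and that PROFILE_ID matches the profile used when writing them.
private RecordWriterImpl openWriter(Configuration conf) throws Exception {
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  // getRecordWriter() reads the OutputInfo stored in conf, configures the
  // key/value classes and serializer, and wraps the base record writer.
  return outputFormat.getRecordWriter(taskContext);
}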
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
  Configuration conf = new Configuration();
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  // checkOutputSpecs should throw IOException for a table that does not exist.
  outputFormat.checkOutputSpecs(jobContext);
  fail();
}
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist()
    throws Exception {
  // Fail the metastore for as many calls as the retry task will attempt,
  // so every retry is exhausted and the IOException propagates.
  HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue()));
  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("doesnt-exist");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  outputFormat.checkOutputSpecs(jobContext);
  fail();
}
HiveTableSchemas.putToConf(conf, profileId, tableSchema);
OutputConf outputConf = new OutputConf(conf, profileId);
outputConf.writeOutputDescription(outputDesc);
outputConf.writeOutputTableInfo(outputInfo);
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
  // Fail one call fewer than the retry limit, so the final retry succeeds.
  FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
      BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);
  Configuration conf = new Configuration();
  conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
  HiveOutputDescription outputDesc = new HiveOutputDescription();
  outputDesc.getTableDesc().setTableName("foo");
  OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
  outputConf.writeOutputDescription(outputDesc);
  HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
  outputFormat.setMyProfileId(PROFILE_ID);
  JobConf jobConf = new JobConf(conf);
  TaskAttemptContext taskContext =
      new HackTaskAttemptContext(jobConf, new TaskAttemptID());
  JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
  HiveMetastores.setTestClient(metastore);
  outputFormat.checkOutputSpecs(jobContext);
  assertEquals(
      BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}