/**
 * Initialize with default profile ID using Configuration and output
 * description passed in. Delegates to {@code initProfile} with
 * {@code DEFAULT_PROFILE_ID}.
 *
 * @param conf Configuration to use
 * @param outputDesc HiveOutputDescription
 * @throws TException Hive Metastore issues
 * @throws IOException if profile initialization fails
 */
public static void initDefaultProfile(Configuration conf,
    HiveOutputDescription outputDesc) throws TException, IOException {
  initProfile(conf, outputDesc, DEFAULT_PROFILE_ID);
}
/**
 * Single threaded execution: performs the entire write on the calling
 * thread by delegating directly to {@code write}.
 *
 * @param context Context
 * @throws Exception propagated from the underlying write
 */
private void singleThreaded(Context context) throws Exception {
  write(context);
}
/**
 * Commit the job: let the wrapped committer finish first, then finalize
 * the Hive side — register partitions (partitioned tables) or copy data
 * into place (unpartitioned tables) — and finally write the success marker.
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Let the base committer finalize its own output first.
  baseCommitter.commitJob(jobContext);
  Configuration conf = jobContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, profileId);
  HiveOutputDescription outputDesc = outputConf.readOutputDescription();
  // NOTE(review): assumes the output info was stored in the Configuration
  // during setup; a missing entry would NPE on the next line — confirm
  // setup always runs before commit.
  OutputInfo outputInfo = outputConf.readOutputTableInfo();
  if (outputInfo.hasPartitionInfo()) {
    registerPartitions(conf, outputDesc, outputInfo);
  } else {
    noPartitionsCopyData(conf, outputInfo);
  }
  writeSuccessFile(conf);
}
/**
 * Check whether this task's effect is already present: returns true iff
 * the target partition already exists in the metastore (only possible for
 * partitioned outputs) — presumably so the caller can skip redoing the
 * work; verify against the retry framework that invokes this.
 */
@Override
public Boolean idempotentTask() throws TException {
  ThriftHiveMetastore.Iface client = description.metastoreClient(conf);
  String db = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();
  if (oti.hasPartitionInfo()) {
    Map<String, String> partitionSpec = description.getPartitionValues();
    // Order the user's partition spec by the table's partition columns.
    List<String> partitionValues = listOfPartitionValues(
        partitionSpec, oti.getPartitionInfo());
    if (partitionExists(client, db, table, partitionValues)) {
      LOG.error("Table " + db + ":" + table +
          " partition " + partitionSpec + " already exists");
      return true;
    }
  }
  return false;
}
};
/**
 * Check that the table's partition info and the user's match: a
 * partitioned table requires user partition values (then either dropped
 * if present or verified absent, per config), while an unpartitioned
 * table must receive none and must currently be empty.
 *
 * @param conf Configuration
 * @param description HiveInputDescription
 * @param oti OutputInfo
 * @param outputConf OutputConf
 * @throws IOException on a partition-info mismatch
 */
private void checkPartitionInfo(Configuration conf,
    HiveOutputDescription description, OutputInfo oti, OutputConf outputConf)
  throws IOException {
  if (oti.hasPartitionInfo()) {
    // Partitioned table: the user must name a target partition.
    if (!description.hasPartitionValues()) {
      throw new IOException("table is partitioned but user input isn't");
    }
    if (outputConf.shouldDropPartitionIfExists()) {
      dropPartitionIfExists(conf, description, oti);
    } else {
      checkPartitionDoesntExist(conf, description, oti);
    }
    return;
  }
  // Unpartitioned table: no partition values allowed, table must be empty.
  if (description.hasPartitionValues()) {
    throw new IOException("table is not partitioned but user input is");
  }
  checkTableIsEmpty(conf, description, oti);
}
/**
 * Validate the output specs before the job runs: the output description
 * and table info must be present in the Configuration, the target table
 * must exist, and its partition layout must match the user's input.
 * Missing Configuration entries are logged and tolerated (early return)
 * rather than failing the job.
 *
 * @param jobContext JobContext
 * @throws IOException if a validation check fails
 * @throws InterruptedException per the OutputFormat contract
 */
@Override
public void checkOutputSpecs(JobContext jobContext)
  throws IOException, InterruptedException {
  Configuration conf = jobContext.getConfiguration();
  OutputConf outputConf = new OutputConf(conf, myProfileId);
  HiveOutputDescription description = outputConf.readOutputDescription();
  LOG.info("Check output specs of " + description);
  if (description == null) {
    LOG.error("HiveOutputDescription is null in Configuration, nothing to check");
    return;
  }
  checkTableExists(conf, description);
  // Only deserialize the table info once the description is known to be
  // present; the old code paid for this read even on the early return.
  OutputInfo oti = outputConf.readOutputTableInfo();
  if (oti == null) {
    LOG.error("OutputInfo is null in Configuration, nothing to check");
    return;
  }
  checkPartitionInfo(conf, description, oti, outputConf);
}
/**
 * Read user's output description from Configuration.
 *
 * @return the deserialized HiveOutputDescription, or null if none stored
 */
public HiveOutputDescription readOutputDescription() {
  String encoded = conf.get(getOutputDescriptionKey());
  if (encoded == null) {
    // Nothing was written under this profile's key.
    return null;
  }
  HiveOutputDescription description = new HiveOutputDescription();
  Writables.readFieldsFromEncodedStr(encoded, description);
  return description;
}
/**
 * Fetch the target table from the metastore. A pure read with no side
 * effects, so it is safe to retry.
 */
@Override
public Table idempotentTask() throws TException {
  String dbName = outputDesc.getTableDesc().getDatabaseName();
  String tableName = outputDesc.getTableDesc().getTableName();
  ThriftHiveMetastore.Iface client = outputDesc.metastoreClient(conf);
  return client.get_table(dbName, tableName);
}
};
/**
 * Read output table info from Configuration.
 *
 * @return the deserialized OutputInfo, or null if none stored
 */
public OutputInfo readOutputTableInfo() {
  String encoded = conf.get(getOutputTableInfoKey());
  if (encoded == null) {
    // Nothing was written under this profile's key.
    return null;
  }
  OutputInfo info = new OutputInfo();
  Writables.readFieldsFromEncodedStr(encoded, info);
  return info;
}
/**
 * Write a record, timing the underlying write. If a single write exceeds
 * the configured timeout, the base writer is closed and replaced with a
 * fresh one (a new output file) to recover from a slow or stuck stream.
 *
 * @param key record key
 * @param value record to write
 * @throws IOException underlying write/close issues
 * @throws InterruptedException underlying write issues
 */
@Override
public void write(WritableComparable key, HiveWritableRecord value)
  throws IOException, InterruptedException {
  long startTime = System.currentTimeMillis();
  super.write(key, value);
  long elapsedTime = System.currentTimeMillis() - startTime;
  if (elapsedTime > writeTimeoutMs) {
    // elapsedTime is in milliseconds; the old message mislabeled it as
    // seconds ("{}s").
    LOG.info("write: Write taking too long ({}ms), creating new file to write to",
        elapsedTime);
    baseWriter.close(new ProgressReporter(taskAttemptContext));
    baseWriter = HiveApiOutputFormat.getBaseRecordWriter(taskAttemptContext,
        baseOutputFormat);
  }
}
}
/**
 * Check table is not misconfigured for this writer: storage must be
 * uncompressed with no bucketing or sort columns, and the table's
 * partition-key count must equal the number of partition values supplied.
 *
 * @param table Table to check
 * @param outputDesc HiveOutputDescription to use
 */
private static void sanityCheck(Table table,
    HiveOutputDescription outputDesc) {
  StorageDescriptor sd = table.getSd();
  // Compressed, bucketed, or sorted storage is rejected outright.
  Preconditions.checkArgument(!sd.isCompressed());
  Preconditions.checkArgument(nullOrEmpty(sd.getBucketCols()));
  Preconditions.checkArgument(nullOrEmpty(sd.getSortCols()));
  // Every partition key needs a user-supplied value, and vice versa.
  Preconditions.checkArgument(table.getPartitionKeysSize() ==
      outputDesc.numPartitionValues());
}
/**
 * Configuration key under which the output description is stored,
 * namespaced by this profile's ID.
 *
 * @return Configuration key for the output description
 */
public String getOutputDescriptionKey() {
  return getProfileId() + OUTPUT_DESCRIPTION_KEY;
}
/**
 * Lazily create this thread's per-thread state from the shared
 * Configuration on first access.
 */
@Override
protected PerThread initialValue() {
  return new PerThread(conf);
}
};
/**
 * Constructor. Stores the collaborators used to serialize and emit
 * records; the observer starts as a no-op (callers may install one later).
 *
 * @param baseWriter Hadoop RecordWriter
 * @param serializer Serializer
 * @param objectInspector ObjectInspector
 */
public RecordWriterImpl(
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable>
        baseWriter,
    Serializer serializer, ObjectInspector objectInspector) {
  // CHECKSTYLE: resume LineLength
  this.baseWriter = baseWriter;
  this.serializer = serializer;
  this.objectInspector = objectInspector;
  // Default to the empty (no-op) observer.
  this.observer = HiveApiOutputObserver.Empty.get();
}
/**
 * Write output table info to Configuration.
 *
 * @param oti OutputInfo to write
 */
public void writeOutputTableInfo(OutputInfo oti) {
  String encoded = Writables.writeToEncodedStr(oti);
  conf.set(getOutputTableInfoKey(), encoded);
}
/**
 * Write user's output description to Configuration.
 *
 * @param hod HiveOutputDescription
 */
public void writeOutputDescription(HiveOutputDescription hod) {
  String encoded = Writables.writeToEncodedStr(hod);
  conf.set(getOutputDescriptionKey(), encoded);
}
/**
 * Initialize using object's profile ID with Configuration and output
 * description passed in. Delegates to {@code initProfile}.
 *
 * @param conf Configuration to use
 * @param outputDesc HiveOutputDescription
 * @throws TException Hive Metastore issues
 * @throws IOException if profile initialization fails
 */
public void init(Configuration conf, HiveOutputDescription outputDesc)
  throws TException, IOException {
  initProfile(conf, outputDesc, myProfileId);
}
/**
 * Configuration key under which the output table info is stored,
 * namespaced by this profile's ID.
 *
 * @return Configuration key for the output table info
 */
public String getOutputTableInfoKey() {
  return getProfileId() + OUTPUT_TABLE_INFO_KEY;
}