sanityCheck(table, outputDesc); OutputInfo outputInfo = new OutputInfo(table); if (outputInfo.hasPartitionInfo()) { try { partitionPiece = HiveUtils.computePartitionPath( outputInfo.getPartitionInfo(), outputDesc.getPartitionValues()); } catch (MetaException e) { throw new IOException(e); partitionPiece = "_temp"; String partitionPath = outputInfo.getTableRoot() + Path.SEPARATOR + partitionPiece; outputInfo.setPartitionPath(partitionPath); HadoopUtils.setOutputDir(conf, partitionPath); if (outputInfo.hasPartitionInfo()) { outputInfo.setFinalOutputPath(outputInfo.getPartitionPath()); } else { outputInfo.setFinalOutputPath(table.getSd().getLocation());
/**
 * Table has no partitions, just copy data
 *
 * Moves everything written to the temporary write location into the
 * table's root directory, then removes the write location.
 *
 * @param conf Configuration
 * @param outputInfo OutputInfo
 * @throws IOException I/O errors
 */
private void noPartitionsCopyData(Configuration conf, OutputInfo outputInfo)
    throws IOException {
  Preconditions.checkArgument(!outputInfo.hasPartitionInfo());

  Path tableRoot = new Path(outputInfo.getTableRoot());
  Path stagingPath = new Path(outputInfo.getPartitionPath());
  FileSystem rootFs = tableRoot.getFileSystem(conf);
  FileSystem stagingFs = stagingPath.getFileSystem(conf);

  // Moving files only works within a single filesystem; bail out loudly
  // if the staging area and the table root live on different ones.
  if (!rootFs.getUri().equals(stagingFs.getUri())) {
    LOG.error("Table's root path fs {} is not on same as its partition path fs {}",
        rootFs.getUri(), stagingFs.getUri());
    throw new IllegalStateException("Table's root path fs " + rootFs.getUri() +
        " is not on same as its partition path fs " + stagingFs.getUri());
  }

  LOG.info("No partitions, copying data from {} to {}", stagingPath, tableRoot);
  FileSystems.move(rootFs, stagingPath, stagingPath, tableRoot);
  rootFs.delete(stagingPath, true);
}
/**
 * Register the freshly written partition with the Hive metastore.
 *
 * Builds a Partition from the output info (values, storage descriptor,
 * final location) and adds it; an already-existing partition is treated
 * as success since another task may have just created it.
 *
 * @return null (Void task)
 * @throws TException on metastore communication errors
 */
@Override
public Void idempotentTask() throws TException {
  String dbName = outputDesc.getTableDesc().getDatabaseName();
  String tableName = outputDesc.getTableDesc().getTableName();

  ThriftHiveMetastore.Iface client = outputDesc.metastoreClient(conf);
  Table hiveTable = client.get_table(dbName, tableName);

  Partition partition = new Partition();
  partition.setDbName(dbName);
  partition.setTableName(tableName);
  partition.setParameters(outputInfo.getTableParams());

  // Partition values must be in the table's partition-key order.
  List<String> partitionValues = HiveUtils.orderedPartitionValues(
      hiveTable.getPartitionKeys(), outputDesc.getPartitionValues());
  partition.setValues(partitionValues);

  StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd());
  sd.setParameters(outputInfo.getSerializerParams());
  sd.setLocation(outputInfo.getFinalOutputPath());
  sd.setCols(outputInfo.getColumnInfo());
  partition.setSd(sd);

  // FIX: the message promises the partition *values*, but the original
  // logged the serializer params — log the values actually registered.
  LOG.info("Registering partition with values {} located at {}",
      partitionValues, outputInfo.getFinalOutputPath());
  try {
    client.add_partition(partition);
  } catch (AlreadyExistsException e) {
    // Benign race: another task registered the same partition first.
    LOG.info("Partition already exists; Giraph must have just created it");
  } catch (InvalidObjectException e) {
    // A rejected partition spec is a programming error, not a retryable one.
    throw new IllegalStateException(e);
  }
  return null;
}
};
@Override public RecordWriterImpl getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { HadoopUtils.setWorkOutputDir(taskAttemptContext); Configuration conf = taskAttemptContext.getConfiguration(); OutputConf outputConf = new OutputConf(conf, myProfileId); OutputInfo oti = outputConf.readOutputTableInfo(); HiveUtils.setRCileNumColumns(conf, oti.getColumnInfo().size()); HadoopUtils.setOutputKeyWritableClass(conf, NullWritable.class); Serializer serializer = oti.createSerializer(conf); HadoopUtils.setOutputValueWritableClass(conf, serializer.getSerializedClass()); org.apache.hadoop.mapred.OutputFormat baseOutputFormat = ReflectionUtils.newInstance(oti.getOutputFormatClass(), conf); // CHECKSTYLE: stop LineLength org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter = getBaseRecordWriter(taskAttemptContext, baseOutputFormat); // CHECKSTYLE: resume LineLength StructObjectInspector soi = Inspectors.createFor(oti.getColumnInfo()); if (!outputConf.shouldResetSlowWrites()) { return new RecordWriterImpl(baseWriter, serializer, soi); } else { long writeTimeout = outputConf.getWriteResetTimeout(); return new ResettableRecordWriterImpl(baseWriter, serializer, soi, taskAttemptContext, baseOutputFormat, writeTimeout); } }
/**
 * Check whether the target partition already exists.
 *
 * @return true iff the table is partitioned and the partition we intend
 *         to write already exists in the metastore
 * @throws TException on metastore communication errors
 */
@Override
public Boolean idempotentTask() throws TException {
  ThriftHiveMetastore.Iface client = description.metastoreClient(conf);
  String db = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();

  // Unpartitioned tables have nothing to collide with.
  if (!oti.hasPartitionInfo()) {
    return false;
  }

  Map<String, String> partitionSpec = description.getPartitionValues();
  List<String> orderedValues = listOfPartitionValues(
      partitionSpec, oti.getPartitionInfo());
  if (!partitionExists(client, db, table, orderedValues)) {
    return false;
  }

  LOG.error("Table " + db + ":" + table +
      " partition " + partitionSpec + " already exists");
  return true;
}
};
/**
 * Read output table info from Configuration
 *
 * @return OutputInfo decoded from the configuration, or null if the key
 *         is not set
 */
public OutputInfo readOutputTableInfo() {
  String encoded = conf.get(getOutputTableInfoKey());
  if (encoded == null) {
    return null;
  }
  OutputInfo info = new OutputInfo();
  Writables.readFieldsFromEncodedStr(encoded, info);
  return info;
}
/**
 * Check if the given table is empty, that is has no files
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @throws IOException Hadoop Filesystem issues, or if the table already
 *         contains non-hidden files
 */
private void checkTableIsEmpty(Configuration conf, HiveOutputDescription description,
    OutputInfo oti) throws IOException {
  Path root = new Path(oti.getTableRoot());
  FileSystem fs = root.getFileSystem(conf);
  // Hidden files (e.g. _SUCCESS, .crc) don't count as existing data.
  boolean hasData = fs.exists(root) && FileSystems.dirHasNonHiddenFiles(fs, root);
  if (hasData) {
    throw new IOException("Table " +
        description.getTableDesc().getTableName() + " has existing data");
  }
}
/**
 * Drop partition which we will be writing to
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @return True iff partition was dropped
 * @throws IOException if the metastore client cannot be obtained
 */
private boolean dropPartitionIfExists(Configuration conf,
    HiveOutputDescription description, OutputInfo oti) throws IOException {
  ThriftHiveMetastore.Iface client;
  try {
    client = description.metastoreClient(conf);
  } catch (TException e) {
    // Surface metastore connection problems as I/O failures to the caller.
    throw new IOException(e);
  }

  String db = description.getTableDesc().getDatabaseName();
  String table = description.getTableDesc().getTableName();

  // Nothing to drop for unpartitioned tables.
  if (!oti.hasPartitionInfo()) {
    return false;
  }

  Map<String, String> partitionSpec = description.getPartitionValues();
  List<String> orderedValues = listOfPartitionValues(
      partitionSpec, oti.getPartitionInfo());
  if (!partitionExists(client, db, table, orderedValues)) {
    return false;
  }

  LOG.info("Dropping partition {} from table {}:{}", partitionSpec, db, table);
  return dropPartition(client, db, table, orderedValues);
}