@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    reader.initialize(inputSplit, taskAttemptContext);
}
@Override
public float getProgress() {
    return reader.getProgress();
}
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    return this.columnFamilyInputFormat.getSplits(jobContext);
}
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some Titan configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, titanConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (titanConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config, String.valueOf(titanConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config, titanConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config, titanConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument;
    // using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config, titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(TitanHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(TitanHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
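The getSliceRange helper called above is not part of this excerpt. A plausible sketch, assuming it slices the entire row bounded by the configured batch size, might look like this:

// A plausible sketch of the getSliceRange helper referenced above (not part of
// this excerpt): slice the whole row, bounded by the configured batch size.
private SliceRange getSliceRange(final SliceQuery slice, final int limit) {
    final SliceRange sliceRange = new SliceRange();
    sliceRange.setStart(slice.getSliceStart().asByteBuffer());
    sliceRange.setFinish(slice.getSliceEnd().asByteBuffer());
    sliceRange.setCount(Math.min(limit, slice.getLimit()));
    return sliceRange;
}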
KV completedKV = null;
boolean hasNext;
do {
    hasNext = reader.nextKeyValue();

    if (!hasNext) {
        // Underlying Cassandra reader is exhausted: emit whatever is buffered
        completedKV = incompleteKV;
        incompleteKV = null;
    } else {
        StaticArrayBuffer key = StaticArrayBuffer.of(reader.getCurrentKey());
        SortedMap<ByteBuffer, Cell> valueSortedMap = reader.getCurrentValue();
        List<Entry> entries = new ArrayList<>(valueSortedMap.size());
        for (Map.Entry<ByteBuffer, Cell> ent : valueSortedMap.entrySet()) {
            entries.add(StaticArrayEntry.of(StaticArrayBuffer.of(ent.getKey()),
                    StaticArrayBuffer.of(ent.getValue().value())));
        }
        // ... buffer entries under their row key; a key change completes the previous KV
    }
} while (hasNext && null == completedKV);
/**
 * Set the keyspace and column family for the output of this job.
 *
 * @param conf Job configuration you are about to run
 * @param keyspace
 * @param columnFamily
 */
public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
    setOutputKeyspace(conf, keyspace);
    setOutputColumnFamily(conf, columnFamily);
}
/**
 * Set the keyspace and column family for the input of this job.
 *
 * @param conf Job configuration you are about to run
 * @param keyspace
 * @param columnFamily
 */
public static void setInputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
    setInputColumnFamily(conf, keyspace, columnFamily, false);
}
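For context, a minimal sketch of how a job might use these input helpers. The class name, host, port, partitioner, keyspace, and column family values are all placeholders for this sketch:

import java.io.IOException;

import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class InputConfigSketch {
    public static void main(String[] args) throws IOException {
        Job job = Job.getInstance(new Configuration(), "cassandra-input-sketch");
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        Configuration conf = job.getConfiguration();
        ConfigHelper.setInputInitialAddress(conf, "127.0.0.1");
        ConfigHelper.setInputRpcPort(conf, "9160");
        ConfigHelper.setInputPartitioner(conf, "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(conf, "MyKeyspace", "MyColumnFamily"); // wide rows default to false
        // Slice every column of each row, mirroring the predicate built in setConf above
        SlicePredicate predicate = new SlicePredicate().setSlice_range(new SliceRange(
                ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));
        ConfigHelper.setInputSlicePredicate(conf, predicate);
    }
}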
/**
 * Returns TaskAttemptContext.getTaskAttemptID(). Works with both
 * Hadoop 1 and 2.
 */
public static TaskAttemptID getTaskAttemptID(TaskAttemptContext taskContext) {
    return (TaskAttemptID) invoke(GET_TASK_ATTEMPT_ID, taskContext);
}
@Override
public void checkOutputSpecs(JobContext context) {
    checkOutputSpecs(HadoopCompat.getConfiguration(context));
}
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
public void checkOutputSpecs(JobContext context) {
    checkOutputSpecs(HadoopCompat.getConfiguration(context));
}
public static ColumnFamilySplit read(DataInput in) throws IOException {
    ColumnFamilySplit w = new ColumnFamilySplit();
    w.readFields(in);
    return w;
}
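A small sketch of the Writable round trip this factory supports, assuming an already-constructed ColumnFamilySplit and the usual java.io stream classes (ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream):

// Sketch: serialize a split with write(), then rehydrate an equivalent copy with read().
static ColumnFamilySplit roundTrip(ColumnFamilySplit split) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    split.write(new DataOutputStream(buffer));
    return ColumnFamilySplit.read(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
}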
public static void setOutputKeyspaceUserNameAndPassword(Configuration conf, String username, String password) {
    setOutputKeyspaceUserName(conf, username);
    setOutputKeyspacePassword(conf, password);
}
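A hypothetical output-side counterpart combining this helper with the keyspace/column-family setter above; the class name, host, port, partitioner, keyspace, column family, and credentials are placeholders:

import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.hadoop.conf.Configuration;

public class OutputConfigSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        ConfigHelper.setOutputInitialAddress(conf, "127.0.0.1");
        ConfigHelper.setOutputRpcPort(conf, "9160");
        ConfigHelper.setOutputPartitioner(conf, "Murmur3Partitioner");
        ConfigHelper.setOutputColumnFamily(conf, "MyKeyspace", "MyColumnFamily");
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, "cassandra", "cassandra");
    }
}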
@Override
public void close() throws IOException {
    reader.close();
}
/**
 * Upon construction, obtain the map that this writer will use to collect
 * mutations, and the ring cache for the given keyspace.
 *
 * @param context the task attempt context
 * @throws IOException
 */
CqlRecordWriter(TaskAttemptContext context) throws IOException {
    this(HadoopCompat.getConfiguration(context));
    this.context = context;
}
@Override
public RecordReader<StaticBuffer, Iterable<Entry>> createRecordReader(final InputSplit inputSplit,
        final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    columnFamilyRecordReader =
            (ColumnFamilyRecordReader) columnFamilyInputFormat.createRecordReader(inputSplit, taskAttemptContext);
    titanRecordReader = new CassandraBinaryRecordReader(columnFamilyRecordReader);
    return titanRecordReader;
}
Configuration configuration = new Configuration();
FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:54310"), configuration);
Path file = new Path("hdfs://localhost:54310/s2013/batch/table.html");
if (hdfs.exists(file)) {
    hdfs.delete(file, true);
}
OutputStream os = hdfs.create(file, new Progressable() {
    public void progress() {
        // Called periodically as bytes are flushed to the cluster
        System.out.println("...bytes written");
    }
});
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
br.write("Hello World");
br.close();
hdfs.close();
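A matching read-back sketch for the file written above, using the same placeholder NameNode address and path:

FileSystem fs = FileSystem.get(new URI("hdfs://localhost:54310"), new Configuration());
BufferedReader reader = new BufferedReader(
        new InputStreamReader(fs.open(new Path("hdfs://localhost:54310/s2013/batch/table.html")), "UTF-8"));
System.out.println(reader.readLine()); // prints: Hello World
reader.close();
fs.close();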
/**
 * Increment the counter. Works with both Hadoop 1 and 2.
 */
public static void incrementCounter(Counter counter, long increment) {
    // Incrementing a counter might be called often and could be affected by the
    // cost of invoke(); a good candidate to handle in a shim.
    // (TODO Raghu) figure out how to achieve such a build with maven
    invoke(INCREMENT_COUNTER_METHOD, counter, increment);
}
/**
 * Invoke getCounter() on TaskInputOutputContext. Works with both
 * Hadoop 1 and 2.
 */
public static Counter getCounter(TaskInputOutputContext context, String groupName, String counterName) {
    return (Counter) invoke(GET_COUNTER_METHOD, context, groupName, counterName);
}
/**
 * Invoke getConfiguration() on JobContext. Works with both
 * Hadoop 1 and 2.
 */
public static Configuration getConfiguration(JobContext context) {
    return (Configuration) invoke(GET_CONFIGURATION_METHOD, context);
}
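To illustrate how these shims fit together, here is a hypothetical mapper that goes through HadoopCompat rather than calling the context and counter APIs directly, so the same class compiles against both Hadoop 1 and 2. CompatMapper and the counter group/name are made up for this sketch:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;

public class CompatMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Shim calls instead of context.getConfiguration() / counter.increment(),
        // whose underlying types changed between Hadoop 1 and 2
        Configuration conf = HadoopCompat.getConfiguration(context);
        Counter lines = HadoopCompat.getCounter(context, "stats", "lines");
        HadoopCompat.incrementCounter(lines, 1L);
        context.write(value, new LongWritable(1L));
    }
}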