@Override public void setConf(final Configuration config) { super.setConf(config); // Copy some Titan configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat ConfigHelper.setInputInitialAddress(config, titanConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]); if (titanConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) ConfigHelper.setInputRpcPort(config, String.valueOf(titanConf.get(GraphDatabaseConfiguration.STORAGE_PORT))); if (titanConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) ConfigHelper.setInputKeyspaceUserName(config, titanConf.get(GraphDatabaseConfiguration.AUTH_USERNAME)); if (titanConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) ConfigHelper.setInputKeyspacePassword(config, titanConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD)); // Copy keyspace, force the CF setting to edgestore, honor widerows when set final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false); // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false ConfigHelper.setInputColumnFamily(config, titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE), mrConf.get(TitanHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows); log.debug("Set keyspace: {}", titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE)); // Set the column slice bounds via Faunus's vertex query filter final SlicePredicate predicate = new SlicePredicate(); final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE); predicate.setSlice_range(getSliceRange(TitanHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize)); // TODO stop slicing the whole row ConfigHelper.setInputSlicePredicate(config, predicate); }
/**
 * Checks that the job configuration contains the minimum input settings:
 * keyspace and table, an initial node address, and a partitioner.
 *
 * @param conf job configuration to inspect
 * @throws UnsupportedOperationException if any of the required input settings is missing
 */
protected void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and table with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        // The value checked here is the *input* address, so the message says so
        // (it previously read "initial output address").
        throw new UnsupportedOperationException("You must set the initial input address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}
// Excerpt; the enclosing method is not visible here. It fills the Hadoop configuration `conf`
// with the input-side Cassandra settings: the slice predicate (only when none is set yet), split
// size, index-expression range, credentials, partitioner, RPC port, initial address, and the
// keyspace / column family together with the widerows flag. After setConnectionInformation() it
// throws IOException when the RPC port is 0 or the initial address or partitioner is still null.
// NOTE(review): as shown, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)) has no visible
// null guard and the excerpt ends on a dangling `if`; lines appear to be elided, so confirm the
// real control flow against the full method.
if (ConfigHelper.getInputSlicePredicate(conf) == null) ConfigHelper.setInputSlicePredicate(conf, predicate); ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE))); ConfigHelper.setInputRange(conf, getIndexExpressions()); ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password); ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass!= null) ConfigHelper.setInputPartitioner(conf, partitionerClass); if (rpcPort != null) ConfigHelper.setInputRpcPort(conf, rpcPort); if (initHostAddress != null) ConfigHelper.setInputInitialAddress(conf, initHostAddress); ConfigHelper.setInputColumnFamily(conf, keyspace, column_family, widerows); setConnectionInformation(); if (ConfigHelper.getInputRpcPort(conf) == 0) throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getInputInitialAddress(conf) == null) throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getInputPartitioner(conf) == null) throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set"); if (loadSignature == null)
// Excerpt of a job setup routine; the enclosing method is not visible here. It sets the map
// output value class to ObjectWritable, uses RandomPartitioner for both input and output, and
// takes host, port, keyspaces and column families from getConf() under the keys "cassandraHost",
// "cassandraPort", "inputKeyspace", "inputColumnFamily", "outputKeyspace" and
// "outputColumnFamily". The "source" entry is copied into the job configuration, and the slice
// predicate `sp` (with slice range `sr`) is installed as the input predicate.
// NOTE(review): the stray `MAX_COLUMNS_PER_ROW);` is the tail of a statement whose beginning is
// not part of this excerpt; the declarations of `sp` and `sr` are likewise not visible.
job.setMapOutputValueClass(ObjectWritable.class); ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), getConf().get("cassandraHost")); ConfigHelper.setInputRpcPort(job.getConfiguration(), getConf().get("cassandraPort")); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), getConf().get("cassandraHost")); ConfigHelper.setOutputRpcPort(job.getConfiguration(), getConf().get("cassandraPort")); ConfigHelper.setInputColumnFamily(job.getConfiguration(), getConf().get("inputKeyspace"), getConf().get("inputColumnFamily")); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), getConf().get("outputKeyspace"), getConf().get("outputColumnFamily")); job.getConfiguration().set("source", getConf().get("source")); MAX_COLUMNS_PER_ROW); sp.setSlice_range(sr); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), sp);
// Excerpt; the enclosing method is not visible here. It fills `conf` with the output-side
// Cassandra settings (credentials, partitioner, RPC port, initial address, keyspace / column
// family) and also mirrors the RPC port and initial address to the input-side keys. The split
// size is written only when it is positive. After setConnectionInformation() it throws
// IOException when the output RPC port is 0 or the output address or partitioner is still null.
// NOTE(review): as written here, only the first statement after `if (rpcPort != null)` is
// conditional; the following setInputRpcPort / set*InitialAddress calls run unconditionally.
// Braces may have been lost from this excerpt; confirm against the full method.
ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password); if (splitSize > 0) ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass!= null) ConfigHelper.setOutputPartitioner(conf, partitionerClass); if (rpcPort != null) ConfigHelper.setOutputRpcPort(conf, rpcPort); ConfigHelper.setInputRpcPort(conf, rpcPort); ConfigHelper.setOutputInitialAddress(conf, initHostAddress); ConfigHelper.setInputInitialAddress(conf, initHostAddress); ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family); setConnectionInformation(); if (ConfigHelper.getOutputRpcPort(conf) == 0) throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getOutputInitialAddress(conf) == null) throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getOutputPartitioner(conf) == null) throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
/**
 * Initialises this input format from the job configuration: builds the graph
 * handle and vertex query filter, then rewrites the configuration so that the
 * Cassandra input keys point at the Titan edge store.
 *
 * @param config Hadoop configuration; it is modified in place and retained
 */
@Override
public void setConf(final Configuration config) {
    // State derived from the incoming configuration.
    this.graph = new FaunusTitanCassandraGraph(
            GraphFactory.generateTitanConfiguration(config, FAUNUS_GRAPH_INPUT_TITAN));
    this.vertexQuery = VertexQueryFilter.create(config);
    this.pathEnabled = config.getBoolean(FaunusCompiler.PATH_ENABLED, false);

    // Keyspace is copied from the Faunus/Titan key; the column family is
    // always the edge store.
    final String titanKeyspace = config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_KEYSPACE);
    config.set("cassandra.input.keyspace", titanKeyspace);
    ConfigHelper.setInputColumnFamily(config, ConfigHelper.getInputKeyspace(config), Backend.EDGESTORE_NAME);

    // Slice bounds follow the vertex query filter, capped by the configured
    // batch size (unbounded when not configured).
    final int batchSize = config.getInt("cassandra.range.batch.size", Integer.MAX_VALUE);
    final SlicePredicate slice = new SlicePredicate();
    slice.setSlice_range(getSliceRange(this.vertexQuery, batchSize));
    ConfigHelper.setInputSlicePredicate(config, slice);

    // Connection endpoint.
    ConfigHelper.setInputInitialAddress(config, config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_HOSTNAME));
    ConfigHelper.setInputRpcPort(config, config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_PORT));

    config.set("storage.read-only", "true");
    config.set("autotype", "none");

    this.config = config;
}
// Excerpt of a job setup routine; the enclosing method is not visible here. It points the job's
// output at KEYSPACE / OUTPUT_COLUMN_FAMILY and its input at KEYSPACE / COLUMN_FAMILY, using the
// literal endpoint "localhost":"9160" and RandomPartitioner for input. The input slice predicate
// selects exactly one column, named by `columnName`.
ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
// Excerpt; the enclosing method is not visible here. It fills `conf` with the input-side
// settings through both ConfigHelper and CqlConfigHelper: credentials (written via both helpers),
// split size, partitioner, initial address, RPC port, native port, native SSL cipher suites, and
// keyspace / column family. After setConnectionInformation() it throws IOException when the
// initial address or partitioner is still null.
// NOTE(review): as shown, setInputNativeSSLCipherSuites is not covered by the preceding
// `if (nativePort != null)`, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)) has no visible
// null guard, and the excerpt ends on a dangling `if`; lines appear to be elided, so confirm the
// real control flow against the full method.
ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password); CqlConfigHelper.setUserNameAndPassword(conf, username, password); ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass!= null) ConfigHelper.setInputPartitioner(conf, partitionerClass); if (initHostAddress != null) ConfigHelper.setInputInitialAddress(conf, initHostAddress); if (rpcPort != null) ConfigHelper.setInputRpcPort(conf, rpcPort); if (nativePort != null) CqlConfigHelper.setInputNativePort(conf, nativePort); CqlConfigHelper.setInputNativeSSLCipherSuites(conf, nativeSSLCipherSuites); ConfigHelper.setInputColumnFamily(conf, keyspace, column_family); setConnectionInformation(); ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE))); if (ConfigHelper.getInputInitialAddress(conf) == null) throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getInputPartitioner(conf) == null) throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set"); if (loadSignature == null)
// Excerpt; the enclosing method is not visible here. It reads the input-side settings back out
// of `conf`: the row filter of the job key range (null when no range is set), slice predicate,
// wide-rows flag, range batch size, column family, read consistency level, keyspace, and RPC
// port. totalRowCount is the split length when that is below Long.MAX_VALUE, otherwise the
// configured input split size.
// NOTE(review): the (int) cast on split.getLength() would truncate any length above
// Integer.MAX_VALUE; confirm that such lengths cannot occur.
KeyRange jobRange = ConfigHelper.getInputKeyRange(conf); filter = jobRange == null ? null : jobRange.row_filter; predicate = ConfigHelper.getInputSlicePredicate(conf); boolean widerows = ConfigHelper.getInputIsWide(conf); isEmptyPredicate = isEmptyPredicate(predicate); totalRowCount = (this.split.getLength() < Long.MAX_VALUE) ? (int) this.split.getLength() : ConfigHelper.getInputSplitSize(conf); batchSize = ConfigHelper.getRangeBatchSize(conf); cfName = ConfigHelper.getInputColumnFamily(conf); consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf)); keyspace = ConfigHelper.getInputKeyspace(conf); int port = ConfigHelper.getInputRpcPort(conf);
// Excerpt of a job setup routine; the enclosing method is not visible here. It points the job's
// output at KEYSPACE / OUTPUT_COLUMN_FAMILY and its input at KEYSPACE / COLUMN_FAMILY. Endpoint
// ("localhost":"9160") and partitioner (RandomPartitioner) are set through the unprefixed
// setRpcPort / setInitialAddress / setPartitioner methods rather than input- or output-specific
// ones. The input slice predicate selects exactly one column, named by `columnName`.
ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
// Excerpt; the enclosing method is not visible here. It copies connection settings from
// environment variables into `conf`. For each of RPC port, initial address and partitioner, the
// generic variable (PIG_RPC_PORT, PIG_INITIAL_ADDRESS, PIG_PARTITIONER) is written to both the
// input and output keys first, and the PIG_INPUT_* / PIG_OUTPUT_* variable is written afterwards.
// PIG_INPUT_FORMAT, when set, is resolved to a fully qualified class name for inputFormatClass.
// NOTE(review): only the PIG_OUTPUT_* and PIG_INPUT_FORMAT reads are null-guarded in this
// excerpt; the guards for the other reads may have been elided, so confirm against the full method.
ConfigHelper.setInputRpcPort(conf, System.getenv(PIG_RPC_PORT)); ConfigHelper.setOutputRpcPort(conf, System.getenv(PIG_RPC_PORT)); ConfigHelper.setInputRpcPort(conf, System.getenv(PIG_INPUT_RPC_PORT)); if (System.getenv(PIG_OUTPUT_RPC_PORT) != null) ConfigHelper.setOutputRpcPort(conf, System.getenv(PIG_OUTPUT_RPC_PORT)); ConfigHelper.setInputInitialAddress(conf, System.getenv(PIG_INITIAL_ADDRESS)); ConfigHelper.setOutputInitialAddress(conf, System.getenv(PIG_INITIAL_ADDRESS)); ConfigHelper.setInputInitialAddress(conf, System.getenv(PIG_INPUT_INITIAL_ADDRESS)); if (System.getenv(PIG_OUTPUT_INITIAL_ADDRESS) != null) ConfigHelper.setOutputInitialAddress(conf, System.getenv(PIG_OUTPUT_INITIAL_ADDRESS)); ConfigHelper.setInputPartitioner(conf, System.getenv(PIG_PARTITIONER)); ConfigHelper.setOutputPartitioner(conf, System.getenv(PIG_PARTITIONER)); ConfigHelper.setInputPartitioner(conf, System.getenv(PIG_INPUT_PARTITIONER)); if(System.getenv(PIG_OUTPUT_PARTITIONER) != null) ConfigHelper.setOutputPartitioner(conf, System.getenv(PIG_OUTPUT_PARTITIONER)); if (System.getenv(PIG_INPUT_FORMAT) != null) inputFormatClass = getFullyQualifiedClassName(System.getenv(PIG_INPUT_FORMAT));
/**
 * Convenience setter that stores both input-side credentials in one call by
 * delegating to the individual user name and password setters, in that order.
 *
 * @param conf     job configuration to update
 * @param username user name forwarded to {@code setInputKeyspaceUserName}
 * @param password password forwarded to {@code setInputKeyspacePassword}
 */
public static void setInputKeyspaceUserNameAndPassword(Configuration conf, String username, String password) {
    setInputKeyspaceUserName(conf, username);
    setInputKeyspacePassword(conf, password);
}
/**
 * Sets the keyspace and column family that this job reads from.
 *
 * <p>Delegates to the four-argument overload with its trailing boolean set to
 * {@code false}; use that overload directly to pass {@code true} instead.
 *
 * @param conf         job configuration you are about to run
 * @param keyspace     keyspace to read from
 * @param columnFamily column family to read from
 */
public static void setInputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
    final boolean trailingFlag = false;
    setInputColumnFamily(conf, keyspace, columnFamily, trailingFlag);
}
// Excerpt of setConfigs(); braces and the remainder of the body are missing from this view. The
// visible statements read the output keyspace and output column family (stored as `table`) from
// `conf`, and, inside a `try` whose body is cut off, the partitioner.
// NOTE(review): keyspace and table come from the *output* settings while the partitioner is read
// with getInputPartitioner; confirm that mixing input and output keys here is intended.
private void setConfigs() throws IOException keyspace = ConfigHelper.getOutputKeyspace(conf); table = ConfigHelper.getOutputColumnFamily(conf); try partitioner = ConfigHelper.getInputPartitioner(conf);
// Excerpt; the enclosing method is not visible here. It sets the output RPC port in `hadoopConf`
// to CassandraSinkConfiguration.DEFAULT_OUTPUT_RPC_PORT, starts a Cluster builder named after
// this.conf.getClusterName(), and attaches credentials to the builder only when both the output
// user name and the output password in `hadoopConf` are non-empty. The output keyspace and table
// are taken from this.conf.
// NOTE(review): the `{` opened by the credentials `if` is not closed within this excerpt, so
// whether setOutputColumnFamily runs inside or after that block cannot be determined from here.
ConfigHelper.setOutputRpcPort(hadoopConf, CassandraSinkConfiguration.DEFAULT_OUTPUT_RPC_PORT); final Cluster.Builder builder = Cluster.builder().withClusterName(this.conf.getClusterName()); if (!Strings.isNullOrEmpty(ConfigHelper.getOutputKeyspaceUserName(hadoopConf)) && !Strings.isNullOrEmpty(ConfigHelper.getOutputKeyspacePassword(hadoopConf))) { builder.withCredentials( ConfigHelper.getOutputKeyspaceUserName(hadoopConf), ConfigHelper.getOutputKeyspacePassword(hadoopConf) ); ConfigHelper.setOutputColumnFamily(hadoopConf, this.conf.getKeyspace(), this.conf.getTableName());
/**
 * Sets both the keyspace and the column family that this job writes to, by
 * delegating to {@code setOutputKeyspace} and then to the two-argument
 * {@code setOutputColumnFamily}.
 *
 * @param conf         job configuration you are about to run
 * @param keyspace     keyspace to write to
 * @param columnFamily column family to write to
 */
public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
    setOutputKeyspace(conf, keyspace);
    setOutputColumnFamily(conf, columnFamily);
}
public static Cassandra.Client createAuthenticatedClient(String location, int port, Configuration conf) throws Exception { logger.debug("Creating authenticated client for CF input format"); TTransport transport; try { transport = ConfigHelper.getClientTransportFactory(conf).openTransport(location, port); } catch (Exception e) { throw new TTransportException("Failed to open a transport to " + location + ":" + port + ".", e); } TProtocol binaryProtocol = new TBinaryProtocol(transport, true, true); Cassandra.Client client = new Cassandra.Client(binaryProtocol); // log in client.set_keyspace(ConfigHelper.getInputKeyspace(conf)); if ((ConfigHelper.getInputKeyspaceUserName(conf) != null) && (ConfigHelper.getInputKeyspacePassword(conf) != null)) { Map<String, String> creds = new HashMap<String, String>(); creds.put(IAuthenticator.USERNAME_KEY, ConfigHelper.getInputKeyspaceUserName(conf)); creds.put(IAuthenticator.PASSWORD_KEY, ConfigHelper.getInputKeyspacePassword(conf)); AuthenticationRequest authRequest = new AuthenticationRequest(creds); client.login(authRequest); } logger.debug("Authenticated client for CF input format created successfully"); return client; }
/**
 * Asks a node from the configured input address list to describe the local
 * ring for the input keyspace.
 *
 * @param conf job configuration supplying the input addresses and keyspace
 * @return the token ranges reported by {@code describe_local_ring}
 * @throws IOException declared by the signature; Thrift failures raised by the
 *                     ring call itself are rethrown wrapped in {@link RuntimeException}
 */
private List<TokenRange> getRangeMap(Configuration conf) throws IOException {
    final Cassandra.Client ringClient = ConfigHelper.getClientFromInputAddressList(conf);
    try {
        return ringClient.describe_local_ring(ConfigHelper.getInputKeyspace(conf));
    } catch (InvalidRequestException invalidRequest) {
        throw new RuntimeException(invalidRequest);
    } catch (TException thriftFailure) {
        throw new RuntimeException(thriftFailure);
    }
}
/**
 * Checks that the job configuration contains the minimum input settings:
 * keyspace and column family, an initial node address, and a partitioner.
 *
 * @param conf job configuration to inspect
 * @throws UnsupportedOperationException if any of the required input settings is missing
 */
protected void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and columnfamily with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        // The value checked here is the *input* address, so the message says so
        // (it previously read "initial output address").
        throw new UnsupportedOperationException("You must set the initial input address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}
// Excerpt; the enclosing method is not visible here. It fills `conf` with the output-side
// Cassandra settings (credentials, partitioner, RPC port, initial address, keyspace / column
// family) and also mirrors the RPC port and initial address to the input-side keys. The split
// size is written only when it is positive, and `outputQuery` is stored as the output CQL via
// CqlConfigHelper. It then throws IOException when the output RPC port is 0 or the output
// address or partitioner is still null.
// NOTE(review): as written here, only the first statement after `if (rpcPort != null)` is
// conditional; the following setInputRpcPort / set*InitialAddress calls run unconditionally.
// Braces may have been lost from this excerpt; confirm against the full method.
ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password); if (splitSize > 0) ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass!= null) ConfigHelper.setOutputPartitioner(conf, partitionerClass); if (rpcPort != null) ConfigHelper.setOutputRpcPort(conf, rpcPort); ConfigHelper.setInputRpcPort(conf, rpcPort); ConfigHelper.setOutputInitialAddress(conf, initHostAddress); ConfigHelper.setInputInitialAddress(conf, initHostAddress); ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family); CqlConfigHelper.setOutputCql(conf, outputQuery); if (ConfigHelper.getOutputRpcPort(conf) == 0) throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getOutputInitialAddress(conf) == null) throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getOutputPartitioner(conf) == null) throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");