/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Persists the given entities to Cassandra through Spark.
 *
 * The Java list is converted to a Scala sequence, parallelized into an RDD,
 * and written to the table described by the entity metadata. Always returns
 * {@code true} on success; any failure is rethrown as a {@link KunderaException}.
 *
 * @param listEntity  entities to persist (raw list; element type is m.getEntityClazz())
 * @param m           metadata supplying schema (keyspace), table name, and entity class
 * @param sparkClient holder of the active Spark context
 * @return {@code true} if the write completed
 * @throws KunderaException wrapping any underlying failure
 */
@Override public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient) {
    try {
        // Bridge the Java list into an immutable Scala List for SparkContext.parallelize.
        // NOTE(review): scala.collection.JavaConversions is deprecated in newer Scala
        // versions in favor of JavaConverters — confirm the Scala version before migrating.
        Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
        // ClassTag for the entity class, required by the Scala parallelize API.
        ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
        // Single partition (numSlices = 1) — presumably the batch is small; verify for large writes.
        JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
        // Map each entity to a Cassandra row and save into <schema>.<tableName>.
        CassandraJavaUtil.javaFunctions(personRDD)
                .writerBuilder(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapToRow(m.getEntityClazz()))
                .saveToCassandra();
        return true;
    } catch (Exception e) {
        // Boundary catch: wrap everything, preserving the cause for diagnostics.
        throw new KunderaException("Cannot persist object(s)", e);
    }
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Saves the given dependency links for the current {@code day} into the
 * Cassandra {@code dependencies} table of the configured keyspace.
 *
 * @param sc    Spark context used to build a single-element RDD
 * @param links dependency links to persist
 */
private void store(JavaSparkContext sc, List<Dependency> links) {
    CassandraDependencies row = new CassandraDependencies(links, day);
    List<CassandraDependencies> singleRow = Collections.singletonList(row);
    javaFunctions(sc.parallelize(singleRow))
            .writerBuilder(keyspace, "dependencies", mapToRow(CassandraDependencies.class))
            .saveToCassandra();
}
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
// Persist the product catalog into java_api.products using productWriter as the row mapper.
javaFunctions(productsRDD).writerBuilder("java_api", "products", productWriter).saveToCassandra();
// Persist the sales records into java_api.sales using saleWriter as the row mapper.
javaFunctions(salesRDD).writerBuilder("java_api", "sales", saleWriter).saveToCassandra();
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
// Persist the computed summaries into java_api.summaries using summaryWriter as the row mapper.
javaFunctions(summariesRDD).writerBuilder("java_api", "summaries", summaryWriter).saveToCassandra();
/** * A static factory method to create a {@link RDDJavaFunctions} based on an existing {@link RDD} * instance. */ public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) { return new RDDJavaFunctions<>(rdd); }
/**
 * Convenience overload of {@link #spanBy(Function, ClassTag)} that derives the
 * key's {@code ClassTag} from a plain {@link Class} instance.
 *
 * @param f        key-extraction function applied to each item
 * @param keyClass class of the key type
 * @return pair RDD of key to the run of consecutive items sharing that key
 * @see #spanBy(Function, ClassTag)
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, Class<K> keyClass) {
    ClassTag<K> keyTag = getClassTag(keyClass);
    return CassandraJavaUtil.javaFunctions(rdd()).spanBy(f, keyTag);
}