/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Persists the given entities to Cassandra through Spark.
 *
 * The Java list is converted to a Scala sequence, parallelized into an RDD,
 * and written to the table described by the entity metadata. Always returns
 * {@code true} on success; any failure is rethrown as a {@link KunderaException}.
 *
 * @param listEntity  entities to persist (raw list; element type is m.getEntityClazz())
 * @param m           metadata supplying schema (keyspace), table name, and entity class
 * @param sparkClient holder of the active Spark context
 * @return {@code true} if the write completed
 * @throws KunderaException wrapping any underlying failure
 */
@Override public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient) {
    try {
        // Bridge the Java list into an immutable Scala List for SparkContext.parallelize.
        // NOTE(review): scala.collection.JavaConversions is deprecated in newer Scala
        // versions in favor of JavaConverters — confirm the Scala version before migrating.
        Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
        // ClassTag for the entity class, required by the Scala parallelize API.
        ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
        // Single partition (numSlices = 1) — presumably the batch is small; verify for large writes.
        JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
        // Map each entity to a Cassandra row and save into <schema>.<tableName>.
        CassandraJavaUtil.javaFunctions(personRDD)
                .writerBuilder(m.getSchema(), m.getTableName(), CassandraJavaUtil.mapToRow(m.getEntityClazz()))
                .saveToCassandra();
        return true;
    } catch (Exception e) {
        // Boundary catch: wrap everything, preserving the cause for diagnostics.
        throw new KunderaException("Cannot persist object(s)", e);
    }
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
/**
 * Saves the given dependency links for the current {@code day} into the
 * Cassandra {@code dependencies} table of the configured keyspace.
 *
 * @param sc    Spark context used to build a single-element RDD
 * @param links dependency links to persist
 */
private void store(JavaSparkContext sc, List<Dependency> links) {
    CassandraDependencies row = new CassandraDependencies(links, day);
    List<CassandraDependencies> singleRow = Collections.singletonList(row);
    javaFunctions(sc.parallelize(singleRow))
            .writerBuilder(keyspace, "dependencies", mapToRow(CassandraDependencies.class))
            .saveToCassandra();
}
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
// Persist the product catalog into java_api.products using productWriter as the row mapper.
javaFunctions(productsRDD).writerBuilder("java_api", "products", productWriter).saveToCassandra();
// Persist the sales records into java_api.sales using saleWriter as the row mapper.
javaFunctions(salesRDD).writerBuilder("java_api", "sales", saleWriter).saveToCassandra();
/**
 * Wraps an existing {@link RDD} in an {@link RDDJavaFunctions} adapter, exposing the
 * Cassandra-specific operations to Java callers.
 *
 * @param rdd the RDD to enrich
 * @return a new {@code RDDJavaFunctions} view over {@code rdd}
 */
public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) {
    RDDJavaFunctions<T> functions = new RDDJavaFunctions<>(rdd);
    return functions;
}
/**
 * Groups consecutive items that map to the same key under {@code f}. Unlike
 * {@code groupBy}, grouping only joins items that are already adjacent in the
 * original collection, and it runs locally per partition — equal keys in
 * different partitions are never merged into one group.
 *
 * @param f           key-extraction function applied to each item
 * @param keyClassTag class tag for the key type, required by the Scala runtime
 * @return pair RDD of key to the run of consecutive items sharing that key
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, ClassTag<K> keyClassTag) {
    RDDJavaFunctions<R> functions = CassandraJavaUtil.javaFunctions(rdd());
    return functions.spanBy(f, keyClassTag);
}
/**
 * Repartitions the data (via a shuffle) so that each resulting Spark partition is
 * aligned with the Cassandra replicas of {@code keyspaceName}.{@code tableName}.
 * Calling this method before {@code joinWithCassandraTable} ensures that requests
 * will be coordinator-local. The calling RDD must have rows that can be converted
 * into the partition key of the given Cassandra table.
 *
 * @param keyspaceName       keyspace whose replication drives the partitioning
 * @param tableName          table whose replication drives the partitioning
 * @param partitionsPerHost  number of Spark partitions created per Cassandra host
 * @param partitionKeyMapper selects the columns that form the partition key
 * @param rowWriterFactory   converts RDD elements into Cassandra partition keys
 * @return a new RDD whose partitions are co-located with the table's replicas
 */
public JavaRDD<T> repartitionByCassandraReplica(
        String keyspaceName,
        String tableName,
        int partitionsPerHost,
        ColumnSelector partitionKeyMapper,
        RowWriterFactory<T> rowWriterFactory
) {
    // Connection settings come from the RDD's Spark configuration.
    CassandraConnector connector = defaultConnector();
    // Preserve the element type's ClassTag so the resulting JavaRDD keeps type info.
    ClassTag<T> ctT = rdd.toJavaRDD().classTag();
    CassandraPartitionedRDD<T> newRDD = rddFunctions.repartitionByCassandraReplica(
            keyspaceName,
            tableName,
            partitionsPerHost,
            partitionKeyMapper,
            connector,
            ctT,
            rowWriterFactory);
    return new JavaRDD<>(newRDD, ctT);
}
// Persist the computed summaries into java_api.summaries using summaryWriter as the row mapper.
javaFunctions(summariesRDD).writerBuilder("java_api", "summaries", summaryWriter).saveToCassandra();
/** * A static factory method to create a {@link RDDJavaFunctions} based on an existing {@link RDD} * instance. */ public static <T> RDDJavaFunctions<T> javaFunctions(RDD<T> rdd) { return new RDDJavaFunctions<>(rdd); }
/**
 * Convenience overload of {@link #spanBy(Function, ClassTag)} that derives the
 * key's {@code ClassTag} from a plain {@link Class} instance.
 *
 * @param f        key-extraction function applied to each item
 * @param keyClass class of the key type
 * @return pair RDD of key to the run of consecutive items sharing that key
 * @see #spanBy(Function, ClassTag)
 */
public <K> JavaPairRDD<K, Iterable<R>> spanBy(Function<R, K> f, Class<K> keyClass) {
    ClassTag<K> keyTag = getClassTag(keyClass);
    return CassandraJavaUtil.javaFunctions(rdd()).spanBy(f, keyTag);
}