@Override public <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) { if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION)) throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from"); return JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(Constants.getMemoryLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), memoryKey)).toJavaRDD()); } }
@Override public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) { if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION)) throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from"); Spark.create(sparkContext.sc()); final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open()); return graphLocation.isPresent() ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD()) : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD()); }
public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String sourceName, Class<K> keyClass, Class<V> valueClass) { Set<String> inputNames = sourceInputs.get(sourceName); if (inputNames == null || inputNames.isEmpty()) { // should never happen if validation happened correctly at pipeline configure time throw new IllegalArgumentException( sourceName + " has no input. Please check that the source calls setInput at some input."); } JavaPairRDD<K, V> inputRDD = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD()); for (String inputName : inputNames) { inputRDD = inputRDD.union(createInputRDD(sec, jsc, inputName, keyClass, valueClass)); } return inputRDD; }
@Override public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) { if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION)) throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from"); Spark.create(sparkContext.sc()); final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open()); return graphLocation.isPresent() ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD()) : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD()); }
@Override public <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) { if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION)) throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from"); return JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(Constants.getMemoryLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), memoryKey)).toJavaRDD()); } }
JavaRDD<Tuple2<String, Double>> tupleDividedRDD = tupleCombinedRDD.map(keyAndsumAndCount -> new Tuple2<>(keyAndsumAndCount._1(), (double) keyAndsumAndCount._2()._1() / keyAndsumAndCount._2()._2())); JavaPairRDD<String, Double> averageRDDtheHardWay = JavaPairRDD.fromJavaRDD(tupleDividedRDD);
@Override public void setUp() throws IOException { super.setUp(); List<Tuple2<Long, Vector>> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), LDASuite.tinyCorpus()[i]._2())); } JavaRDD<Tuple2<Long, Vector>> tmpCorpus = jsc.parallelize(tinyCorpus, 2); corpus = JavaPairRDD.fromJavaRDD(tmpCorpus); }
@Override public void setUp() throws IOException { super.setUp(); List<Tuple2<Long, Vector>> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), LDASuite.tinyCorpus()[i]._2())); } JavaRDD<Tuple2<Long, Vector>> tmpCorpus = jsc.parallelize(tinyCorpus, 2); corpus = JavaPairRDD.fromJavaRDD(tmpCorpus); }
@Test public void localLdaMethods() { JavaRDD<Tuple2<Long, Vector>> docs = jsc.parallelize(toyData, 2); JavaPairRDD<Long, Vector> pairedDocs = JavaPairRDD.fromJavaRDD(docs); // check: topicDistributions assertEquals(toyModel.topicDistributions(pairedDocs).count(), pairedDocs.count()); // check: logPerplexity double logPerplexity = toyModel.logPerplexity(pairedDocs); // check: logLikelihood. List<Tuple2<Long, Vector>> docsSingleWord = new ArrayList<>(); docsSingleWord.add(new Tuple2<>(0L, Vectors.dense(1.0, 0.0, 0.0))); JavaPairRDD<Long, Vector> single = JavaPairRDD.fromJavaRDD(jsc.parallelize(docsSingleWord)); double logLikelihood = toyModel.logLikelihood(single); }
@Override public void setUp() throws IOException { super.setUp(); List<Tuple2<Long, Vector>> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), LDASuite.tinyCorpus()[i]._2())); } JavaRDD<Tuple2<Long, Vector>> tmpCorpus = jsc.parallelize(tinyCorpus, 2); corpus = JavaPairRDD.fromJavaRDD(tmpCorpus); }
@Test public void localLdaMethods() { JavaRDD<Tuple2<Long, Vector>> docs = jsc.parallelize(toyData, 2); JavaPairRDD<Long, Vector> pairedDocs = JavaPairRDD.fromJavaRDD(docs); // check: topicDistributions assertEquals(toyModel.topicDistributions(pairedDocs).count(), pairedDocs.count()); // check: logPerplexity double logPerplexity = toyModel.logPerplexity(pairedDocs); // check: logLikelihood. List<Tuple2<Long, Vector>> docsSingleWord = new ArrayList<>(); docsSingleWord.add(new Tuple2<>(0L, Vectors.dense(1.0, 0.0, 0.0))); JavaPairRDD<Long, Vector> single = JavaPairRDD.fromJavaRDD(jsc.parallelize(docsSingleWord)); double logLikelihood = toyModel.logLikelihood(single); }
@Test public void localLdaMethods() { JavaRDD<Tuple2<Long, Vector>> docs = jsc.parallelize(toyData, 2); JavaPairRDD<Long, Vector> pairedDocs = JavaPairRDD.fromJavaRDD(docs); // check: topicDistributions assertEquals(toyModel.topicDistributions(pairedDocs).count(), pairedDocs.count()); // check: logPerplexity double logPerplexity = toyModel.logPerplexity(pairedDocs); // check: logLikelihood. List<Tuple2<Long, Vector>> docsSingleWord = new ArrayList<>(); docsSingleWord.add(new Tuple2<>(0L, Vectors.dense(1.0, 0.0, 0.0))); JavaPairRDD<Long, Vector> single = JavaPairRDD.fromJavaRDD(jsc.parallelize(docsSingleWord)); double logLikelihood = toyModel.logLikelihood(single); }
JavaRDD<Tuple2<Integer, String>> rdd3 = (JavaRDD<Tuple2<Integer, String>>)listOfRDDs.get(2); JavaPairRDD<Integer, String> prdd3 = JavaPairRDD.fromJavaRDD(rdd3); PairFunction<Integer, Integer, Integer> mapToTuple = (PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
JavaRDD<Integer> rdd2 = (JavaRDD<Integer>) listOfRDDs.get(1); JavaRDD<Tuple2<Integer, String>> rdd3 = (JavaRDD<Tuple2<Integer, String>>) listOfRDDs.get(2); JavaPairRDD<Integer, String> prdd3 = JavaPairRDD.fromJavaRDD(rdd3); PairFunction<Integer, Integer, Integer> mapToTuple = (Integer i) -> new Tuple2<>(i, i);
JavaRDD<Integer> rdd2 = (JavaRDD<Integer>) listOfRDDs.get(1); JavaRDD<Tuple2<Integer, String>> rdd3 = (JavaRDD<Tuple2<Integer, String>>) listOfRDDs.get(2); JavaPairRDD<Integer, String> prdd3 = JavaPairRDD.fromJavaRDD(rdd3); PairFunction<Integer, Integer, Integer> mapToTuple = (Integer i) -> new Tuple2<>(i, i);
JavaRDD<Tuple2<Integer, String>> rdd3 = (JavaRDD<Tuple2<Integer, String>>)listOfRDDs.get(2); JavaPairRDD<Integer, String> prdd3 = JavaPairRDD.fromJavaRDD(rdd3); PairFunction<Integer, Integer, Integer> mapToTuple = (PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD);
JavaPairRDD<String, Integer> initialRDD = JavaPairRDD.fromJavaRDD(tmpRDD);
.fromJavaRDD(mappedGoraRdd).reduceByKey(redFunc).mapToPair(metricFunc);
JavaPairRDD.fromJavaRDD(rdd.toJavaRDD());