/**
 * Loads rows from a Hive table, squares each key, and prints the results.
 *
 * <p>Usage: {@code LoadHive sparkMaster tbl}
 *
 * @param args {@code args[0]} = Spark master URL, {@code args[1]} = table name
 * @throws Exception if the wrong number of arguments is supplied
 */
public static void main(String[] args) throws Exception {
  // The usage string lists exactly two arguments (sparkMaster, tbl), and only
  // args[0]/args[1] are ever read — the original required 3 by mistake.
  if (args.length != 2) {
    throw new Exception("Usage LoadHive sparkMaster tbl");
  }
  String master = args[0];
  String tbl = args[1];
  JavaSparkContext sc = new JavaSparkContext(
      master, "loadhive", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  try {
    SQLContext sqlCtx = new SQLContext(sc);
    // Query the user-supplied table; previously `tbl` was parsed but never
    // used and the table name was hard-coded to "src".
    // NOTE(review): table names cannot be bound as SQL parameters in Spark
    // SQL, so this is concatenated — acceptable for a CLI example where the
    // operator supplies the argument.
    DataFrame rdd = sqlCtx.sql("SELECT key, value FROM " + tbl);
    JavaRDD<Integer> squaredKeys = rdd.toJavaRDD().map(new SquareKey());
    List<Integer> result = squaredKeys.collect();
    for (Integer elem : result) {
      System.out.println(elem);
    }
  } finally {
    sc.stop(); // release cluster resources even if the query fails
  }
}
}
// NOTE(review): this fragment is truncated — the anonymous Function, the
// map(...) call, and the enclosing block are not closed in this view, so the
// code is kept byte-for-byte rather than restructured.
// Prints the first column of a Row, then begins mapping each Row of
// `topTweets` (defined outside this view) to its first String column.
System.out.println(row.get(0)); JavaRDD<String> topTweetText = topTweets.toJavaRDD().map(new Function<Row, String>() { public String call(Row row) { return row.getString(0);
// SQL can be run over RDDs that have been registered as tables. DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19"); List<String> teenagerNames = teenagers.toJavaRDD().map( new Function<Row, String>() { public String call(Row row) { return "Name: " + row.getString(0); } }).collect();
// Restrict to rows with 25 <= AGE <= 50, applied as two successive filters
// on the AGE column.
DataFrame greaterThan25 =
    resultingDataFrame.where(resultingDataFrame.col("AGE").geq(25));
DataFrame lessThanGreaterThan =
    greaterThan25.where(resultingDataFrame.col("AGE").leq(50));
// Expose the filtered result through the RDD API.
JavaRDD<Row> resultFromQuery = lessThanGreaterThan.toJavaRDD();
// Score the held-out data with the cross-validated model.
DataFrame predictions = cvModel.transform(testingFrame);
// Pair up (score, label) for each prediction row. Assumes column 4 holds the
// model score and column 1 the ground-truth label, per the original variable
// names — TODO confirm against the prediction schema.
JavaRDD<Tuple2<Object, Object>> scoreAndLabels =
    predictions.toJavaRDD()
        .map(
            new Function<Row, Tuple2<Object, Object>>() {
              @Override
              public Tuple2<Object, Object> call(Row r) {
                Double modelScore = r.getDouble(4);
                Double trueLabel = r.getDouble(1);
                return new Tuple2<Object, Object>(modelScore, trueLabel);
              }
            });
// Feed the score/label pairs into the binary-classification evaluator.
BinaryClassificationMetrics metrics =
    new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
// NOTE(review): these locals are uninitialized placeholders in this snippet;
// presumably they are wired up elsewhere — confirm in the full file.
DataFrame df;
SQLContext sqlContext;
Long start;
Long end;
// Pair each row with its ordinal position so a [start, end) window can be cut.
JavaPairRDD<Row, Long> indexedRDD = df.toJavaRDD().zipWithIndex();
// Keep only rows whose index falls in [start, end), then drop the index with
// keys() so the element type is Row again. The original passed the raw
// (Row, Long) tuple RDD straight to createDataFrame, which expects a
// JavaRDD<Row> matching df's schema and would fail at runtime.
JavaRDD<Row> filteredRDD =
    indexedRDD.filter((Tuple2<Row, Long> v1) -> v1._2 >= start && v1._2 < end).keys();
DataFrame filteredDataFrame = sqlContext.createDataFrame(filteredRDD, df.schema());
// NOTE(review): fragment — `row`, `df`, and the enclosing method/class are
// outside this view, so the code is kept byte-for-byte.
// Derives a label count from the double in column 0 of `row` (presumably the
// maximum label value, so max + 1 = number of classes for 0-based labels —
// TODO confirm against the producing query), then re-queries the `dft` table
// for (label, features) rows.
this.numLabels = (int)row.getDouble(0) + 1; JavaRDD<Row> rows = df.sqlContext().sql("SELECT label, features FROM dft").toJavaRDD();