public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage LoadJsonWithSparkSQL sparkMaster jsonFile");
  }
  String master = args[0];
  String jsonFile = args[1];

  JavaSparkContext sc = new JavaSparkContext(master, "loadJsonwithsparksql");
  SQLContext sqlCtx = new SQLContext(sc);
  DataFrame input = sqlCtx.jsonFile(jsonFile);
  input.printSchema();
}
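// Note: SQLContext.jsonFile(...) shown above was deprecated in Spark 1.4 in
// favor of the DataFrameReader API. A minimal sketch of the equivalent call,
// reusing the sqlCtx and jsonFile variables from the example above:
DataFrame input = sqlCtx.read().json(jsonFile);
input.printSchema();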
public static void main(String[] args) throws Exception {
  // The usage string names two arguments, and only two are read below,
  // so the argument check is against 2 (the original checked against 3).
  if (args.length != 2) {
    throw new Exception("Usage LoadHive sparkMaster tbl");
  }
  String master = args[0];
  String tbl = args[1];

  JavaSparkContext sc = new JavaSparkContext(
      master, "loadhive", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  SQLContext sqlCtx = new SQLContext(sc);
  // Query the table passed on the command line (tbl was previously unused).
  DataFrame rdd = sqlCtx.sql("SELECT key, value FROM " + tbl);

  JavaRDD<Integer> squaredKeys = rdd.toJavaRDD().map(new SquareKey());
  List<Integer> result = squaredKeys.collect();
  for (Integer elem : result) {
    System.out.println(elem);
  }
}
SparkConf conf = new SparkConf();
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlCtx = new SQLContext(sc);
DataFrame input = sqlCtx.jsonFile(inputFile);
/**
 * Overridden superclass constructor.
 *
 * @param master  the url of the master node.
 * @param appName the name of the application.
 */
public DeepSparkContext(String master, String appName) {
  super(master, appName);
  sqlContext = new SQLContext(this);
}
/**
 * Overridden superclass constructor.
 *
 * @param sc an already created spark context.
 */
public DeepSparkContext(SparkContext sc) {
  super(sc);
  sqlContext = new SQLContext(this);
}
/**
 * Overridden superclass constructor.
 *
 * @param master    the url of the master node.
 * @param appName   the name of the application.
 * @param sparkHome the spark home folder.
 * @param jars      the jar file(s) to serialize and send to all the cluster nodes.
 */
public DeepSparkContext(String master, String appName, String sparkHome, String[] jars) {
  super(master, appName, sparkHome, jars);
  sqlContext = new SQLContext(this);
}
@Override
public SQLContext sqlctx() {
  if (sqlctx == null) {
    synchronized (this) {
      if (sqlctx == null) {
        sqlctx = new SQLContext(sc);
      }
    }
  }
  return sqlctx;
}
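// The double-checked locking above is only safe on the JVM if the cached
// field is volatile. A minimal sketch of the fields the accessor assumes
// (these declarations are hypothetical; only the names appear in the snippet):
private final SparkContext sc;      // assumed to be set elsewhere, e.g. in the constructor
private volatile SQLContext sqlctx; // volatile makes the unsynchronized null-check safe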
/**
 * Overridden superclass constructor.
 *
 * @param master    the url of the master node.
 * @param appName   the name of the application.
 * @param sparkHome the spark home folder.
 * @param jarFile   the jar file to serialize and send to all the cluster nodes.
 */
public DeepSparkContext(String master, String appName, String sparkHome, String jarFile) {
  super(master, appName, sparkHome, jarFile);
  sqlContext = new SQLContext(this);
}
public WhitespaceClassifier(Lexicon lexicon, Map<String, Pattern> patterns) {
  sqlContext = new SQLContext(jsc); // jsc: a JavaSparkContext defined elsewhere in the class
  this.lexicon = lexicon;
  this.patterns = patterns;
}
public DeepSparkContext(DeepSparkConfig deepSparkConfig) {
  super(deepSparkConfig);
  sqlContext = new SQLContext(this);
}
SQLContext sqlContext = new SQLContext(ctx);
/**
 * Get the {@link SQLContext} for producing datasets and data frames.
 *
 * @return the {@link SQLContext}.
 */
public static SQLContext getSqlContext() {
  if (sqlContext == null) {
    sqlContext = new SQLContext(getSparkSession());
  }
  return sqlContext;
}
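// In Spark 2.x a SparkSession already carries a backward-compatible SQLContext
// wrapper, so the lazy construction above can be replaced with a direct call.
// A sketch, assuming getSparkSession() returns a SparkSession as above:
SQLContext sqlContext = getSparkSession().sqlContext();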
private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath,
    String repairedOutputPath, String basePath) throws Exception {
  DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath,
      new SQLContext(jsc), FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
  job.fixDuplicates(true);
  return 0;
}
public static void main(String[] args) throws Exception {
  String arffFile = "datasets/simulated/syntheticData.arff";
  String sparkFile = "datasets/simulated/syntheticData.json";

  SparkConf conf = new SparkConf().setAppName("SparkLink!").setMaster("local");
  SparkContext sc = new SparkContext(conf);
  SQLContext sqlContext = new SQLContext(sc);
  JavaSparkContext jsc = new JavaSparkContext(sc);

  ARFFtoSparkFormat(arffFile, sparkFile, "json", sqlContext, jsc);
}
@Before
public void init() throws IOException {
  // Initialize a local spark env
  jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieMergeOnReadTable"));

  // Create a temp folder as the base path
  TemporaryFolder folder = new TemporaryFolder();
  folder.create();
  basePath = folder.getRoot().getAbsolutePath();
  jsc.hadoopConfiguration().addResource(dfs.getConf());
  dfs.mkdirs(new Path(basePath));
  HoodieTestUtils.initTableType(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);

  sqlContext = new SQLContext(jsc); // SQLContext stuff
}