/**
 * Loads a JSON file through Spark SQL and prints the schema of the resulting DataFrame.
 *
 * @param args exactly two entries: {@code args[0]} is the Spark master passed to
 *             {@link JavaSparkContext}, {@code args[1]} is the JSON file handed to
 *             {@code SQLContext.jsonFile}
 * @throws IllegalArgumentException if the number of arguments is not exactly two
 * @throws Exception declared for backward compatibility with existing callers
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new IllegalArgumentException("Usage LoadJsonWithSparkSQL sparkMaster jsonFile");
    }
    String master = args[0];
    String jsonFile = args[1];

    JavaSparkContext sc = new JavaSparkContext(master, "loadJsonwithsparksql");
    try {
        SQLContext sqlCtx = new SQLContext(sc);
        DataFrame input = sqlCtx.jsonFile(jsonFile);
        input.printSchema();
    } finally {
        // Stop the context even when loading the file fails, so it is not left running.
        sc.stop();
    }
}
} // closes the enclosing class, whose header is not visible in this excerpt
// Create the Java Spark context from `conf`, wrap it in a SQLContext, and load the
// file named by `inputFile` into a DataFrame via SQLContext.jsonFile.
// NOTE(review): `conf` and `inputFile` are defined outside this excerpt — presumably a
// SparkConf and a path string; confirm against the surrounding code.
// NOTE(review): nothing in this fragment stops `sc`; verify the surrounding code does.
JavaSparkContext sc = new JavaSparkContext(conf); SQLContext sqlCtx = new SQLContext(sc); DataFrame input = sqlCtx.jsonFile(inputFile);
public static void main(String[] args) { //Sample data-frame loaded from a JSON file DataFrame usersDf = sqlContext.jsonFile("spark-save-to-db/src/main/resources/users.json"); //Save data-frame to MySQL (or any other JDBC supported databases) //Choose one of 2 options depending on your requirement (Not both). //Option 1: Create new table and insert all records. usersDf.createJDBCTable(MYSQL_CONNECTION_URL, "users", true); //Option 2: Insert all records to an existing table. usersDf.insertIntoJDBC(MYSQL_CONNECTION_URL, "users", false); } }
@Override public List<String> call(JobContext jc) throws Exception { InputStream source = getClass().getResourceAsStream("/testweet.json"); // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem). URI input; File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir()); Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING); FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration()); if ("file".equals(fs.getUri().getScheme())) { input = local.toURI(); } else { String uuid = UUID.randomUUID().toString(); Path target = new Path("/tmp/" + uuid + "-tweets.json"); fs.copyFromLocalFile(new Path(local.toURI()), target); input = target.toUri(); } SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx(); sqlctx.jsonFile(input.toString()).registerTempTable("tweets"); List<String> tweetList = new ArrayList<>(); Row[] result = (Row[])(sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10") .collect()); for (Row r : result) { tweetList.add(r.toString()); } return tweetList; }