public static void main(String[] args) throws Exception {
  // The usage message names two arguments, so check for 2, not 3.
  if (args.length != 2) {
    throw new Exception("Usage: LoadHive sparkMaster tbl");
  }
  String master = args[0];
  String tbl = args[1];
  JavaSparkContext sc = new JavaSparkContext(
    master, "loadhive", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Querying a Hive table requires a HiveContext (a SQLContext subclass) in Spark 1.x.
  SQLContext sqlCtx = new HiveContext(sc);
  // Query the table passed on the command line instead of a hardcoded name.
  DataFrame rows = sqlCtx.sql("SELECT key, value FROM " + tbl);
  JavaRDD<Integer> squaredKeys = rows.toJavaRDD().map(new SquareKey());
  List<Integer> result = squaredKeys.collect();
  for (Integer elem : result) {
    System.out.println(elem);
  }
}
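// SquareKey is referenced above but not defined in this snippet; a minimal
// sketch, assuming the key column is an int in position 0 of each Row and
// using org.apache.spark.api.java.function.Function:
public static class SquareKey implements Function<Row, Integer> {
  @Override
  public Integer call(Row row) throws Exception {
    int key = row.getInt(0);
    return key * key;
  }
}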
DataFrame topTweets = sqlCtx.sql(
  "SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");
Row[] result = topTweets.collect();
for (Row row : result) {
  System.out.println(row.get(0));
}
DataFrame tweetLength = sqlCtx.sql(
  "SELECT stringLengthJava('text') FROM tweets LIMIT 10");
Row[] lengths = tweetLength.collect();
// Iterate over the UDF results, not the earlier result array.
for (Row row : lengths) {
  System.out.println(row.get(0));
}
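// The stringLengthJava UDF used above must be registered before the query runs;
// a sketch using the Spark 1.x Java UDF API (only the UDF name comes from the
// snippet, the body is an assumption):
sqlCtx.udf().register("stringLengthJava", new UDF1<String, Integer>() {
  @Override
  public Integer call(String str) throws Exception {
    return str.length();
  }
}, DataTypes.IntegerType);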
@Override
public void saveDataFrame(DataFrame dataFrame, Class<?> entityClazz, Map<String, Object> properties) {
  // Switch to the target keyspace/database before writing into the table.
  dataFrame.sqlContext().sql("use " + (String) properties.get(KEYSPACE));
  dataFrame.write().insertInto((String) properties.get(TABLE));
}
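// A hypothetical call site for the method above; the saver instance, the
// entity class, and the property values are illustrative assumptions:
Map<String, Object> properties = new HashMap<>();
properties.put(KEYSPACE, "analytics");
properties.put(TABLE, "events");
saver.saveDataFrame(dataFrame, Event.class, properties);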
/**
 * Executes a SparkSQL query over the configured SQLContext.
 *
 * @param query SparkSQL query.
 * @return A DataFrame containing the result of the executed query.
 */
public DataFrame sql(String query) {
  return sqlContext.sql(query);
}
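// A hypothetical call site for the wrapper above; "executor" and the table
// name are assumptions, not from the original:
DataFrame top = executor.sql("SELECT key, value FROM src LIMIT 10");
top.show();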
phoenixDataSet.createOrReplaceTempView(tableName);
Dataset<Row> dataset = sqlContext.sql(
  "SELECT col1+col2, col4, a_string FROM " + tableName + " ORDER BY col1+col2, col4");
List<Row> rows = dataset.collectAsList();
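// phoenixDataSet is not built in this snippet; presumably it is loaded through
// the Phoenix data source as in the executeQuery helper below (sparkSession and
// zkUrl are assumed names):
Dataset<Row> phoenixDataSet = sparkSession.read().format("phoenix")
  .option(DataSourceOptions.TABLE_KEY, tableName)
  .option(PhoenixDataSource.ZOOKEEPER_URL, zkUrl)
  .load();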
Dataset<Row> teenagers = sqlContext.sql(
  "SELECT name FROM people WHERE country = 'USA' AND age >= 13 AND age <= 19");
// Assign the second query so teenagers2 is defined before it is mapped.
Dataset<Row> teenagers2 = sqlContext.sql(
  "SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
List<String> teenagerNames = teenagers2.toJavaRDD().map(new Function<Row, String>() {
  @Override
  public String call(Row row) {
    return "Name: " + row.getString(0);
  }
}).collect();
Dataset<Row> teenagers3 = sqlContext.sql(
  "SELECT name FROM people WHERE age >= 13 AND age <= 19");
Dataset<Row> peopleWithCity = sqlContext.sql("SELECT name, address.city FROM people2");
List<String> nameAndCity = peopleWithCity.toJavaRDD().map(new Function<Row, String>() {
  @Override
  public String call(Row row) {
    return "Name: " + row.getString(0) + ", City: " + row.getString(1);
  }
}).collect();
Dataset<Row> teenagers = sqlContext.sql(
  "SELECT name FROM people WHERE country = 'USA' AND age >= 13 AND age <= 19");
// Assign the second query so teenagers2 is defined before it is mapped.
Dataset<Row> teenagers2 = sqlContext.sql(
  "SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
List<String> teenagerNames = teenagers2.toJavaRDD()
  .map((Row row) -> "Name: " + row.getString(0)).collect();
Dataset<Row> teenagers3 = sqlContext.sql(
  "SELECT name FROM people WHERE age >= 13 AND age <= 19");
Dataset<Row> peopleWithCity = sqlContext.sql("SELECT name, address.city FROM people2");
List<String> nameAndCity = peopleWithCity.toJavaRDD()
  .map((Row row) -> "Name: " + row.getString(0) + ", City: " + row.getString(1)).collect();
// The start of this query string is truncated in the source.
    + tableName2 + " order by `cf2.d`";
Dataset<Row> dataset = sqlContext.sql(query);
List<Row> rows = dataset.collectAsList();
ResultSet rs = new SparkResultSet(rows, dataset.columns());
+ " T2 ON T1.A_STRING = T2.A_STRING ORDER BY T1.`CF1.B`"; Dataset<Row> dataset = sqlContext.sql(query); List<Row> rows = dataset.collectAsList(); ResultSet rs = new SparkResultSet(rows, dataset.columns()); "SELECT T1.A_STRING, T2.COL1 FROM " + tableName1 + " T1 JOIN " + tableName2 + " T2 ON T1.A_STRING = T2.A_STRING ORDER BY T2.COL1"; dataset = sqlContext.sql(query); rows = dataset.collectAsList(); rs = new SparkResultSet(rows, dataset.columns());
phoenixDataSet.createOrReplaceTempView(tableName);
Dataset<Row> dataset = sqlContext.sql(
  "SELECT A_STRING, `CF1.A`, `CF1.B`, COL1, `CF2.C`, `CF2.D`, COL2 FROM "
  + tableName + " ORDER BY `CF1.A`,`CF2.C`");
List<Row> rows = dataset.collectAsList();
// Reassign dataset so the second query's results are the ones collected.
dataset = sqlContext.sql(
  "SELECT A_STRING, `CF1.A`, `CF1.B`, COL1, `CF2.C`, `CF2.D`, COL2 FROM "
  + tableName + " ORDER BY COL2");
rows = dataset.collectAsList();
public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url,
    Configuration config) throws SQLException {
  SQLContext sqlContext = getSparkSession().sqlContext();
  boolean forceRowKeyOrder = conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
    .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
  // If we are forcing row key order we have to add an ORDER BY;
  // here we assume that the required columns are in the primary key column order.
  String prevOrderBy = queryBuilder.getOrderByClause();
  if (forceRowKeyOrder
      && (queryBuilder.getOrderByClause() == null || queryBuilder.getOrderByClause().isEmpty())) {
    queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
  }
  // Create a PhoenixRDD using the table name and the columns required by the query.
  // Since we don't set a predicate, filtering is done after rows are returned from Spark.
  Dataset<Row> phoenixDataSet = getSparkSession().read().format("phoenix")
    .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
    .option(PhoenixDataSource.ZOOKEEPER_URL, url).load();
  phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());
  Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
  SparkPlan plan = dataset.queryExecution().executedPlan();
  List<Row> rows = dataset.collectAsList();
  queryBuilder.setOrderByClause(prevOrderBy);
  ResultSet rs = new SparkResultSet(rows, dataset.columns());
  return rs;
}
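// Two design details of the helper above are worth noting. The forced ORDER BY
// simply joins getRequiredColumns(), which is only correct under the stated
// assumption that those columns appear in primary key order. And the original
// ORDER BY clause is restored after execution, so the caller's QueryBuilder is
// left unchanged and can be reused for further queries.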
@After
public void tearDown() throws IOException {
  // Clean up tables.
  if (hc != null) {
    hc.sql("DROP TABLE IF EXISTS window_table");
  }
}
@Test
public void deepSparkContextSQL() {
  deepSparkContext = createDeepSparkContext();
  DeepSparkContext deepSparkContextSpy = PowerMockito.spy(deepSparkContext);
  SQLContext sqlContext = mock(SQLContext.class);
  Whitebox.setInternalState(deepSparkContextSpy, "sc", sparkContext);
  Whitebox.setInternalState(deepSparkContextSpy, "sqlContext", sqlContext);
  String query = "SELECT * FROM input";
  deepSparkContextSpy.sql(query);
  // The context should delegate the query string to its SQLContext untouched.
  verify(sqlContext).sql(query);
}
@Test
public void saveTableAndQueryIt() {
  Map<String, String> options = new HashMap<>();
  df.write()
    .format("org.apache.spark.sql.json")
    .mode(SaveMode.Append)
    .options(options)
    .saveAsTable("javaSavedTable");
  checkAnswer(
    sqlContext.sql("SELECT * FROM javaSavedTable"),
    df.collectAsList());
}
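// Equivalently, the saved table can be read back through the table() API rather
// than a SQL string; a minimal sketch, assuming the metastore registration done
// by saveAsTable above:
DataFrame saved = sqlContext.table("javaSavedTable");
checkAnswer(saved, df.collectAsList());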
@Test
public void testWindowFunction() {
  // rowsBetween(-1, 1) corresponds to "1 PRECEDING AND 1 FOLLOWING" in SQL,
  // so both sides of the check compute the same moving average.
  checkAnswer(
    df.select(avg("key").over(
      Window.partitionBy("value").orderBy("key").rowsBetween(-1, 1))),
    hc.sql("SELECT avg(key) " +
      "OVER (PARTITION BY value " +
      "      ORDER BY key " +
      "      ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) " +
      "FROM window_table").collectAsList());
}
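// window_table is assumed to be registered by the test setup; a plausible
// @Before sketch, assuming a Spark 1.x HiveContext hc, a JavaSparkContext sc,
// and the key/value columns inferred from the query above:
@Before
public void setUp() throws IOException {
  List<String> jsonObjects = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    jsonObjects.add("{\"key\":" + i + ", \"value\":\"str" + i + "\"}");
  }
  df = hc.read().json(sc.parallelize(jsonObjects));
  df.registerTempTable("window_table");
}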