conn.commit();
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName)
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName1)
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName)
public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url, Configuration config)
        throws SQLException {
    SQLContext sqlContext = getSparkSession().sqlContext();

    boolean forceRowKeyOrder =
            conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
                    .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
    // if we are forcing row key order we have to add an ORDER BY;
    // here we assume that the required columns are in primary key column order
    String prevOrderBy = queryBuilder.getOrderByClause();
    if (forceRowKeyOrder && (queryBuilder.getOrderByClause() == null
            || queryBuilder.getOrderByClause().isEmpty())) {
        queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
    }

    // create the Phoenix dataset using the table name and the columns required by the query;
    // since we don't set a predicate, filtering is done after rows are returned from Spark
    Dataset<Row> phoenixDataSet = getSparkSession().read().format("phoenix")
            .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
            .option(PhoenixDataSource.ZOOKEEPER_URL, url)
            .load();
    phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());

    Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
    SparkPlan plan = dataset.queryExecution().executedPlan();
    List<Row> rows = dataset.collectAsList();
    queryBuilder.setOrderByClause(prevOrderBy);
    ResultSet rs = new SparkResultSet(rows, dataset.columns());
    return rs;
}
@Override
public SQLContext sqlContext() {
    return SparkContextUtil.getSparkSession(context, store.getProperties()).sqlContext();
}
/**
 * Constructs the all-important HiveContext, then evaluates the wrapped Statement.
 * Currently, the HiveContext is made as a singleton.
 *
 * @throws Throwable as required by the Statement class
 */
@Override
public void evaluate() throws Throwable {
    if (sparkSqlContextSingleton == null) {
        SparkConf sparkConf = new SparkConf().setAppName("HiveQLUnit").setMaster(serverAddress);
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate();
        sparkSqlContextSingleton = sparkSession.sqlContext();
    }

    wrappedStatement.evaluate();
}
DataTypes.createStructField("foe2", DataTypes.StringType, false) }); Dataset<Row> df = spark.sqlContext().createDataFrame(rowRDD, schema).toDF();
@Test
public void testInputRepartitionColumns() throws Exception {
    Map<String, Object> configMap = Maps.newHashMap();
    configMap.put(DataStep.INPUT_TYPE + "." + InputFactory.TYPE_CONFIG_NAME, DummyInput.class.getName());
    configMap.put(DataStep.INPUT_TYPE + "." + "starting.partitions", 10);
    configMap.put(BatchStep.REPARTITION_COLUMNS_PROPERTY, Lists.newArrayList("modulo"));
    Config config = ConfigFactory.parseMap(configMap);

    BatchStep batchStep = new BatchStep("test");
    batchStep.configure(config);
    batchStep.submit(Sets.<Step>newHashSet());
    Dataset<Row> df = batchStep.getData();
    int numPartitions = df.javaRDD().getNumPartitions();

    assertEquals(Contexts.getSparkSession().sqlContext().getConf("spark.sql.shuffle.partitions"),
            Integer.toString(numPartitions));
}
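For context, a minimal sketch of the plain Spark behavior this test relies on, independent of the BatchStep API (the RepartitionSketch class, the "modulo" column, and the use of spark.range are illustrative only, and classic non-adaptive execution is assumed): repartitioning by column expressions without an explicit partition count falls back to spark.sql.shuffle.partitions.

import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class RepartitionSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("RepartitionSketch")
                .master("local[*]")
                .getOrCreate();

        // Hypothetical input: a small range with a "modulo" column to partition by.
        Dataset<Row> df = spark.range(100).withColumn("modulo", col("id").mod(3));

        // With adaptive execution disabled, repartitioning by column expressions
        // uses spark.sql.shuffle.partitions (default 200) when no count is given.
        Dataset<Row> repartitioned = df.repartition(col("modulo"));

        String shufflePartitions = spark.sqlContext().getConf("spark.sql.shuffle.partitions");
        System.out.println(shufflePartitions + " vs " + repartitioned.javaRDD().getNumPartitions());

        spark.stop();
    }
}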
private void start() {
    SparkSession spark = SparkSession.builder()
            .appName("Build a DataFrame from Scratch")
            .master("local[*]")
            .getOrCreate();

    List<String> stringAsList = new ArrayList<>();
    stringAsList.add("bar");

    JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());

    JavaRDD<Row> rowRDD = sparkContext.parallelize(stringAsList)
            .map((String row) -> RowFactory.create(row));

    // Creates schema
    StructType schema = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("foe", DataTypes.StringType, false) });

    Dataset<Row> df = spark.sqlContext().createDataFrame(rowRDD, schema).toDF();

    log.debug("** Schema: ");
    df.printSchema();

    log.debug("** Data: ");
    df.show();

    sparkContext.close();
}
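As a point of comparison, a minimal sketch (class name is illustrative; Spark 2.x+ assumed) that builds the same single-column DataFrame without dropping to JavaSparkContext, by passing a local List<Row> straight to SparkSession.createDataFrame:

import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class DataFrameFromListSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("Build a DataFrame from a local List")
                .master("local[*]")
                .getOrCreate();

        // One row with a single string value, mirroring the example above.
        List<Row> rows = Collections.singletonList(RowFactory.create("bar"));

        StructType schema = DataTypes.createStructType(new StructField[] {
                DataTypes.createStructField("foe", DataTypes.StringType, false) });

        // SparkSession.createDataFrame accepts a local java.util.List<Row> directly,
        // so no JavaSparkContext or JavaRDD is needed.
        Dataset<Row> df = spark.createDataFrame(rows, schema);

        df.printSchema();
        df.show();

        spark.stop();
    }
}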