conn.commit();
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName)
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName1)
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
        .option(DataSourceOptions.TABLE_KEY, tableName)
public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url, Configuration config)
        throws SQLException {
    SQLContext sqlContext = getSparkSession().sqlContext();

    boolean forceRowKeyOrder =
            conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
                    .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
    // if we are forcing row key order we have to add an ORDER BY;
    // here we assume that the required columns are in primary key column order
    String prevOrderBy = queryBuilder.getOrderByClause();
    if (forceRowKeyOrder && (queryBuilder.getOrderByClause() == null
            || queryBuilder.getOrderByClause().isEmpty())) {
        queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
    }

    // create the Phoenix dataset using the table name and the columns required by the query;
    // since we don't set a predicate, filtering is done after rows are returned from Spark
    Dataset<Row> phoenixDataSet = getSparkSession().read().format("phoenix")
            .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
            .option(PhoenixDataSource.ZOOKEEPER_URL, url)
            .load();
    phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());

    Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
    SparkPlan plan = dataset.queryExecution().executedPlan();
    List<Row> rows = dataset.collectAsList();
    queryBuilder.setOrderByClause(prevOrderBy);
    ResultSet rs = new SparkResultSet(rows, dataset.columns());
    return rs;
}
@Override
public SQLContext sqlContext() {
    return SparkContextUtil.getSparkSession(context, store.getProperties()).sqlContext();
}
/**
 * Constructs the all-important HiveContext, then evaluates the wrapped Statement.
 * Currently, the HiveContext is made as a singleton.
 *
 * @throws Throwable as required by the Statement class
 */
@Override
public void evaluate() throws Throwable {
    if (sparkSqlContextSingleton == null) {
        SparkConf sparkConf = new SparkConf().setAppName("HiveQLUnit").setMaster(serverAddress);
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate();
        sparkSqlContextSingleton = sparkSession.sqlContext();
    }

    wrappedStatement.evaluate();
}
DataTypes.createStructField("foe2", DataTypes.StringType, false) }); Dataset<Row> df = spark.sqlContext().createDataFrame(rowRDD, schema).toDF();
@Test
public void testInputRepartitionColumns() throws Exception {
    Map<String, Object> configMap = Maps.newHashMap();
    configMap.put(DataStep.INPUT_TYPE + "." + InputFactory.TYPE_CONFIG_NAME, DummyInput.class.getName());
    configMap.put(DataStep.INPUT_TYPE + "." + "starting.partitions", 10);
    configMap.put(BatchStep.REPARTITION_COLUMNS_PROPERTY, Lists.newArrayList("modulo"));
    Config config = ConfigFactory.parseMap(configMap);

    BatchStep batchStep = new BatchStep("test");
    batchStep.configure(config);
    batchStep.submit(Sets.<Step>newHashSet());
    Dataset<Row> df = batchStep.getData();
    int numPartitions = df.javaRDD().getNumPartitions();

    assertEquals(Contexts.getSparkSession().sqlContext().getConf("spark.sql.shuffle.partitions"),
            Integer.toString(numPartitions));
}
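For context, a minimal sketch of the plain Spark behavior this test relies on, independent of the BatchStep API (the RepartitionSketch class, the "modulo" column, and the use of spark.range are illustrative only, and classic non-adaptive execution is assumed): repartitioning by column expressions without an explicit partition count falls back to spark.sql.shuffle.partitions.

import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class RepartitionSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("RepartitionSketch")
                .master("local[*]")
                .getOrCreate();

        // Hypothetical input: a small range with a "modulo" column to partition by.
        Dataset<Row> df = spark.range(100).withColumn("modulo", col("id").mod(3));

        // With adaptive execution disabled, repartitioning by column expressions
        // uses spark.sql.shuffle.partitions (default 200) when no count is given.
        Dataset<Row> repartitioned = df.repartition(col("modulo"));

        String shufflePartitions = spark.sqlContext().getConf("spark.sql.shuffle.partitions");
        System.out.println(shufflePartitions + " vs " + repartitioned.javaRDD().getNumPartitions());

        spark.stop();
    }
}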
private void start() {
    SparkSession spark = SparkSession.builder()
            .appName("Build a DataFrame from Scratch")
            .master("local[*]")
            .getOrCreate();

    List<String> stringAsList = new ArrayList<>();
    stringAsList.add("bar");

    JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());

    JavaRDD<Row> rowRDD = sparkContext.parallelize(stringAsList)
            .map((String row) -> RowFactory.create(row));

    // Creates schema
    StructType schema = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("foe", DataTypes.StringType, false) });

    Dataset<Row> df = spark.sqlContext().createDataFrame(rowRDD, schema).toDF();

    log.debug("** Schema: ");
    df.printSchema();

    log.debug("** Data: ");
    df.show();

    sparkContext.close();
}
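As a point of comparison, a minimal sketch (class name is illustrative; Spark 2.x+ assumed) that builds the same single-column DataFrame without dropping to JavaSparkContext, by passing a local List<Row> straight to SparkSession.createDataFrame:

import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class DataFrameFromListSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("Build a DataFrame from a local List")
                .master("local[*]")
                .getOrCreate();

        // One row with a single string value, mirroring the example above.
        List<Row> rows = Collections.singletonList(RowFactory.create("bar"));

        StructType schema = DataTypes.createStructType(new StructField[] {
                DataTypes.createStructField("foe", DataTypes.StringType, false) });

        // SparkSession.createDataFrame accepts a local java.util.List<Row> directly,
        // so no JavaSparkContext or JavaRDD is needed.
        Dataset<Row> df = spark.createDataFrame(rows, schema);

        df.printSchema();
        df.show();

        spark.stop();
    }
}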