@Test
public void testOptionsAPI() {
  HashMap<String, String> map = new HashMap<String, String>();
  map.put("e", "1");
  // Exercise the String, long, double, boolean and Map option() overloads
  // on both DataFrameReader and DataFrameWriter.
  spark
      .read()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .text()
      .write()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .format("org.apache.spark.sql.test")
      .save();
}
@Override
public DataFrameReader option(final String key, final boolean value) {
  super.option(key, value);
  return this;
}

@Override
public DataFrameReader option(final String key, final double value) {
  super.option(key, value);
  return this;
}
@Override
public DataFrameReader option(final String key, final long value) {
  super.option(key, value);
  return this;
}
// Push the user-supplied schema, pushed-down filter expression and extra
// options onto the underlying DataFrameReader before loading.
private void prepare(Map<String, String> options, StructType schema, Filter schemaFilter) {
  if (schema != null) {
    dfr.schema(schema);
  }
  if (schemaFilter != null) {
    dfr.option("schemaFilter", N1QLRelation.filterToExpression(schemaFilter));
  }
  if (options != null) {
    dfr.options(options);
  }
}
@Override
public DataFrameReader option(final String key, final String value) {
  super.option(key, value);
  return this;
}
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
    .option(DataSourceOptions.TABLE_KEY, tableName1)
    .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
phoenixDataSet.createOrReplaceTempView(tableName1);

phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
    .option(DataSourceOptions.TABLE_KEY, tableName2)
    .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
phoenixDataSet.createOrReplaceTempView(tableName2);
public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url,
    Configuration config) throws SQLException {
  SQLContext sqlContext = getSparkSession().sqlContext();
  boolean forceRowKeyOrder = conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
      .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
  // if we are forcing row key order we have to add an ORDER BY
  // here we assume that the required columns are in the primary key column order
  String prevOrderBy = queryBuilder.getOrderByClause();
  if (forceRowKeyOrder
      && (queryBuilder.getOrderByClause() == null || queryBuilder.getOrderByClause().isEmpty())) {
    queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
  }
  // create PhoenixRDD using the table name and columns that are required by the query
  // since we don't set the predicate, filtering is done after rows are returned from Spark
  Dataset phoenixDataSet = getSparkSession().read().format("phoenix")
      .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
      .option(PhoenixDataSource.ZOOKEEPER_URL, url).load();
  phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());
  Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
  SparkPlan plan = dataset.queryExecution().executedPlan();
  List<Row> rows = dataset.collectAsList();
  queryBuilder.setOrderByClause(prevOrderBy);
  ResultSet rs = new SparkResultSet(rows, dataset.columns());
  return rs;
}
@Override
public Dataset<Row> read() throws Exception {
  String connection = config.getString(CONNECTION_CONFIG_NAME);
  String tableName = config.getString(TABLE_NAME_CONFIG_NAME);
  Dataset<Row> tableDF = Contexts.getSparkSession().read()
      .format("org.apache.kudu.spark.kudu")
      .option("kudu.master", connection)
      .option("kudu.table", tableName)
      .load();
  return tableDF;
}
private void start() { SparkSession spark = SparkSession.builder().appName("For Each Claim") .master("local").getOrCreate(); String filename = "data/claims.csv"; Dataset<Row> claimsDf = spark.read().format("csv").option("inferSchema", "true").option("header", "true") .load(filename); claimsDf.show(); claimsDf.foreach(new ClaimPrepAndProcess()); } }
private void start() { SparkSession spark = SparkSession.builder().appName("For Each Book").master( "local").getOrCreate(); String filename = "data/books.csv"; Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true") .option("header", "true") .load(filename); df.show(); df.foreach(new BookPrinter()); } }
@Test
public void verifyLibSVMDF() {
  Dataset<Row> dataset = spark.read().format("libsvm").option("vectorType", "dense")
      .load(path);
  Assert.assertEquals("label", dataset.columns()[0]);
  Assert.assertEquals("features", dataset.columns()[1]);
  Row r = dataset.first();
  Assert.assertEquals(1.0, r.getDouble(0), 1e-15);
  DenseVector v = r.getAs(1);
  Assert.assertEquals(Vectors.dense(1.0, 0.0, 2.0, 0.0, 3.0, 0.0), v);
}