@Test
public void testOptionsAPI() {
  HashMap<String, String> map = new HashMap<String, String>();
  map.put("e", "1");
  // Exercise the String, long, double, boolean and Map option() overloads
  // on both DataFrameReader and DataFrameWriter.
  spark
      .read()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .text()
      .write()
      .option("a", "1")
      .option("b", 1)
      .option("c", 1.0)
      .option("d", true)
      .options(map)
      .format("org.apache.spark.sql.test")
      .save();
}
@Override
public DataFrameReader option(final String key, final boolean value) {
  super.option(key, value);
  return this;
}

@Override
public DataFrameReader option(final String key, final double value) {
  super.option(key, value);
  return this;
}
@Override
public DataFrameReader option(final String key, final long value) {
  super.option(key, value);
  return this;
}
// Push the user-supplied schema, pushed-down filter expression and extra
// options onto the underlying DataFrameReader before loading.
private void prepare(Map<String, String> options, StructType schema, Filter schemaFilter) {
  if (schema != null) {
    dfr.schema(schema);
  }
  if (schemaFilter != null) {
    dfr.option("schemaFilter", N1QLRelation.filterToExpression(schemaFilter));
  }
  if (options != null) {
    dfr.options(options);
  }
}
@Override
public DataFrameReader option(final String key, final String value) {
  super.option(key, value);
  return this;
}
SQLContext sqlContext = SparkUtil.getSparkSession().sqlContext();
Dataset phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
    .option(DataSourceOptions.TABLE_KEY, tableName1)
    .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
phoenixDataSet.createOrReplaceTempView(tableName1);

phoenixDataSet = SparkUtil.getSparkSession().read().format("phoenix")
    .option(DataSourceOptions.TABLE_KEY, tableName2)
    .option(PhoenixDataSource.ZOOKEEPER_URL, getUrl()).load();
phoenixDataSet.createOrReplaceTempView(tableName2);
public static ResultSet executeQuery(Connection conn, QueryBuilder queryBuilder, String url,
    Configuration config) throws SQLException {
  SQLContext sqlContext = getSparkSession().sqlContext();
  boolean forceRowKeyOrder = conn.unwrap(PhoenixConnection.class).getQueryServices().getProps()
      .getBoolean(QueryServices.FORCE_ROW_KEY_ORDER_ATTRIB, false);
  // if we are forcing row key order we have to add an ORDER BY
  // here we assume that the required columns are in the primary key column order
  String prevOrderBy = queryBuilder.getOrderByClause();
  if (forceRowKeyOrder
      && (queryBuilder.getOrderByClause() == null || queryBuilder.getOrderByClause().isEmpty())) {
    queryBuilder.setOrderByClause(Joiner.on(", ").join(queryBuilder.getRequiredColumns()));
  }
  // create PhoenixRDD using the table name and columns that are required by the query
  // since we don't set the predicate, filtering is done after rows are returned from Spark
  Dataset phoenixDataSet = getSparkSession().read().format("phoenix")
      .option(DataSourceOptions.TABLE_KEY, queryBuilder.getFullTableName())
      .option(PhoenixDataSource.ZOOKEEPER_URL, url).load();
  phoenixDataSet.createOrReplaceTempView(queryBuilder.getFullTableName());
  Dataset<Row> dataset = sqlContext.sql(queryBuilder.build());
  SparkPlan plan = dataset.queryExecution().executedPlan();
  List<Row> rows = dataset.collectAsList();
  queryBuilder.setOrderByClause(prevOrderBy);
  ResultSet rs = new SparkResultSet(rows, dataset.columns());
  return rs;
}
@Override
public Dataset<Row> read() throws Exception {
  String connection = config.getString(CONNECTION_CONFIG_NAME);
  String tableName = config.getString(TABLE_NAME_CONFIG_NAME);
  Dataset<Row> tableDF = Contexts.getSparkSession().read()
      .format("org.apache.kudu.spark.kudu")
      .option("kudu.master", connection)
      .option("kudu.table", tableName)
      .load();
  return tableDF;
}
private void start() { SparkSession spark = SparkSession.builder().appName("For Each Claim") .master("local").getOrCreate(); String filename = "data/claims.csv"; Dataset<Row> claimsDf = spark.read().format("csv").option("inferSchema", "true").option("header", "true") .load(filename); claimsDf.show(); claimsDf.foreach(new ClaimPrepAndProcess()); } }
private void start() { SparkSession spark = SparkSession.builder().appName("For Each Book").master( "local").getOrCreate(); String filename = "data/books.csv"; Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true") .option("header", "true") .load(filename); df.show(); df.foreach(new BookPrinter()); } }
@Test
public void verifyLibSVMDF() {
  Dataset<Row> dataset = spark.read().format("libsvm").option("vectorType", "dense")
      .load(path);
  Assert.assertEquals("label", dataset.columns()[0]);
  Assert.assertEquals("features", dataset.columns()[1]);
  Row r = dataset.first();
  Assert.assertEquals(1.0, r.getDouble(0), 1e-15);
  DenseVector v = r.getAs(1);
  Assert.assertEquals(Vectors.dense(1.0, 0.0, 2.0, 0.0, 3.0, 0.0), v);
}