/**
 * Compilation-only exercise of the orc() reader/writer overloads; orc()
 * cannot actually be executed here because the Hive classes are absent.
 */
public void testOrcAPI() {
  // Zero-arg, single-path and varargs overloads.
  spark.read().schema(schema).orc();
  spark.read().schema(schema).orc(input);
  spark.read().schema(schema).orc(input, input, input);
  // Explicit String[] overload, round-tripped through the writer.
  Dataset<Row> loaded = spark.read().schema(schema).orc(new String[]{input, input});
  loaded.write().orc(output);
}
}
/**
 * Compilation-only exercise of the orc() reader/writer overloads; orc()
 * cannot actually be executed here because the Hive classes are absent.
 */
public void testOrcAPI() {
  // Zero-arg, single-path and varargs overloads.
  spark.read().schema(schema).orc();
  spark.read().schema(schema).orc(input);
  spark.read().schema(schema).orc(input, input, input);
  // Explicit String[] overload, round-tripped through the writer.
  Dataset<Row> loaded = spark.read().schema(schema).orc(new String[]{input, input});
  loaded.write().orc(output);
}
}
/**
 * Compilation-only exercise of the orc() reader/writer overloads; orc()
 * cannot actually be executed here because the Hive classes are absent.
 */
public void testOrcAPI() {
  // Zero-arg, single-path and varargs overloads.
  spark.read().schema(schema).orc();
  spark.read().schema(schema).orc(input);
  spark.read().schema(schema).orc(input, input, input);
  // Explicit String[] overload, round-tripped through the writer.
  Dataset<Row> loaded = spark.read().schema(schema).orc(new String[]{input, input});
  loaded.write().orc(output);
}
}
/**
 * Round-trips the DataFrame through the json source using an explicit
 * "path" option, then reloads it with a user-supplied single-column schema
 * and checks that only column "b" comes back.
 */
@Test
public void saveAndLoadWithSchema() {
  Map<String, String> pathOptions = new HashMap<>();
  pathOptions.put("path", path.toString());

  // ErrorIfExists makes a stale output directory fail loudly.
  df.write().format("json").mode(SaveMode.ErrorIfExists).options(pathOptions).save();

  // Build the user-provided schema: a single nullable string column "b".
  List<StructField> schemaFields = new ArrayList<>();
  schemaFields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType userSchema = DataTypes.createStructType(schemaFields);

  Dataset<Row> reloaded =
      spark.read().format("json").schema(userSchema).options(pathOptions).load();
  checkAnswer(reloaded, spark.sql("SELECT b FROM jsonTable").collectAsList());
}
}
/**
 * Round-trips the DataFrame through the json source using an explicit
 * "path" option, then reloads it with a user-supplied single-column schema
 * and checks that only column "b" comes back.
 */
@Test
public void saveAndLoadWithSchema() {
  Map<String, String> pathOptions = new HashMap<>();
  pathOptions.put("path", path.toString());

  // ErrorIfExists makes a stale output directory fail loudly.
  df.write().format("json").mode(SaveMode.ErrorIfExists).options(pathOptions).save();

  // Build the user-provided schema: a single nullable string column "b".
  List<StructField> schemaFields = new ArrayList<>();
  schemaFields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType userSchema = DataTypes.createStructType(schemaFields);

  Dataset<Row> reloaded =
      spark.read().format("json").schema(userSchema).options(pathOptions).load();
  checkAnswer(reloaded, spark.sql("SELECT b FROM jsonTable").collectAsList());
}
}
/**
 * Round-trips the DataFrame through the json source using an explicit
 * "path" option, then reloads it with a user-supplied single-column schema
 * and checks that only column "b" comes back.
 */
@Test
public void saveAndLoadWithSchema() {
  Map<String, String> pathOptions = new HashMap<>();
  pathOptions.put("path", path.toString());

  // ErrorIfExists makes a stale output directory fail loudly.
  df.write().format("json").mode(SaveMode.ErrorIfExists).options(pathOptions).save();

  // Build the user-provided schema: a single nullable string column "b".
  List<StructField> schemaFields = new ArrayList<>();
  schemaFields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
  StructType userSchema = DataTypes.createStructType(schemaFields);

  Dataset<Row> reloaded =
      spark.read().format("json").schema(userSchema).options(pathOptions).load();
  checkAnswer(reloaded, spark.sql("SELECT b FROM jsonTable").collectAsList());
}
}
/**
 * Reads a JSON file whose rows contain an array-of-struct field and checks
 * that Encoders.bean deserializes it into the expected Record beans.
 */
@Test
public void testBeanWithArrayFieldDeserialization() {
  Encoder<Record> encoder = Encoders.bean(Record.class);

  Dataset<Record> dataset = spark
    .read()
    .format("json")
    // Inline DDL schema: the nested struct models the interval bean field.
    .schema("id int, intervals array<struct<startTime: bigint, endTime: bigint>>")
    .load("src/test/resources/test-data/with-array-fields.json")
    .as(encoder);

  List<Record> records = dataset.collectAsList();
  // Fix: JUnit's assertEquals takes (expected, actual) — RECORDS is the
  // expectation; the original call had the arguments reversed, which makes
  // failure messages report expected/actual swapped.
  Assert.assertEquals(RECORDS, records);
}
/**
 * Reads a JSON file whose rows contain an array-of-struct field and checks
 * that Encoders.bean deserializes it into the expected Record beans.
 */
@Test
public void testBeanWithArrayFieldDeserialization() {
  Encoder<Record> encoder = Encoders.bean(Record.class);

  Dataset<Record> dataset = spark
    .read()
    .format("json")
    // Inline DDL schema: the nested struct models the interval bean field.
    .schema("id int, intervals array<struct<startTime: bigint, endTime: bigint>>")
    .load("src/test/resources/test-data/with-array-fields.json")
    .as(encoder);

  List<Record> records = dataset.collectAsList();
  // Fix: JUnit's assertEquals takes (expected, actual) — RECORDS is the
  // expectation; the original call had the arguments reversed, which makes
  // failure messages report expected/actual swapped.
  Assert.assertEquals(RECORDS, records);
}
// actual1 was produced earlier in this (partially visible) test; confirm it
// matches the expected rows.
Assert.assertEquals(expectedResult, actual1);

// Re-read the same JSON dataset with an explicit schema and verify the
// reader honours it rather than re-inferring one.
Dataset<Row> df2 = spark.read().schema(expectedSchema).json(jsonDS);
StructType actualSchema2 = df2.schema();
Assert.assertEquals(expectedSchema, actualSchema2);
// actual1 was produced earlier in this (partially visible) test; confirm it
// matches the expected rows.
Assert.assertEquals(expectedResult, actual1);

// Re-read the same JSON dataset with an explicit schema and verify the
// reader honours it rather than re-inferring one.
Dataset<Row> df2 = spark.read().schema(expectedSchema).json(jsonDS);
StructType actualSchema2 = df2.schema();
Assert.assertEquals(expectedSchema, actualSchema2);
/**
 * Reads a JSON file whose rows contain an array-of-struct field and checks
 * that Encoders.bean deserializes it into the expected Record beans. This
 * variant supplies the schema programmatically via createSchema().
 */
@Test
public void testBeanWithArrayFieldDeserialization() {
  Encoder<Record> encoder = Encoders.bean(Record.class);

  Dataset<Record> dataset = spark
    .read()
    .format("json")
    // Programmatically-built StructType equivalent of the DDL schema.
    .schema(createSchema())
    .load("src/test/resources/test-data/with-array-fields.json")
    .as(encoder);

  List<Record> records = dataset.collectAsList();
  // Fix: JUnit's assertEquals takes (expected, actual) — RECORDS is the
  // expectation; the original call had the arguments reversed, which makes
  // failure messages report expected/actual swapped.
  Assert.assertEquals(RECORDS, records);
}