@Test
public void testOptionsAPI() {
  HashMap<String, String> map = new HashMap<String, String>();
  map.put("e", "1");
  spark
    .read()
    .option("a", "1")
    .option("b", 1)
    .option("c", 1.0)
    .option("d", true)
    .options(map)
    .text()
    .write()
    .option("a", "1")
    .option("b", 1)
    .option("c", 1.0)
    .option("d", true)
    .options(map)
    .format("org.apache.spark.sql.test")
    .save();
}
@Override
public Dataset<Row> text(final scala.collection.Seq<String> paths) {
  final boolean userTriggered = initializeFunction(paths);
  final Dataset<Row> result = Dataset.from(super.text(paths));
  this.setIsUserTriggered(userTriggered);
  return result;
}
@Override
public Dataset<Row> text(final String path) {
  final boolean userTriggered = initializeFunction(path);
  final Dataset<Row> result = Dataset.from(super.text(path));
  this.setIsUserTriggered(userTriggered);
  return result;
}
@Override
public Dataset<Row> text(final String... paths) {
  final boolean userTriggered = initializeFunction(paths);
  final Dataset<Row> result = Dataset.from(super.text(paths));
  this.setIsUserTriggered(userTriggered);
  return result;
}
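For reference, these three overrides mirror the text() overloads on Spark's DataFrameReader: a single path, Java varargs, and the scala.collection.Seq variant that the Scala varargs compile down to. A minimal sketch of exercising the single-path and varargs forms through a plain reader, assuming a local session and illustrative file paths:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class TextOverloadsSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("text() overloads sketch")
        .master("local[*]")
        .getOrCreate();

    // Single-path overload: text(String).
    Dataset<Row> one = spark.read().text("data/a.txt");

    // Varargs overload: text(String...); from Scala this is the Seq<String> method.
    Dataset<Row> many = spark.read().text("data/a.txt", "data/b.txt");

    one.show();
    many.show();
    spark.stop();
  }
}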
public static Dataset<Row> loadFile(String inputFormat, String inputPath, SparkSession spark) {
  if (inputFormat == null || inputFormat.isEmpty() || inputFormat.equalsIgnoreCase("text")) {
    return spark.read().text(inputPath);
  } else if (inputFormat.equalsIgnoreCase("parquet")) {
    return spark.read().parquet(inputPath);
  } else if (inputFormat.equalsIgnoreCase("csv")) {
    return spark.read().option("header", "false").csv(inputPath);
  } else if (inputFormat.equalsIgnoreCase("csv_with_header")) {
    return spark.read().option("header", "true").csv(inputPath);
  } else if (inputFormat.equalsIgnoreCase("json")) {
    return spark.read().json(inputPath);
  } else {
    throw new RuntimeException(
        String.format("Unsupported inputFormat: %s, %s", inputFormat, inputPath));
  }
}
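A short usage sketch for the loadFile helper above; the format string and file path are illustrative assumptions, and the method is presumed to be in scope (e.g., a static member of the same class):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class LoadFileSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("loadFile usage sketch")
        .master("local[*]")
        .getOrCreate();

    // "csv_with_header" routes to spark.read().option("header", "true").csv(...);
    // an unrecognized format string would hit the RuntimeException branch.
    Dataset<Row> df = loadFile("csv_with_header", "data/people.csv", spark);
    df.show();

    spark.stop();
  }
}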
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("Dataset from Text File")
      .master("local[*]")
      .getOrCreate();

  String filename = "data/simple-data-file.txt";
  Dataset<Row> df = spark.read().text(filename);
  df.show();
}
private Dataset<Row> readText(String path) throws Exception {
  Dataset<Row> lines = Contexts.getSparkSession().read().text(path);

  if (translatorConfig != null) {
    Dataset<Tuple2<String, String>> keyedLines = lines.map(
        new PrepareLineForTranslationFunction(),
        Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

    TranslateFunction<String, String> translateFunction = getTranslateFunction(translatorConfig);

    return keyedLines.flatMap(translateFunction, RowEncoder.apply(translateFunction.getSchema()));
  } else {
    return lines;
  }
}
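The translator branch above depends on Envelope-specific classes (Contexts, PrepareLineForTranslationFunction, TranslateFunction), but the underlying pattern, reading raw text and then fanning each line out into schema-typed rows via flatMap with an explicit RowEncoder, can be shown standalone. A minimal sketch, assuming a local session, an illustrative input file, and a hypothetical one-column word schema standing in for translateFunction.getSchema():

import java.util.Arrays;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.encoders.RowEncoder;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class FlatMapRowsSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("flatMap with RowEncoder sketch")
        .master("local[*]")
        .getOrCreate();

    Dataset<Row> lines = spark.read().text("data/simple-data-file.txt");

    // Stand-in for translateFunction.getSchema(): one string column per word.
    StructType schema = new StructType().add("word", DataTypes.StringType);

    // Each input line (the single "value" column) becomes one row per word.
    Dataset<Row> words = lines.flatMap(
        (FlatMapFunction<Row, Row>) row ->
            Arrays.stream(row.getString(0).split("\\s+"))
                .map(RowFactory::create)
                .iterator(),
        RowEncoder.apply(schema));

    words.show();
    spark.stop();
  }
}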