@Test
public void testCommonOperation() {
  // Source rows exercised by every transformation below.
  List<String> input = Arrays.asList("hello", "world");
  Dataset<String> dataset = spark.createDataset(input, Encoders.STRING());
  Assert.assertEquals("hello", dataset.first());

  // filter: keep only strings beginning with 'h'.
  Dataset<String> startsWithH =
      dataset.filter((FilterFunction<String>) value -> value.startsWith("h"));
  Assert.assertEquals(Arrays.asList("hello"), startsWithH.collectAsList());

  // map: each string to its length.
  Dataset<Integer> lengths =
      dataset.map((MapFunction<String, Integer>) String::length, Encoders.INT());
  Assert.assertEquals(Arrays.asList(5, 5), lengths.collectAsList());

  // mapPartitions: upper-case every element of each partition.
  Dataset<String> upperCased = dataset.mapPartitions(
      (MapPartitionsFunction<String, String>) partition -> {
        List<String> result = new LinkedList<>();
        while (partition.hasNext()) {
          result.add(partition.next().toUpperCase(Locale.ROOT));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(Arrays.asList("HELLO", "WORLD"), upperCased.collectAsList());

  // flatMap: explode each string into its individual characters.
  Dataset<String> characters = dataset.flatMap(
      (FlatMapFunction<String, String>) word -> {
        List<String> result = new LinkedList<>();
        for (char ch : word.toCharArray()) {
          result.add(String.valueOf(ch));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(
      Arrays.asList("h", "e", "l", "l", "o", "w", "o", "r", "l", "d"),
      characters.collectAsList());
}
@Test
public void testCommonOperation() {
  // Source rows exercised by every transformation below.
  List<String> input = Arrays.asList("hello", "world");
  Dataset<String> dataset = spark.createDataset(input, Encoders.STRING());
  Assert.assertEquals("hello", dataset.first());

  // filter: keep only strings beginning with 'h'.
  Dataset<String> startsWithH =
      dataset.filter((FilterFunction<String>) value -> value.startsWith("h"));
  Assert.assertEquals(Arrays.asList("hello"), startsWithH.collectAsList());

  // map: each string to its length.
  Dataset<Integer> lengths =
      dataset.map((MapFunction<String, Integer>) String::length, Encoders.INT());
  Assert.assertEquals(Arrays.asList(5, 5), lengths.collectAsList());

  // mapPartitions: upper-case every element of each partition.
  Dataset<String> upperCased = dataset.mapPartitions(
      (MapPartitionsFunction<String, String>) partition -> {
        List<String> result = new LinkedList<>();
        while (partition.hasNext()) {
          result.add(partition.next().toUpperCase(Locale.ROOT));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(Arrays.asList("HELLO", "WORLD"), upperCased.collectAsList());

  // flatMap: explode each string into its individual characters.
  Dataset<String> characters = dataset.flatMap(
      (FlatMapFunction<String, String>) word -> {
        List<String> result = new LinkedList<>();
        for (char ch : word.toCharArray()) {
          result.add(String.valueOf(ch));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(
      Arrays.asList("h", "e", "l", "l", "o", "w", "o", "r", "l", "d"),
      characters.collectAsList());
}
@Test
public void testCommonOperation() {
  // Source rows exercised by every transformation below.
  List<String> input = Arrays.asList("hello", "world");
  Dataset<String> dataset = spark.createDataset(input, Encoders.STRING());
  Assert.assertEquals("hello", dataset.first());

  // filter: keep only strings beginning with 'h'.
  Dataset<String> startsWithH =
      dataset.filter((FilterFunction<String>) value -> value.startsWith("h"));
  Assert.assertEquals(Arrays.asList("hello"), startsWithH.collectAsList());

  // map: each string to its length.
  Dataset<Integer> lengths =
      dataset.map((MapFunction<String, Integer>) String::length, Encoders.INT());
  Assert.assertEquals(Arrays.asList(5, 5), lengths.collectAsList());

  // mapPartitions: upper-case every element of each partition.
  Dataset<String> upperCased = dataset.mapPartitions(
      (MapPartitionsFunction<String, String>) partition -> {
        List<String> result = new LinkedList<>();
        while (partition.hasNext()) {
          result.add(partition.next().toUpperCase(Locale.ROOT));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(Arrays.asList("HELLO", "WORLD"), upperCased.collectAsList());

  // flatMap: explode each string into its individual characters.
  Dataset<String> characters = dataset.flatMap(
      (FlatMapFunction<String, String>) word -> {
        List<String> result = new LinkedList<>();
        for (char ch : word.toCharArray()) {
          result.add(String.valueOf(ch));
        }
        return result.iterator();
      },
      Encoders.STRING());
  Assert.assertEquals(
      Arrays.asList("h", "e", "l", "l", "o", "w", "o", "r", "l", "d"),
      characters.collectAsList());
}
// NOTE(review): fragment — the receiver of this .flatMap chain and the `routes`
// variable are declared above this view; presumably `routes` is the result of
// this chain — confirm against the full file.
// cache() before count() so the routed dataset is not recomputed by downstream
// consumers after this debug-count action — TODO confirm that is the intent.
.flatMap(messageRouterFunction(profilerProps, profiles, globals), Encoders.bean(MessageRoute.class)); LOG.debug("Generated {} message route(s)", routes.cache().count());
@Override public ConceptMaps withConceptMaps(Dataset<ConceptMap> conceptMaps) { Dataset<UrlAndVersion> newMembers = getUrlAndVersions(conceptMaps); if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) { throw new IllegalArgumentException( "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion"); } // Remove the concept contents for persistence. This is most easily done in the ConceptMap // object by setting the group to an empty list. Dataset<ConceptMap> withoutConcepts = conceptMaps .map((MapFunction<ConceptMap,ConceptMap>) conceptMap -> { // Remove the elements rather than the groups to preserved the // "unmapped" structure in a group that can refer to other // concept maps. ConceptMap withoutElements = conceptMap.copy(); List<ConceptMapGroupComponent> updatedGroups = new ArrayList<>(); for (ConceptMapGroupComponent group: withoutElements.getGroup()) { group.setElement(new ArrayList<>()); updatedGroups.add(group); } withoutElements.setGroup(updatedGroups); return withoutElements; }, CONCEPT_MAP_ENCODER); Dataset<Mapping> newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator, MAPPING_ENCODER); return withConceptMaps(withoutConcepts, newMappings); }
@Override public ConceptMaps withConceptMaps(Dataset<ConceptMap> conceptMaps) { Dataset<UrlAndVersion> newMembers = getUrlAndVersions(conceptMaps); if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) { throw new IllegalArgumentException( "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion"); } // Remove the concept contents for persistence. This is most easily done in the ConceptMap // object by setting the group to an empty list. Dataset<ConceptMap> withoutConcepts = conceptMaps .map((MapFunction<ConceptMap,ConceptMap>) conceptMap -> { // Remove the elements rather than the groups to preserved the // "unmapped" structure in a group that can refer to other // concept maps. ConceptMap withoutElements = conceptMap.copy(); List<ConceptMapGroupComponent> updatedGroups = new ArrayList<>(); for (ConceptMapGroupComponent group: withoutElements.getGroup()) { group.setElement(new ArrayList<>()); updatedGroups.add(group); } withoutElements.setGroup(updatedGroups); return withoutElements; }, CONCEPT_MAP_ENCODER); Dataset<Mapping> newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator, MAPPING_ENCODER); return withConceptMaps(withoutConcepts, newMappings); }
@Override public ConceptMaps withConceptMaps(Dataset<ConceptMap> conceptMaps) { Dataset<UrlAndVersion> newMembers = getUrlAndVersions(conceptMaps); if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) { throw new IllegalArgumentException( "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion"); } // Remove the concept contents for persistence. This is most easily done in the ConceptMap // object by setting the group to an empty list. Dataset<ConceptMap> withoutConcepts = conceptMaps .map((MapFunction<ConceptMap,ConceptMap>) conceptMap -> { // Remove the elements rather than the groups to preserved the // "unmapped" structure in a group that can refer to other // concept maps. ConceptMap withoutElements = conceptMap.copy(); List<ConceptMapGroupComponent> updatedGroups = new ArrayList<>(); for (ConceptMapGroupComponent group: withoutElements.getGroup()) { group.setElement(new ArrayList<>()); updatedGroups.add(group); } withoutElements.setGroup(updatedGroups); return withoutElements; }, CONCEPT_MAP_ENCODER); Dataset<Mapping> newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator, MAPPING_ENCODER); return withConceptMaps(withoutConcepts, newMappings); }
// NOTE(review): fragment — the `},` here closes a map(...) lambda that begins
// above this view, encoded with VALUE_SET_ENCODER. The flatMap then expands
// each ValueSet into its individual Value rows — confirm against the full file.
}, VALUE_SET_ENCODER); Dataset<Value> newValues = valueSets.flatMap(ValueSets::expandValuesIterator, getValueEncoder());
// NOTE(review): fragment — the `},` here closes a map(...) lambda that begins
// above this view, encoded with VALUE_SET_ENCODER. The flatMap then expands
// each ValueSet into its individual Value rows — confirm against the full file.
}, VALUE_SET_ENCODER); Dataset<Value> newValues = valueSets.flatMap(ValueSets::expandValuesIterator, getValueEncoder());
// NOTE(review): fragment — the `},` here closes a map(...) lambda that begins
// above this view, encoded with VALUE_SET_ENCODER. The flatMap then expands
// each ValueSet into its individual Value rows — confirm against the full file.
}, VALUE_SET_ENCODER); Dataset<Value> newValues = valueSets.flatMap(ValueSets::expandValuesIterator, getValueEncoder());
/**
 * Reads the text file(s) at the given path as a dataset of rows, running each
 * line through the configured translator when one is present.
 *
 * @param path file or directory path readable by Spark's text source
 * @return the raw lines, or the translated rows when a translator is configured
 * @throws Exception propagated from translator construction
 */
private Dataset<Row> readText(String path) throws Exception {
  Dataset<Row> lines = Contexts.getSparkSession().read().text(path);

  if (translatorConfig == null) {
    // No translator configured: hand back the raw lines unchanged.
    return lines;
  }

  // Pair each line with a key in the shape the translate function expects.
  Dataset<Tuple2<String, String>> keyedLines = lines.map(
      new PrepareLineForTranslationFunction(),
      Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
  TranslateFunction<String, String> translate = getTranslateFunction(translatorConfig);
  return keyedLines.flatMap(translate, RowEncoder.apply(translate.getSchema()));
}
// Word count: split each line on single spaces, flatten into one word per row,
// then group on the default string column ("value") and count occurrences.
// NOTE(review): `lines` is declared above this view — presumably a
// Dataset<String>, given the FlatMapFunction<String, String> cast; confirm.
Dataset<Row> wordCounts = lines.flatMap( (FlatMapFunction<String, String>) x -> Arrays.asList(x.split(" ")).iterator(), Encoders.STRING()).groupBy("value").count();