/**
 * Persists the given data frame into the table named by the {@code TABLE}
 * property, after switching to the keyspace named by the {@code KEYSPACE}
 * property.
 *
 * @param dataFrame   the data frame to persist
 * @param entityClazz the entity class (unused here; kept for the interface contract)
 * @param properties  must contain String values under {@code KEYSPACE} and {@code TABLE}
 * @throws IllegalArgumentException if either property is absent
 */
@Override
public void saveDataFrame(DataFrame dataFrame, Class<?> entityClazz, Map<String, Object> properties) {
    String keyspace = (String) properties.get(KEYSPACE);
    String table = (String) properties.get(TABLE);

    // Fail fast with a clear message: the original code would otherwise run
    // "use null" against the SQL context and fail with a cryptic parse error.
    if (keyspace == null || table == null) {
        throw new IllegalArgumentException(
            "Both KEYSPACE and TABLE properties are required to save a data frame");
    }

    // Select the target keyspace/database, then insert positionally.
    dataFrame.sqlContext().sql("use " + keyspace);
    dataFrame.write().insertInto(table);
} }
/**
 * Persists a list of entities through Spark by parallelizing them into an RDD,
 * building a DataFrame from the entity class, and inserting into the table
 * described by the entity metadata.
 *
 * @param listEntity  entities to persist
 * @param m           metadata supplying entity class, schema and table name
 * @param sparkClient holder of the Spark context and SQL context
 * @return {@code true} on success
 * @throws KunderaException wrapping any failure during conversion or write
 */
@Override
public boolean persist(List listEntity, EntityMetadata m, SparkClient sparkClient) {
    try {
        // Convert the Java list to a Scala Seq for SparkContext.parallelize.
        Seq s = scala.collection.JavaConversions.asScalaBuffer(listEntity).toList();
        ClassTag tag = scala.reflect.ClassTag$.MODULE$.apply(m.getEntityClazz());
        JavaRDD personRDD = sparkClient.sparkContext.parallelize(s, 1, tag).toJavaRDD();
        DataFrame df = sparkClient.sqlContext.createDataFrame(personRDD, m.getEntityClazz());
        sparkClient.sqlContext.sql("use " + m.getSchema());
        if (logger.isDebugEnabled()) {
            // BUGFIX: was logger.info(...) inside an isDebugEnabled() guard,
            // which defeated the level check; log at debug to match the guard.
            logger.debug("Below are the registered table with hive context: ");
            sparkClient.sqlContext.sql("show tables").show();
        }
        df.write().insertInto(m.getTableName());
        return true;
    } catch (Exception e) {
        // Preserve the cause so callers can diagnose the underlying failure.
        throw new KunderaException("Cannot persist object(s)", e);
    }
}
/** * Writes mapping records to a table. This class ensures the columns and partitions are mapped * properly, and is a workaround similar to the problem described <a * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>. * * @param mappings a dataset of mapping records * @param tableName the table to write them to */ private static void writeMappingsToTable(Dataset<Mapping> mappings, String tableName) { // Note the last two columns here must be the partitioned-by columns // in order and in lower case for Spark to properly match // them to the partitions. Dataset<Row> orderedColumnDataset = mappings.select("sourceValueSet", "targetValueSet", "sourceSystem", "sourceValue", "targetSystem", "targetValue", "equivalence", "conceptmapuri", "conceptmapversion"); orderedColumnDataset .write() .insertInto(tableName); }
/**
 * Writes mapping records to a table. This class ensures the columns and partitions are mapped
 * properly, and is a workaround similar to the problem described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param mappings a dataset of mapping records
 * @param tableName the table to write them to
 */
private static void writeMappingsToTable(Dataset<Mapping> mappings, String tableName) {

  // Note the last two columns here must be the partitioned-by columns
  // in order and in lower case for Spark to properly match
  // them to the partitions.
  Dataset<Row> orderedColumnDataset =
      mappings.select("sourceValueSet",
          "targetValueSet",
          "sourceSystem",
          "sourceValue",
          "targetSystem",
          "targetValue",
          "equivalence",
          "conceptmapuri",
          "conceptmapversion");

  // insertInto matches columns by position, not by name, so the select order
  // above must exactly mirror the target table's column order.
  orderedColumnDataset
      .write()
      .insertInto(tableName);
}
/**
 * Writes ancestor records into the given table, selecting columns in the exact
 * order the table schema expects. This works around the column-ordering issue
 * described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param ancestors a dataset of ancestor records
 * @param tableName the table to write them to
 */
private static void writeAncestorsToTable(Dataset<Ancestor> ancestors, String tableName) {
    // insertInto matches columns positionally, so this select order must
    // mirror the target table's column order exactly.
    ancestors
        .select(
            "descendantSystem",
            "descendantValue",
            "ancestorSystem",
            "ancestorValue",
            "uri",
            "version")
        .write()
        .mode(SaveMode.ErrorIfExists)
        .insertInto(tableName);
}
/**
 * Writes ancestor records to a table. This class ensures the columns and partitions are mapped
 * properly, and is a workaround similar to the problem described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param ancestors a dataset of ancestor records
 * @param tableName the table to write them to
 */
private static void writeAncestorsToTable(Dataset<Ancestor> ancestors, String tableName) {

  // insertInto matches columns by position, not by name, so this select order
  // must exactly mirror the target table's column order.
  Dataset<Row> orderedColumnDataset =
      ancestors.select("descendantSystem",
          "descendantValue",
          "ancestorSystem",
          "ancestorValue",
          "uri",
          "version");

  // NOTE(review): some Spark versions only honor Append/Overwrite for
  // insertInto; confirm ErrorIfExists behaves as intended on the Spark
  // version in use.
  orderedColumnDataset.write()
      .mode(SaveMode.ErrorIfExists)
      .insertInto(tableName);
}
/** * Writes value records to a table. This class ensures the columns and partitions are mapped * properly, and is a workaround similar to the problem described <a * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>. * * @param values a dataset of value records * @param tableName the table to write them to */ private static void writeValuesToTable(Dataset<Value> values, String tableName) { // Note the last two columns here must be the partitioned-by columns in order and in lower case // for Spark to properly match them to the partitions Dataset<Row> orderColumnDataset = values.select("system", "version", "value", "valueseturi", "valuesetversion"); orderColumnDataset.write() .mode(SaveMode.ErrorIfExists) .insertInto(tableName); } }
/**
 * Writes value records to a table. This class ensures the columns and partitions are mapped
 * properly, and is a workaround similar to the problem described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param values a dataset of value records
 * @param tableName the table to write them to
 */
private static void writeValuesToTable(Dataset<Value> values, String tableName) {

  // Note the last two columns here must be the partitioned-by columns in order and in lower case
  // for Spark to properly match them to the partitions
  Dataset<Row> orderColumnDataset =
      values.select("system",
          "version",
          "value",
          "valueseturi",
          "valuesetversion");

  // NOTE(review): some Spark versions only honor Append/Overwrite for
  // insertInto; confirm ErrorIfExists behaves as intended on the Spark
  // version in use.
  orderColumnDataset.write()
      .mode(SaveMode.ErrorIfExists)
      .insertInto(tableName);
} }
/**
 * Applies each planned mutation by writing its dataset into the configured
 * Hive table.
 *
 * <p>For each plan: optionally aligns columns to the target schema, applies
 * any configured partition columns and writer options, maps the mutation type
 * to a Spark save mode (INSERT → Append, OVERWRITE → Overwrite), and performs
 * a positional insert into {@code tableName}.
 *
 * @param planned mutation-type/dataset pairs to write, in order
 * @throws UnsupportedOperationException if a mutation type other than INSERT
 *         or OVERWRITE is encountered
 */
@Override
public void applyBulkMutations(List<Tuple2<MutationType, Dataset<Row>>> planned) {
    for (Tuple2<MutationType, Dataset<Row>> plan : planned) {
        MutationType mutationType = plan._1();
        // Optionally reorder columns to match the target table schema, since
        // insertInto matches columns by position rather than by name.
        Dataset<Row> mutation = (doesAlignColumns) ? alignColumns(plan._2()) : plan._2();

        DataFrameWriter<Row> writer = mutation.write();
        if (partitionColumns != null) {
            writer = writer.partitionBy(partitionColumns);
        }
        if (options != null) {
            writer = writer.options(options);
        }

        switch (mutationType) {
            case INSERT:
                writer = writer.mode(SaveMode.Append);
                break;
            case OVERWRITE:
                writer = writer.mode(SaveMode.Overwrite);
                break;
            default:
                // UnsupportedOperationException is the standard type for this
                // condition; it subclasses RuntimeException, so existing
                // callers catching RuntimeException are unaffected.
                throw new UnsupportedOperationException(
                    "Hive output does not support mutation type: " + mutationType);
        }

        writer.insertInto(tableName);
    }
}
.insertInto(conceptMapTable);
.insertInto(conceptMapTable);
.insertInto(valueSetTable);
.insertInto(valueSetTable);