/**
 * Builds the schema used by this class.
 *
 * <p>The schema has two nullable columns: an integer {@code id} and {@code intervals}, an array
 * (declared as not containing null elements) of structs with nullable long fields
 * {@code startTime} and {@code endTime}.
 *
 * @return the assembled schema
 */
private StructType createSchema() {
  // Element type of the "intervals" array: a (startTime, endTime) pair of longs.
  StructField[] intervalFields = {
      new StructField("startTime", DataTypes.LongType, true, Metadata.empty()),
      new StructField("endTime", DataTypes.LongType, true, Metadata.empty())
  };
  StructType intervalStruct = new StructType(intervalFields);
  DataType intervalArray = new ArrayType(intervalStruct, false);

  StructField[] topLevelFields = {
      new StructField("id", DataTypes.IntegerType, true, Metadata.empty()),
      new StructField("intervals", intervalArray, true, Metadata.empty())
  };
  return new StructType(topLevelFields);
}
new StructField("word", DataTypes.StringType, false, Metadata.empty()), new StructField("neighbour", DataTypes.StringType, false, Metadata.empty()), new StructField("frequency", DataTypes.IntegerType, false, Metadata.empty())});
/**
 * Checks that {@code StructType$.MODULE$.apply} accepts a {@code java.util.List} of fields,
 * both when the list is a mutable {@code ArrayList} and when it comes from
 * {@code Arrays.asList}, and that the single {@code id} field ends up at index 0.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list populated through add().
  List<StructField> mutableFields = new ArrayList<>();
  mutableFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromMutableList = StructType$.MODULE$.apply(mutableFields);
  Assert.assertEquals(0, fromMutableList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> wrappedFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromWrappedList = StructType$.MODULE$.apply(wrappedFields);
  Assert.assertEquals(0, fromWrappedList.fieldIndex("id"));
}
/**
 * Checks that {@code StructType$.MODULE$.apply} accepts a {@code java.util.List} of fields,
 * both when the list is a mutable {@code ArrayList} and when it comes from
 * {@code Arrays.asList}, and that the single {@code id} field ends up at index 0.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list populated through add().
  List<StructField> mutableFields = new ArrayList<>();
  mutableFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromMutableList = StructType$.MODULE$.apply(mutableFields);
  Assert.assertEquals(0, fromMutableList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> wrappedFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromWrappedList = StructType$.MODULE$.apply(wrappedFields);
  Assert.assertEquals(0, fromWrappedList.fieldIndex("id"));
}
/**
 * Checks that {@code StructType$.MODULE$.apply} accepts a {@code java.util.List} of fields,
 * both when the list is a mutable {@code ArrayList} and when it comes from
 * {@code Arrays.asList}, and that the single {@code id} field ends up at index 0.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list populated through add().
  List<StructField> mutableFields = new ArrayList<>();
  mutableFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromMutableList = StructType$.MODULE$.apply(mutableFields);
  Assert.assertEquals(0, fromMutableList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> wrappedFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromWrappedList = StructType$.MODULE$.apply(wrappedFields);
  Assert.assertEquals(0, fromWrappedList.fieldIndex("id"));
}
/**
 * Creates the job that targets the {@code tweets_location} table.
 *
 * <p>Forwards both arguments to the superclass constructor, then sets {@code tableName} and a
 * two-column schema: non-nullable string {@code location} and non-nullable integer
 * {@code count}.
 *
 * @param rdbmsWriter writer passed through to the superclass
 * @param topN value passed through to the superclass
 */
public TopNLocationByTweets(SparkJdbcSourceWriter rdbmsWriter, int topN) {
  super(rdbmsWriter, topN);
  this.tableName = "tweets_location";
  // Use Metadata.empty() rather than null: a null metadata reference can surface later as a
  // NullPointerException when the schema's field metadata is read (e.g. schema serialization).
  this.schema = new StructType(new StructField[] {
      new StructField("location", DataTypes.StringType, false, Metadata.empty()),
      new StructField("count", DataTypes.IntegerType, false, Metadata.empty())
  });
}
/**
 * Creates the job that targets the {@code user_followers} table.
 *
 * <p>Forwards both arguments to the superclass constructor, then sets {@code tableName} and a
 * two-column schema: non-nullable string {@code username} and non-nullable integer
 * {@code count}.
 *
 * @param rdbmsWriter writer passed through to the superclass
 * @param topN value passed through to the superclass
 */
public TopNUsersWithMaxFollowers(SparkJdbcSourceWriter rdbmsWriter, int topN) {
  super(rdbmsWriter, topN);
  this.tableName = "user_followers";
  // Use Metadata.empty() rather than null: a null metadata reference can surface later as a
  // NullPointerException when the schema's field metadata is read (e.g. schema serialization).
  this.schema = new StructType(new StructField[] {
      new StructField("username", DataTypes.StringType, false, Metadata.empty()),
      new StructField("count", DataTypes.IntegerType, false, Metadata.empty())
  });
}
/**
 * Returns a new {@code StructType} built from this type's {@code fields} together with one
 * additional {@code StructField(name, dataType, metadata)}.
 *
 * <p>The field list for the new type is produced by the {@code on(...)} helper, which receives a
 * fresh {@code ArrayList} pre-sized to {@code fields.size() + 1}, the {@code ArrayList::addAll}
 * and {@code ArrayList::add} method references, the existing {@code fields}, and the new field.
 * NOTE(review): presumably {@code on} applies addAll(fields) then add(newField) to that list and
 * returns it, leaving {@code fields} itself untouched - confirm against the helper's definition.
 *
 * @param name name given to the new field
 * @param dataType data type given to the new field
 * @param metadata metadata given to the new field
 * @return a newly constructed {@code StructType}
 */
public StructType add(String name, DataType dataType, Metadata metadata) { return new StructType(on( new ArrayList<>(fields.size() + 1), ArrayList::addAll, fields, ArrayList::add, new StructField(name, dataType, metadata))); }
/**
 * Describes the SQL representation of this type: a struct with a single nullable binary
 * column named {@code wkb}.
 *
 * @return a one-field {@code StructType}
 */
@Override
public DataType sqlType() {
  StructField wkbColumn = new StructField("wkb", DataTypes.BinaryType, true, Metadata.empty());
  return new StructType(new StructField[] {wkbColumn});
}
/**
 * Builds a {@code StructType} with one {@code StructField} per attribute, in the order the
 * attributes are supplied. Each field takes its name and data type from the attribute.
 *
 * @param attributes attributes to convert
 * @return a struct type whose fields mirror {@code attributes}
 */
public static StructType fromAttributes(List<? extends Attribute> attributes) {
  return new StructType(
      attributes.stream()
          .map(attribute -> new StructField(attribute.name(), attribute.dataType()))
          .collect(Collectors.toList()));
}
}
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first();
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first();
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first();
/**
 * Converts one field configuration into a nullable {@code StructField} with empty metadata.
 *
 * <p>The field name is read from the {@code FIELD_NAME_CONFIG} key; the data type is resolved by
 * {@code parseDataType} from the same configuration.
 *
 * @param fieldsConfig configuration describing a single field
 * @return the corresponding struct field
 */
private static StructField parseField(Config fieldsConfig) {
  return new StructField(
      fieldsConfig.getString(FIELD_NAME_CONFIG),
      parseDataType(fieldsConfig),
      true,
      Metadata.empty());
}
/**
 * Converts a column map into an array of nullable {@code StructField}s with empty metadata.
 *
 * <p>One field is produced per map entry, in the map's iteration order. The field name is the
 * entry key; the data type is obtained by passing the column type's {@code name()} to
 * {@code AnalyticsCommonUtils.stringToDataType}.
 *
 * @param columns column definitions keyed by column name
 * @return an array with one field per column
 */
public static StructField[] extractFieldsFromColumns(Map<String, ColumnDefinition> columns) {
  StructField[] fields = new StructField[columns.size()];
  int next = 0;
  for (Map.Entry<String, ColumnDefinition> column : columns.entrySet()) {
    String typeName = column.getValue().getType().name();
    fields[next++] = new StructField(
        column.getKey(),
        AnalyticsCommonUtils.stringToDataType(typeName),
        true,
        Metadata.empty());
  }
  return fields;
}
/**
 * Maps every row of {@code dataset} through a {@code DecodeFunction} and wraps the result in a
 * new {@code DataFrame} with two non-nullable string columns, {@code sentence} and
 * {@code prediction}.
 *
 * @param dataset input data frame
 * @return a data frame created from the mapped rows using the input's SQL context
 */
@Override
public DataFrame transform(DataFrame dataset) {
  JavaRDD<Row> decodedRows = dataset.javaRDD().map(new DecodeFunction());

  StructField[] outputColumns = {
      new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
      new StructField("prediction", DataTypes.StringType, false, Metadata.empty())
  };
  StructType outputSchema = new StructType(outputColumns);

  return dataset.sqlContext().createDataFrame(decodedRows, outputSchema);
}
/**
 * Derives the output schema by handing the input schema and a non-nullable string
 * {@code prediction} field to {@code SchemaUtils.appendColumn}.
 *
 * @param schema input schema
 * @return the schema returned by {@code SchemaUtils.appendColumn}
 */
@Override
public StructType transformSchema(StructType schema) {
  StructField predictionColumn =
      new StructField("prediction", DataTypes.StringType, false, Metadata.empty());
  return SchemaUtils.appendColumn(schema, predictionColumn);
}
private void start() { SparkSession spark = SparkSession.builder().appName("First Prediction") .master("local").getOrCreate(); StructType schema = new StructType( new StructField[] { new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata .empty()), }); // TODO this example is not working yet }
/**
 * Builds a non-nullable {@code StructField} for a cell.
 *
 * <p>The field name is {@code cell.getName()} and the data type is whatever
 * {@code getDataType} returns for {@code cell.getValue()}.
 *
 * @param cell cell to describe
 * @return the struct field for {@code cell}
 */
private static StructField getStructFieldFromCell(Cell cell) {
  // Metadata.empty() instead of null: a null metadata reference can surface later as a
  // NullPointerException when the field's metadata is read (e.g. schema serialization).
  return new StructField(
      cell.getName(), getDataType(cell.getValue()), false, Metadata.empty());
}
/**
 * Builds a non-nullable {@code StructField} for a cell.
 *
 * <p>The field name is {@code cell.getName()} and the data type is whatever
 * {@code getDataType} returns for {@code cell.getValue()}.
 *
 * @param cell cell to describe
 * @return the struct field for {@code cell}
 */
private static StructField getStructFieldFromCell(Cell cell) {
  // Metadata.empty() instead of null: a null metadata reference can surface later as a
  // NullPointerException when the field's metadata is read (e.g. schema serialization).
  return new StructField(
      cell.getName(), getDataType(cell.getValue()), false, Metadata.empty());
}