/**
 * Builds a two-column schema: a nullable integer {@code id} and a nullable {@code intervals}
 * column whose value is an array of {@code (startTime, endTime)} structs of nullable longs.
 *
 * @return the assembled schema
 */
private StructType createSchema() {
  StructField startTime = new StructField("startTime", DataTypes.LongType, true, Metadata.empty());
  StructField endTime = new StructField("endTime", DataTypes.LongType, true, Metadata.empty());
  StructType interval = new StructType(new StructField[] {startTime, endTime});

  // Second constructor argument is the array's containsNull flag.
  DataType intervalArray = new ArrayType(interval, false);

  StructField idColumn = new StructField("id", DataTypes.IntegerType, true, Metadata.empty());
  StructField intervalsColumn = new StructField("intervals", intervalArray, true, Metadata.empty());
  return new StructType(new StructField[] {idColumn, intervalsColumn});
}
/**
 * Checks that a {@code StructType} can be created from a Java {@code List} of fields, once
 * from an {@code ArrayList} and once from the list returned by {@code Arrays.asList}.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list built incrementally.
  List<StructField> arrayListFields = new ArrayList<>();
  arrayListFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromArrayList = StructType$.MODULE$.apply(arrayListFields);
  Assert.assertEquals(0, fromArrayList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> asListFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromAsList = StructType$.MODULE$.apply(asListFields);
  Assert.assertEquals(0, fromAsList.fieldIndex("id"));
}
/**
 * Checks that a {@code StructType} can be created from a Java {@code List} of fields, once
 * from an {@code ArrayList} and once from the list returned by {@code Arrays.asList}.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list built incrementally.
  List<StructField> arrayListFields = new ArrayList<>();
  arrayListFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromArrayList = StructType$.MODULE$.apply(arrayListFields);
  Assert.assertEquals(0, fromArrayList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> asListFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromAsList = StructType$.MODULE$.apply(asListFields);
  Assert.assertEquals(0, fromAsList.fieldIndex("id"));
}
/**
 * Checks that a {@code StructType} can be created from a Java {@code List} of fields, once
 * from an {@code ArrayList} and once from the list returned by {@code Arrays.asList}.
 */
@Test
public void testCreateStructTypeFromList() {
  // Case 1: list built incrementally.
  List<StructField> arrayListFields = new ArrayList<>();
  arrayListFields.add(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromArrayList = StructType$.MODULE$.apply(arrayListFields);
  Assert.assertEquals(0, fromArrayList.fieldIndex("id"));

  // Case 2: list produced by Arrays.asList.
  List<StructField> asListFields =
      Arrays.asList(new StructField("id", DataTypes.StringType, true, Metadata.empty()));
  StructType fromAsList = StructType$.MODULE$.apply(asListFields);
  Assert.assertEquals(0, fromAsList.fieldIndex("id"));
}
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first(); Assert.assertEquals(bean.getA(), first.getDouble(0), 0.0);
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first(); Assert.assertEquals(bean.getA(), first.getDouble(0), 0.0);
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) { StructType schema = df.schema(); Assert.assertEquals(new StructField("a", DoubleType$.MODULE$, false, Metadata.empty()), schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( new StructField("c", mapType, true, Metadata.empty()), schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()), schema.apply("e")); Row first = df.select("a", "b", "c", "d", "e").first(); Assert.assertEquals(bean.getA(), first.getDouble(0), 0.0);
/**
 * Parses a comma-separated schema description into an array of {@code StructField}s.
 *
 * <p>Each comma-separated entry must have the form {@code "<name> <type>"}, with the name and
 * type separated by one or more whitespace characters. The type token is lower-cased before
 * being passed to {@code AnalyticsCommonUtils.stringToDataType}. Every resulting field is
 * nullable and carries empty metadata.
 *
 * @param schemaString comma-separated list of {@code "<name> <type>"} entries
 * @return one field per entry, in the order the entries appear
 * @throws IllegalArgumentException if an entry does not contain both a name and a type
 */
public static StructField[] extractFieldsFromString(String schemaString) {
  String[] strFields = schemaString.split(",");
  StructField[] resFields = new StructField[strFields.length];
  for (int i = 0; i < strFields.length; i++) {
    // Split on any run of whitespace so repeated spaces or tabs between the name and the
    // type do not produce an empty type token.
    String[] tokens = strFields[i].trim().split("\\s+");
    if (tokens.length < 2) {
      throw new IllegalArgumentException(
          "Invalid field definition '" + strFields[i] + "': expected '<name> <type>'");
    }
    String name = tokens[0];
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. under a Turkish locale "INT" would otherwise become "\u0131nt").
    String type = tokens[1].toLowerCase(java.util.Locale.ROOT);
    resFields[i] =
        new StructField(name, AnalyticsCommonUtils.stringToDataType(type), true, Metadata.empty());
  }
  return resFields;
}
/**
 * Builds a nullable {@code StructField} with empty metadata from one field configuration.
 *
 * @param fieldsConfig configuration providing the field name (under {@code FIELD_NAME_CONFIG})
 *     and whatever {@code parseDataType} reads to determine the type
 * @return the field described by the configuration
 */
private static StructField parseField(Config fieldsConfig) {
  // Arguments are evaluated left to right: the name is read before the type is parsed.
  return new StructField(
      fieldsConfig.getString(FIELD_NAME_CONFIG),
      parseDataType(fieldsConfig),
      true,
      Metadata.empty());
}
/**
 * Converts a struct into a Spark {@code StructType}, pairing each nested field with the
 * already-converted data type at the same position in {@code fieldResults}.
 *
 * @param struct the struct whose fields supply names and optionality
 * @param fieldResults converted data types, one per field, in field order
 * @return the resulting Spark struct type
 */
@Override
public DataType struct(Types.StructType struct, List<DataType> fieldResults) {
  List<Types.NestedField> nestedFields = struct.fields();
  List<StructField> converted = Lists.newArrayListWithExpectedSize(fieldResults.size());

  int position = 0;
  for (Types.NestedField nestedField : nestedFields) {
    DataType convertedType = fieldResults.get(position);
    position += 1;
    // Spark nullability mirrors the source field's optional flag.
    converted.add(
        StructField.apply(
            nestedField.name(), convertedType, nestedField.isOptional(), Metadata.empty()));
  }

  return StructType$.MODULE$.apply(converted);
}
/**
 * Converts a map of column definitions into an array of nullable {@code StructField}s with
 * empty metadata. The map key becomes the field name; the data type is resolved by passing
 * the name of the column's type constant to {@code AnalyticsCommonUtils.stringToDataType}.
 *
 * @param columns column definitions keyed by column name
 * @return one field per map entry, in the map's iteration order
 */
public static StructField[] extractFieldsFromColumns(Map<String, ColumnDefinition> columns) {
  StructField[] fields = new StructField[columns.size()];
  int next = 0;
  for (Map.Entry<String, ColumnDefinition> column : columns.entrySet()) {
    String typeName = column.getValue().getType().name();
    fields[next++] =
        new StructField(
            column.getKey(),
            AnalyticsCommonUtils.stringToDataType(typeName),
            true,
            Metadata.empty());
  }
  return fields;
}
/**
 * Returns the result of appending a non-nullable string column named {@code prediction} to
 * the given schema via {@code SchemaUtils.appendColumn}.
 *
 * @param schema the input schema
 * @return the schema produced by {@code SchemaUtils.appendColumn}
 */
@Override
public StructType transformSchema(StructType schema) {
  StructField predictionColumn =
      new StructField("prediction", DataTypes.StringType, false, Metadata.empty());
  return SchemaUtils.appendColumn(schema, predictionColumn);
}
/**
 * Describes the SQL representation as a struct with a single nullable binary field named
 * {@code wkb}.
 *
 * @return a one-field struct type
 */
@Override
public DataType sqlType() {
  StructField wkbField = new StructField("wkb", DataTypes.BinaryType, true, Metadata.empty());
  return new StructType(new StructField[] {wkbField});
}
/**
 * Maps every row of the input through a {@code DecodeFunction} and wraps the result in a new
 * data frame with two non-nullable string columns, {@code sentence} and {@code prediction}.
 *
 * @param dataset the input data frame
 * @return a data frame created from the mapped rows, using the input's SQL context
 */
@Override
public DataFrame transform(DataFrame dataset) {
  JavaRDD<Row> decodedRows = dataset.javaRDD().map(new DecodeFunction());

  StructField sentenceColumn =
      new StructField("sentence", DataTypes.StringType, false, Metadata.empty());
  StructField predictionColumn =
      new StructField("prediction", DataTypes.StringType, false, Metadata.empty());
  StructType outputSchema = new StructType(new StructField[] {sentenceColumn, predictionColumn});

  return dataset.sqlContext().createDataFrame(decodedRows, outputSchema);
}
private void start() { SparkSession spark = SparkSession.builder().appName("First Prediction") .master("local").getOrCreate(); StructType schema = new StructType( new StructField[] { new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata .empty()), }); // TODO this example is not working yet }
/**
 * Tags a distributed collection of sentences and writes the result to an output file in the
 * requested format.
 *
 * <p>The rows are wrapped in a data frame with a single non-nullable string column named
 * {@code sentence}, and the work is then delegated to the data-frame overload of {@code tag}.
 *
 * @param sentences rows to tag; used with a one-column {@code sentence} schema
 * @param outputFileName name of the output file, passed through to the overload
 * @param outputFormat desired output format, passed through to the overload
 */
public void tag(JavaRDD<Row> sentences, String outputFileName, OutputFormat outputFormat) {
  StructField sentenceColumn =
      new StructField("sentence", DataTypes.StringType, false, Metadata.empty());
  StructType sentenceSchema = new StructType(new StructField[] {sentenceColumn});

  SQLContext sqlContext = new SQLContext(jsc);
  DataFrame sentenceFrame = sqlContext.createDataFrame(sentences, sentenceSchema);
  tag(sentenceFrame, outputFileName, outputFormat);
}
/**
 * Smoke test: configures a {@code StopWordsRemover} from Java, runs it over two rows of
 * string arrays and collects the result. No assertions are made on the output.
 */
@Test
public void javaCompatibilityTest() {
  StopWordsRemover remover = new StopWordsRemover();
  remover.setInputCol("raw").setOutputCol("filtered");

  Row firstSentence = RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon"));
  Row secondSentence = RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"));
  List<Row> data = Arrays.asList(firstSentence, secondSentence);

  StructField rawColumn =
      new StructField(
          "raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty());
  StructType schema = new StructType(new StructField[] {rawColumn});

  Dataset<Row> dataset = spark.createDataFrame(data, schema);
  remover.transform(dataset).collect();
}
}
/**
 * Smoke test: configures a {@code StopWordsRemover} from Java, runs it over two rows of
 * string arrays and collects the result. No assertions are made on the output.
 */
@Test
public void javaCompatibilityTest() {
  StopWordsRemover remover = new StopWordsRemover();
  remover.setInputCol("raw").setOutputCol("filtered");

  Row firstSentence = RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon"));
  Row secondSentence = RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"));
  List<Row> data = Arrays.asList(firstSentence, secondSentence);

  StructField rawColumn =
      new StructField(
          "raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty());
  StructType schema = new StructType(new StructField[] {rawColumn});

  Dataset<Row> dataset = spark.createDataFrame(data, schema);
  remover.transform(dataset).collect();
}
}
/**
 * Runs a {@code DCT} transformer from Java over a single dense vector and compares the output
 * with the forward transform computed directly by {@code DoubleDCT_1D}, within 1e-6.
 */
@Test
public void javaCompatibilityTest() {
  double[] input = new double[] {1D, 2D, 3D, 4D};

  List<Row> rows = Arrays.asList(RowFactory.create(Vectors.dense(input)));
  StructField vecColumn = new StructField("vec", new VectorUDT(), false, Metadata.empty());
  Dataset<Row> dataset =
      spark.createDataFrame(rows, new StructType(new StructField[] {vecColumn}));

  // Reference result: transform a copy in place so the input array stays untouched.
  double[] expectedResult = input.clone();
  DoubleDCT_1D referenceDct = new DoubleDCT_1D(input.length);
  referenceDct.forward(expectedResult, true);

  DCT dct = new DCT();
  dct.setInputCol("vec").setOutputCol("resultVec");

  List<Row> result = dct.transform(dataset).select("resultVec").collectAsList();
  Vector resultVec = result.get(0).getAs("resultVec");

  Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6);
}
}