/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}) and a flag
 * saying whether the array contains null values ({@code containsNull}).
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @param containsNull passed through unchanged as the second constructor argument
 * @return a newly constructed ArrayType for the given arguments
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType, boolean containsNull) {
  if (elementType != null) {
    return new ArrayType(elementType, containsNull);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}).
 * The {@code containsNull} field of the result is always {@code true}.
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @return a newly constructed ArrayType with {@code containsNull} set to {@code true}
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType) {
  if (elementType != null) {
    return new ArrayType(elementType, true);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}).
 * The {@code containsNull} field of the result is always {@code true}.
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @return a newly constructed ArrayType with {@code containsNull} set to {@code true}
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType) {
  if (elementType != null) {
    return new ArrayType(elementType, true);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}) and a flag
 * saying whether the array contains null values ({@code containsNull}).
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @param containsNull passed through unchanged as the second constructor argument
 * @return a newly constructed ArrayType for the given arguments
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType, boolean containsNull) {
  if (elementType != null) {
    return new ArrayType(elementType, containsNull);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}).
 * The {@code containsNull} field of the result is always {@code true}.
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @return a newly constructed ArrayType with {@code containsNull} set to {@code true}
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType) {
  if (elementType != null) {
    return new ArrayType(elementType, true);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
/**
 * Builds an ArrayType from the data type of its elements ({@code elementType}) and a flag
 * saying whether the array contains null values ({@code containsNull}).
 *
 * @param elementType data type of the array elements; must not be {@code null}
 * @param containsNull passed through unchanged as the second constructor argument
 * @return a newly constructed ArrayType for the given arguments
 * @throws IllegalArgumentException if {@code elementType} is {@code null}
 */
public static ArrayType createArrayType(DataType elementType, boolean containsNull) {
  if (elementType != null) {
    return new ArrayType(elementType, containsNull);
  }
  // Reject a missing element type up front instead of handing null to the constructor.
  throw new IllegalArgumentException("elementType should not be null.");
}
schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true,
schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true,
schema.apply("a")); Assert.assertEquals( new StructField("b", new ArrayType(IntegerType$.MODULE$, true), true, Metadata.empty()), schema.apply("b")); ArrayType valueType = new ArrayType(DataTypes.IntegerType, false); MapType mapType = new MapType(DataTypes.StringType, valueType, true); Assert.assertEquals( schema.apply("c")); Assert.assertEquals( new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()), schema.apply("d")); Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true,
/**
 * Builds a two-column schema: an integer {@code id} column and an {@code intervals} column.
 * The {@code intervals} column is an array (constructed with {@code false} as its second
 * argument) whose elements are structs with long {@code startTime} and {@code endTime} fields.
 *
 * @return the assembled StructType
 */
private StructType createSchema() {
  // Element struct describing a single interval.
  StructField[] intervalFields = {
    new StructField("startTime", DataTypes.LongType, true, Metadata.empty()),
    new StructField("endTime", DataTypes.LongType, true, Metadata.empty())
  };
  StructType intervalType = new StructType(intervalFields);
  DataType intervalsType = new ArrayType(intervalType, false);

  // Top-level columns, in declaration order.
  StructField idField =
    new StructField("id", DataTypes.IntegerType, true, Metadata.empty());
  StructField intervalsField =
    new StructField("intervals", intervalsType, true, Metadata.empty());
  return new StructType(new StructField[] {idField, intervalsField});
}
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });
@Test public void testJavaWord2Vec() { StructType schema = new StructType(new StructField[]{ new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) }); Dataset<Row> documentDF = spark.createDataFrame( Arrays.asList( RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))), RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))), RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))), schema); Word2Vec word2Vec = new Word2Vec() .setInputCol("text") .setOutputCol("result") .setVectorSize(3) .setMinCount(0); Word2VecModel model = word2Vec.fit(documentDF); Dataset<Row> result = model.transform(documentDF); for (Row r : result.select("result").collectAsList()) { double[] polyFeatures = ((Vector) r.get(0)).toArray(); Assert.assertEquals(polyFeatures.length, 3); } } }
@Test public void testJavaWord2Vec() { StructType schema = new StructType(new StructField[]{ new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) }); Dataset<Row> documentDF = spark.createDataFrame( Arrays.asList( RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))), RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))), RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))), schema); Word2Vec word2Vec = new Word2Vec() .setInputCol("text") .setOutputCol("result") .setVectorSize(3) .setMinCount(0); Word2VecModel model = word2Vec.fit(documentDF); Dataset<Row> result = model.transform(documentDF); for (Row r : result.select("result").collectAsList()) { double[] polyFeatures = ((Vector) r.get(0)).toArray(); Assert.assertEquals(polyFeatures.length, 3); } } }
@Test public void testJavaWord2Vec() { StructType schema = new StructType(new StructField[]{ new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) }); Dataset<Row> documentDF = spark.createDataFrame( Arrays.asList( RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))), RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))), RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))), schema); Word2Vec word2Vec = new Word2Vec() .setInputCol("text") .setOutputCol("result") .setVectorSize(3) .setMinCount(0); Word2VecModel model = word2Vec.fit(documentDF); Dataset<Row> result = model.transform(documentDF); for (Row r : result.select("result").collectAsList()) { double[] polyFeatures = ((Vector) r.get(0)).toArray(); Assert.assertEquals(polyFeatures.length, 3); } } }
new StructField("context", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });
new StructField("context", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });