/**
 * Checks that a named, fully populated {@link NumericAttribute} converted to a struct field
 * and read back equals the same attribute with its index removed.
 */
@Test
public void testNumericAttribute() {
  NumericAttribute original = NumericAttribute.defaultAttr()
    .withName("age")
    .withIndex(0)
    .withMin(0.0)
    .withMax(1.0)
    .withStd(0.5)
    .withSparsity(0.4);

  // The comparison is made against the index-free form of the attribute.
  Attribute expected = original.withoutIndex();
  Attribute restored = Attribute.fromStructField(original.toStructField());

  Assert.assertEquals(expected, restored);
}
/**
 * Runs a {@link VectorSlicer} over a two-row dataset of three-element vectors (one sparse,
 * one dense), selecting index 1 and the attribute named "f3", and checks that every output
 * vector has exactly two entries.
 */
@Test
public void vectorSlice() {
  Attribute[] attrs = new Attribute[]{
    NumericAttribute.defaultAttr().withName("f1"),
    NumericAttribute.defaultAttr().withName("f2"),
    NumericAttribute.defaultAttr().withName("f3")
  };
  AttributeGroup group = new AttributeGroup("userFeatures", attrs);

  List<Row> data = Arrays.asList(
    RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
    RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
  );

  // The schema's only column is the attribute group's struct field.
  Dataset<Row> dataset =
    spark.createDataFrame(data, (new StructType()).add(group.toStructField()));

  VectorSlicer vectorSlicer = new VectorSlicer()
    .setInputCol("userFeatures").setOutputCol("features");

  // One feature is selected by position and one by attribute name.
  vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});

  Dataset<Row> output = vectorSlicer.transform(dataset);

  for (Row r : output.select("userFeatures", "features").takeAsList(2)) {
    Vector features = r.getAs(1);
    // JUnit's signature is assertEquals(expected, actual); keeping that order makes a
    // failure message report the expected and actual sizes the right way round.
    Assert.assertEquals(2, features.size());
  }
}
} // unmatched within this view; presumably closes the enclosing test class
/**
 * Runs a {@link VectorSlicer} over a two-row dataset of three-element vectors (one sparse,
 * one dense), selecting index 1 and the attribute named "f3", and checks that every output
 * vector has exactly two entries.
 */
@Test
public void vectorSlice() {
  Attribute[] attrs = new Attribute[]{
    NumericAttribute.defaultAttr().withName("f1"),
    NumericAttribute.defaultAttr().withName("f2"),
    NumericAttribute.defaultAttr().withName("f3")
  };
  AttributeGroup group = new AttributeGroup("userFeatures", attrs);

  List<Row> data = Arrays.asList(
    RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
    RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
  );

  // The schema's only column is the attribute group's struct field.
  Dataset<Row> dataset =
    spark.createDataFrame(data, (new StructType()).add(group.toStructField()));

  VectorSlicer vectorSlicer = new VectorSlicer()
    .setInputCol("userFeatures").setOutputCol("features");

  // One feature is selected by position and one by attribute name.
  vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});

  Dataset<Row> output = vectorSlicer.transform(dataset);

  for (Row r : output.select("userFeatures", "features").takeAsList(2)) {
    Vector features = r.getAs(1);
    // JUnit's signature is assertEquals(expected, actual); keeping that order makes a
    // failure message report the expected and actual sizes the right way round.
    Assert.assertEquals(2, features.size());
  }
}
} // unmatched within this view; presumably closes the enclosing test class
/**
 * Runs a {@link VectorSlicer} over a two-row dataset of three-element vectors (one sparse,
 * one dense), selecting index 1 and the attribute named "f3", and checks that every output
 * vector has exactly two entries.
 */
@Test
public void vectorSlice() {
  Attribute[] attrs = new Attribute[]{
    NumericAttribute.defaultAttr().withName("f1"),
    NumericAttribute.defaultAttr().withName("f2"),
    NumericAttribute.defaultAttr().withName("f3")
  };
  AttributeGroup group = new AttributeGroup("userFeatures", attrs);

  List<Row> data = Arrays.asList(
    RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
    RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
  );

  // The schema's only column is the attribute group's struct field.
  Dataset<Row> dataset =
    spark.createDataFrame(data, (new StructType()).add(group.toStructField()));

  VectorSlicer vectorSlicer = new VectorSlicer()
    .setInputCol("userFeatures").setOutputCol("features");

  // One feature is selected by position and one by attribute name.
  vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});

  Dataset<Row> output = vectorSlicer.transform(dataset);

  for (Row r : output.select("userFeatures", "features").takeAsList(2)) {
    Vector features = r.getAs(1);
    // JUnit's signature is assertEquals(expected, actual); keeping that order makes a
    // failure message report the expected and actual sizes the right way round.
    Assert.assertEquals(2, features.size());
  }
}
} // unmatched within this view; presumably closes the enclosing test class
/**
 * Checks that a named, fully populated {@link NumericAttribute} converted to a struct field
 * and read back equals the same attribute with its index removed.
 */
@Test
public void testNumericAttribute() {
  NumericAttribute original = NumericAttribute.defaultAttr()
    .withName("age")
    .withIndex(0)
    .withMin(0.0)
    .withMax(1.0)
    .withStd(0.5)
    .withSparsity(0.4);

  // The comparison is made against the index-free form of the attribute.
  Attribute expected = original.withoutIndex();
  Attribute restored = Attribute.fromStructField(original.toStructField());

  Assert.assertEquals(expected, restored);
}
/**
 * Checks that a named, fully populated {@link NumericAttribute} converted to a struct field
 * and read back equals the same attribute with its index removed.
 */
@Test
public void testNumericAttribute() {
  NumericAttribute original = NumericAttribute.defaultAttr()
    .withName("age")
    .withIndex(0)
    .withMin(0.0)
    .withMax(1.0)
    .withStd(0.5)
    .withSparsity(0.4);

  // The comparison is made against the index-free form of the attribute.
  Attribute expected = original.withoutIndex();
  Attribute restored = Attribute.fromStructField(original.toStructField());

  Assert.assertEquals(expected, restored);
}
/**
 * Builds an {@link AttributeGroup} named "user" from eight numeric, nominal and binary
 * attributes, then checks its size, its name, the attribute at position 0, lookup of a
 * name, absence of an unknown name, and equality after a struct-field round trip.
 */
@Test
public void testAttributeGroup() {
  Attribute[] members = {
    NumericAttribute.defaultAttr(),
    NominalAttribute.defaultAttr(),
    BinaryAttribute.defaultAttr().withIndex(0),
    NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
    NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
    BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
    NumericAttribute.defaultAttr(),
    NumericAttribute.defaultAttr()
  };
  AttributeGroup userGroup = new AttributeGroup("user", members);

  // Basic shape of the group.
  Assert.assertEquals(8, userGroup.size());
  Assert.assertEquals("user", userGroup.name());

  // The first member is expected to compare equal to a default numeric attribute at index 0.
  Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
  Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

  // Name-based lookups: "age" sits at position 3, "abc" was never added.
  Assert.assertEquals(3, userGroup.indexOf("age"));
  Assert.assertFalse(userGroup.hasAttr("abc"));

  // Converting to a struct field and back must yield an equal group.
  AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
  Assert.assertEquals(userGroup, restored);
}
} // unmatched within this view; presumably closes the enclosing test class
/**
 * Builds an {@link AttributeGroup} named "user" from eight numeric, nominal and binary
 * attributes, then checks its size, its name, the attribute at position 0, lookup of a
 * name, absence of an unknown name, and equality after a struct-field round trip.
 */
@Test
public void testAttributeGroup() {
  Attribute[] members = {
    NumericAttribute.defaultAttr(),
    NominalAttribute.defaultAttr(),
    BinaryAttribute.defaultAttr().withIndex(0),
    NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
    NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
    BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
    NumericAttribute.defaultAttr(),
    NumericAttribute.defaultAttr()
  };
  AttributeGroup userGroup = new AttributeGroup("user", members);

  // Basic shape of the group.
  Assert.assertEquals(8, userGroup.size());
  Assert.assertEquals("user", userGroup.name());

  // The first member is expected to compare equal to a default numeric attribute at index 0.
  Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
  Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

  // Name-based lookups: "age" sits at position 3, "abc" was never added.
  Assert.assertEquals(3, userGroup.indexOf("age"));
  Assert.assertFalse(userGroup.hasAttr("abc"));

  // Converting to a struct field and back must yield an equal group.
  AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
  Assert.assertEquals(userGroup, restored);
}
} // unmatched within this view; presumably closes the enclosing test class
/**
 * Builds an {@link AttributeGroup} named "user" from eight numeric, nominal and binary
 * attributes, then checks its size, its name, the attribute at position 0, lookup of a
 * name, absence of an unknown name, and equality after a struct-field round trip.
 */
@Test
public void testAttributeGroup() {
  Attribute[] members = {
    NumericAttribute.defaultAttr(),
    NominalAttribute.defaultAttr(),
    BinaryAttribute.defaultAttr().withIndex(0),
    NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
    NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
    BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
    NumericAttribute.defaultAttr(),
    NumericAttribute.defaultAttr()
  };
  AttributeGroup userGroup = new AttributeGroup("user", members);

  // Basic shape of the group.
  Assert.assertEquals(8, userGroup.size());
  Assert.assertEquals("user", userGroup.name());

  // The first member is expected to compare equal to a default numeric attribute at index 0.
  Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
  Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

  // Name-based lookups: "age" sits at position 3, "abc" was never added.
  Assert.assertEquals(3, userGroup.indexOf("age"));
  Assert.assertFalse(userGroup.hasAttr("abc"));

  // Converting to a struct field and back must yield an equal group.
  AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
  Assert.assertEquals(userGroup, restored);
}
} // unmatched within this view; presumably closes the enclosing test class