@Test public void standardScaler() { // The tests are to check Java compatibility. List<VectorIndexerSuite.FeatureData> points = Arrays.asList( new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0)) ); Dataset<Row> dataFrame = spark.createDataFrame(jsc.parallelize(points, 2), VectorIndexerSuite.FeatureData.class); StandardScaler scaler = new StandardScaler() .setInputCol("features") .setOutputCol("scaledFeatures") .setWithStd(true) .setWithMean(false); // Compute summary statistics by fitting the StandardScaler StandardScalerModel scalerModel = scaler.fit(dataFrame); // Normalize each feature to have unit standard deviation. Dataset<Row> scaledData = scalerModel.transform(dataFrame); scaledData.count(); } }
@Test public void standardScaler() { // The tests are to check Java compatibility. List<VectorIndexerSuite.FeatureData> points = Arrays.asList( new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0)) ); Dataset<Row> dataFrame = spark.createDataFrame(jsc.parallelize(points, 2), VectorIndexerSuite.FeatureData.class); StandardScaler scaler = new StandardScaler() .setInputCol("features") .setOutputCol("scaledFeatures") .setWithStd(true) .setWithMean(false); // Compute summary statistics by fitting the StandardScaler StandardScalerModel scalerModel = scaler.fit(dataFrame); // Normalize each feature to have unit standard deviation. Dataset<Row> scaledData = scalerModel.transform(dataFrame); scaledData.count(); } }
@Test public void standardScaler() { // The tests are to check Java compatibility. List<VectorIndexerSuite.FeatureData> points = Arrays.asList( new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)), new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0)) ); Dataset<Row> dataFrame = spark.createDataFrame(jsc.parallelize(points, 2), VectorIndexerSuite.FeatureData.class); StandardScaler scaler = new StandardScaler() .setInputCol("features") .setOutputCol("scaledFeatures") .setWithStd(true) .setWithMean(false); // Compute summary statistics by fitting the StandardScaler StandardScalerModel scalerModel = scaler.fit(dataFrame); // Normalize each feature to have unit standard deviation. Dataset<Row> scaledData = scalerModel.transform(dataFrame); scaledData.count(); } }