/**
 * Creates default parameters.
 */
public TransitionClassifierParams() {
  minFF = new DoubleParam(this, "minFF", "min feature frequency", ParamValidators.gt(0));
  setDefault(minFF, 2.0);
  numFeatures = new IntParam(this, "numFeatures",
      "number of features used in feature hashing", ParamValidators.gt(0));
  setDefault(numFeatures, 1000);
  regParam = new DoubleParam(this, "regParam", "regularization parameter",
      ParamValidators.gtEq(0d));
  setDefault(regParam, 0.0);
  maxIter = new IntParam(this, "maxIter", "max number of iterations", ParamValidators.gt(0));
  setDefault(maxIter, 100);
  tolerance = new DoubleParam(this, "tolerance", "convergence tolerance of iterations",
      ParamValidators.gt(0));
  setDefault(tolerance, 1E-5);
}
@Test
public void normalizer() {
  // The tests are to check Java compatibility.
  JavaRDD<VectorIndexerSuite.FeatureData> points = jsc.parallelize(Arrays.asList(
      new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
      new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
      new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
  ));
  Dataset<Row> dataFrame = spark.createDataFrame(points, VectorIndexerSuite.FeatureData.class);
  Normalizer normalizer = new Normalizer()
      .setInputCol("features")
      .setOutputCol("normFeatures");

  // Normalize each Vector using $L^2$ norm.
  Dataset<Row> l2NormData = normalizer.transform(dataFrame, normalizer.p().w(2));
  l2NormData.count();

  // Normalize each Vector using $L^\infty$ norm.
  Dataset<Row> lInfNormData =
      normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
  lInfNormData.count();
}
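For intuition, here is what the two settings do to the row (1.0, 3.0): the $L^2$ norm is sqrt(1 + 9) = sqrt(10), and the $L^\infty$ norm is max(|1.0|, |3.0|) = 3.0. A minimal, self-contained sketch in plain Java (no Spark required) that reproduces the arithmetic:

// Hand-computed check of the two normalizations applied above.
public class NormCheck {
  public static void main(String[] args) {
    double[] v = {1.0, 3.0};

    // L^2: divide by sqrt(sum of squares) = sqrt(10) ~= 3.1623.
    double l2 = Math.sqrt(v[0] * v[0] + v[1] * v[1]);
    System.out.printf("L2:   [%.4f, %.4f]%n", v[0] / l2, v[1] / l2);     // [0.3162, 0.9487]

    // L^infinity: divide by the largest absolute component = 3.0.
    double lInf = Math.max(Math.abs(v[0]), Math.abs(v[1]));
    System.out.printf("Linf: [%.4f, %.4f]%n", v[0] / lInf, v[1] / lInf); // [0.3333, 1.0000]
  }
}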
/**
 * Creates default parameters.
 */
public CMMParams() {
  minFF = new DoubleParam(this, "minFF", "min feature frequency", ParamValidators.gt(0));
  setDefault(minFF, 2.0);
  numFeatures = new IntParam(this, "numFeatures",
      "number of features used in feature hashing", ParamValidators.gt(0));
  setDefault(numFeatures, 1000);
  regParam = new DoubleParam(this, "regParam", "regularization parameter",
      ParamValidators.gtEq(0d));
  setDefault(regParam, 0.0);
  maxIter = new IntParam(this, "maxIter", "max number of iterations", ParamValidators.gt(0));
  setDefault(maxIter, 100);
  tolerance = new DoubleParam(this, "tolerance", "convergence tolerance of iterations",
      ParamValidators.gt(0));
  setDefault(tolerance, 1E-5);
  markovOrder = new IntParam(this, "markovOrder", "Markov order of the model",
      ParamValidators.gt(0));
  setDefault(markovOrder, 1);
}
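CMMParams repeats the TransitionClassifierParams defaults above and adds a markovOrder parameter. A minimal sketch of reading a default back, assuming the class exposes its Param fields through accessors such as markovOrder() and tolerance() (those accessor names are assumptions; getOrDefault itself comes from org.apache.spark.ml.param.Params and falls back to the value registered with setDefault):

CMMParams params = new CMMParams();
// markovOrder()/tolerance() accessors are assumed here; the casts are needed
// because IntParam/DoubleParam are Param<Object> from the Java side.
int order = (Integer) params.getOrDefault(params.markovOrder());  // 1
double tol = (Double) params.getOrDefault(params.tolerance());    // 1e-5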
// With threshold 0.0, any row with a nonzero positive-class probability is
// predicted 1, so no prediction should be 0 (hence the view name).
model.transform(dataset, model.threshold().w(0.0), model.probabilityCol().w("myProb"))
    .createOrReplaceTempView("predNotAllZero");
Dataset<Row> predNotAllZero = spark.sql("SELECT prediction, myProb FROM predNotAllZero");

// Refit with per-call param overrides and check they reached the new model's parent.
LogisticRegressionModel model2 = lr.fit(dataset,
    lr.maxIter().w(5),
    lr.regParam().w(0.1),
    lr.threshold().w(0.4),
    lr.probabilityCol().w("theProb"));
LogisticRegression parent2 = (LogisticRegression) model2.parent();
Assert.assertEquals(5, parent2.getMaxIter());
private void init() {
  myIntParam_ = new IntParam(this, "myIntParam", "this is an int param", ParamValidators.gt(0));
  myDoubleParam_ = new DoubleParam(this, "myDoubleParam", "this is a double param",
      ParamValidators.inRange(0.0, 1.0));
  List<String> validStrings = Arrays.asList("a", "b");
  myStringParam_ = new Param<>(this, "myStringParam", "this is a string param",
      ParamValidators.inArray(validStrings));
  myDoubleArrayParam_ =
      new DoubleArrayParam(this, "myDoubleArrayParam", "this is a double array param");
  setDefault(myIntParam(), 1);
  setDefault(myDoubleParam(), 0.5);
  setDefault(myDoubleArrayParam(), new double[]{1.0, 2.0});
}
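Given these defaults, a caller overrides individual values by assembling a ParamMap from ParamPairs; w(value) on each Param is shorthand for constructing a ParamPair. A minimal sketch using the accessors defined above (ParamMap is org.apache.spark.ml.param.ParamMap):

// Override two of the defaults registered in init().
ParamMap overrides = new ParamMap()
    .put(myIntParam().w(3))
    .put(myDoubleParam().w(0.9));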
@Test
public void linearRegressionWithSetters() {
  // Set params, train, and check as many params as we can.
  LinearRegression lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(1.0)
      .setSolver("l-bfgs");
  LinearRegressionModel model = lr.fit(dataset);
  LinearRegression parent = (LinearRegression) model.parent();
  assertEquals(10, parent.getMaxIter());
  assertEquals(1.0, parent.getRegParam(), 0.0);

  // Call fit() with new params, and check as many params as we can.
  LinearRegressionModel model2 = lr.fit(dataset,
      lr.maxIter().w(5),
      lr.regParam().w(0.1),
      lr.predictionCol().w("thePred"));
  LinearRegression parent2 = (LinearRegression) model2.parent();
  assertEquals(5, parent2.getMaxIter());
  assertEquals(0.1, parent2.getRegParam(), 0.0);
  assertEquals("thePred", model2.getPredictionCol());
}
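The varargs form of fit() used above also has a ParamMap overload, which is convenient when the overrides are assembled elsewhere. A sketch under the same test setup:

// Equivalent to the varargs call: collect the overrides in a ParamMap
// (org.apache.spark.ml.param.ParamMap) and pass it to fit().
ParamMap overrides = new ParamMap()
    .put(lr.maxIter().w(5))
    .put(lr.regParam().w(0.1));
LinearRegressionModel model3 = lr.fit(dataset, overrides);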