/**
 * Makes a {@link Keyed} object globally visible for the duration of the test:
 * registers it with the active Scope (so Scope.exit() cleans it up) and
 * publishes it to the DKV.
 *
 * @param obj the object to track and publish
 * @return the DKV key of the published object
 */
private static <T extends Keyed> Key<T> publish(Keyed<T> obj) {
    Scope.track_generic(obj);
    DKV.put(obj);
    return obj._key;
}
/**
 * Builds a DataInfo over the given frame (DEMEAN-transformed predictors, intercept
 * disabled), registers it with the Scope and publishes it to the DKV.
 *
 * @param fr         source frame
 * @param nResponses number of response columns in {@code fr}
 * @return the published DataInfo
 */
private static DataInfo makeDataInfo(Frame fr, int nResponses) {
    final DataInfo di =
            new DataInfo(fr, null, nResponses, false,
                    DataInfo.TransformType.DEMEAN, DataInfo.TransformType.NONE,
                    true, false, false, false, false, false, null)
                    .disableIntercept();
    Scope.track_generic(di);
    DKV.put(di);
    return di;
}
@Test
public void testMakeTreeKey() {
    try {
        Scope.enter();
        // Build a tree with known (treeId, clazz) coordinates and verify they
        // round-trip through getTreeCoords().
        CompressedTree tree = new CompressedTree(new byte[0], 123, 42, 17);
        Scope.track_generic(tree);
        DKV.put(tree);
        CompressedTree.TreeCoords coords = tree.getTreeCoords();
        assertEquals(42, coords._treeId);
        assertEquals(17, coords._clazz);
    } finally {
        Scope.exit();
    }
}
// Train a GLM with the current solver, then recompute the coefficients manually
// via getCODCoeff for comparison. Both the model and the DataInfo are
// Scope-tracked so Scope.exit() removes them from the DKV.
// NOTE(review): fragment of a larger method — `params`, `model`, `train` and `s`
// are defined outside this view.
params._solver = s; model = new GLM(params).trainModel().get(); Scope.track_generic(model); DataInfo tinfo = new DataInfo(train.clone(), null, 0, true, DataInfo.TransformType.STANDARDIZE, DataInfo.TransformType.NONE, false, false, false, /* weights */ false, /* offset */ false, /* fold */ false); double[] manualCoeff = getCODCoeff(train, params._alpha[0], params._lambda[0], model._ymu, tinfo); Scope.track_generic(tinfo);
// Register the model with the active Scope so it is removed from the DKV on Scope.exit().
Scope.track_generic(model);
/**
 * Trains a GBM classifier on the prostate dataset with a fixed seed.
 * The returned model is Scope-tracked; scoring happens at every iteration.
 *
 * @param ntrees number of trees to build
 * @return the trained, Scope-tracked GBM model
 */
private GBMModel trainGbm(final int ntrees) {
    Frame fr = Scope.track(parse_test_file("smalldata/logreg/prostate.csv"));
    final String response = "CAPSULE";
    // Convert the response to a categorical column so GBM trains a classifier.
    fr.replace(fr.find(response), fr.vec(response).toCategoricalVec()).remove();
    DKV.put(fr._key, fr);
    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._seed = 123;
    parms._train = fr._key;
    parms._ignored_columns = new String[]{"ID"};
    parms._response_column = response;
    parms._ntrees = ntrees;
    parms._score_each_iteration = true;
    return (GBMModel) Scope.track_generic(new GBM(parms).trainModel().get());
}
// Register the model with the active Scope so it is removed from the DKV on Scope.exit().
Scope.track_generic(model);
@Test public void testPUBDEV3500NoLeakage() throws InterruptedException, ExecutionException { Scope.enter(); Frame train = null; try { train = parse_test_file(Key.make("prostate_cat.hex"), "smalldata/prostate/prostate_cat.csv"); Scope.track(train); pcaParameters._train = train._key; pcaParameters._k = 3; pcaParameters._transform = DataInfo.TransformType.NONE; pcaParameters._pca_method = PCAModel.PCAParameters.Method.Randomized; pcaParameters._impute_missing = true; // Don't skip rows with NA entries, but impute using mean of column pcaParameters._seed = 12345; pcaParameters._use_all_factor_levels=true; PCAModel pca = null; pca = new PCA(pcaParameters).trainModel().get(); Scope.track_generic(pca); Assert.assertTrue(pca._parms._k == pca._output._std_deviation.length); } finally { Scope.exit(); } }
@Test public void testMonotoneConstraintsInverse() { Scope.enter(); try { final String response = "power (hp)"; Frame f = parse_test_file("smalldata/junit/cars.csv"); f.replace(f.find(response), f.vecs()[f.find("cylinders")].toNumericVec()).remove(); DKV.put(Scope.track(f)); GBMModel.GBMParameters parms = new GBMModel.GBMParameters(); parms._response_column = response; parms._train = f._key; parms._ignored_columns = new String[]{"name"}; parms._seed = 42; GBMModel.GBMParameters noConstrParams = (GBMModel.GBMParameters) parms.clone(); GBMModel noConstrModel = new GBM(noConstrParams).trainModel().get(); Scope.track_generic(noConstrModel); assertTrue(noConstrModel._output._varimp.toMap().get("cylinders") > 0); GBMModel.GBMParameters constrParams = (GBMModel.GBMParameters) parms.clone(); constrParams._monotone_constraints = new KeyValue[] {new KeyValue("cylinders", -1)}; GBMModel constrModel = new GBM(constrParams).trainModel().get(); Scope.track_generic(constrModel); // we essentially eliminated the effect of the feature by setting an inverted constraint assertEquals(constrModel._output._varimp.toMap().get("cylinders"), 0, 0); } finally { Scope.exit(); } }
// Register the model with the active Scope so it is removed from the DKV on Scope.exit().
Scope.track_generic(model);
// Train a GLM with the current solver under test and Scope-track the resulting model.
// NOTE(review): fragment of a larger method — `params`, `model` and `s` are defined outside this view.
params._solver = s; model = new GLM(params).trainModel().get(); Scope.track_generic(model);
// Train a single-epoch Word2Vec model and Scope-track it so it is cleaned up on Scope.exit().
// NOTE(review): fragment — `p` (the Word2Vec parameters) is defined outside this view.
p._epochs = 1; Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
/**
 * Trains a GBM with an increasing monotone constraint on "x" over a sine frame
 * and asserts the predictions are non-decreasing in row order.
 *
 * @param depth maximum tree depth to use
 */
private static void checkMonotonic(int depth) {
    try {
        Scope.enter();
        final int nRows = 10000;
        Frame train = makeSinFrame(nRows);
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._response_column = "y";
        parms._train = train._key;
        parms._seed = 42;
        parms._max_depth = depth;
        parms._monotone_constraints = new KeyValue[]{new KeyValue("x", 1)};
        GBMModel model = new GBM(parms).trainModel().get();
        Scope.track_generic(model);
        Frame preds = Scope.track(model.score(train));
        // Walk the predictions in row order; each must be >= the previous one.
        double previous = -1;
        for (int row = 0; row < nRows; row++) {
            double current = preds.vec(0).at(row);
            assertTrue("pred = " + current + " > " + previous, current >= previous);
            previous = current;
        }
    } finally {
        Scope.exit();
    }
}
@Test public void testIrisScoreWarning() throws InterruptedException, ExecutionException { PCAModel model = null; Frame fr = null, fr2= null; Frame tr = null, te= null; Scope.enter(); try { fr = parse_test_file("smalldata/iris/iris_wheader.csv"); tr = parse_test_file("smalldata/iris/iris_wheader_bad_cnames.csv"); Scope.track(fr); Scope.track(tr); PCAModel.PCAParameters parms = new PCAModel.PCAParameters(); parms._train=fr._key; // parms._valid = tr._key; parms._k = 4; parms._max_iterations = 1000; parms._pca_method = PCAParameters.Method.GramSVD; model = new PCA(parms).trainModel().get(); Scope.track_generic(model); // Done building model; produce a score column with cluster choices fr2 = model.score(tr); Scope.track(fr2); } finally { Scope.exit(); } }
// Register the DataInfo with the active Scope so it is removed from the DKV on Scope.exit().
Scope.track_generic(dinfo);
// Build a Word2Vec model from pre-trained embeddings (vector size 2) and Scope-track it.
// NOTE(review): fragment — `p` and `expected` are defined outside this view.
p._vec_size = 2; p._pre_trained = expected._key; Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
@Test
public void testCoxPHEfron1VarScoring() {
    try {
        Scope.enter();
        Frame heart = Scope.track(parse_test_file("smalldata/coxph_test/heart.csv"));
        CoxPHModel.CoxPHParameters parms = new CoxPHModel.CoxPHParameters();
        parms._calc_cumhaz = true;
        parms._train = heart._key;
        parms._start_column = "start";
        parms._stop_column = "stop";
        parms._response_column = "event";
        parms._ignored_columns = new String[]{"id", "year", "surgery", "transplant"};
        parms._ties = CoxPHModel.CoxPHParameters.CoxPHTies.efron;
        // With all other columns ignored, the formula reduces to a single predictor.
        assertEquals("Surv(start, stop, event) ~ age", parms.toFormula(heart));
        CoxPH job = new CoxPH(parms);
        CoxPHModel model = (CoxPHModel) Scope.track_generic(job.trainModel().get());
        assertNotNull(model);
        Frame scored = Scope.track(model.score(heart));
        assertEquals(heart.numRows(), scored.numRows());
    } finally {
        Scope.exit();
    }
}
@Test
public void testBasic() {
    try {
        Scope.enter();
        Frame fr = Scope.track(parse_test_file("smalldata/anomaly/ecg_discord_train.csv"));
        IsolationForestModel.IsolationForestParameters parms =
                new IsolationForestModel.IsolationForestParameters();
        parms._train = fr._key;
        parms._seed = 0xDECAF;
        parms._ntrees = 7;
        parms._sample_size = 5;
        IsolationForestModel model = new IsolationForest(parms).trainModel().get();
        assertNotNull(model);
        Scope.track_generic(model);
        Frame scored = Scope.track(model.score(fr));
        assertArrayEquals(new String[]{"predict", "mean_length"}, scored.names());
        assertEquals(fr.numRows(), scored.numRows());
        // Generated Java scoring code must agree with in-H2O predictions.
        assertTrue(model.testJavaScoring(fr, scored, 1e-8));
    } finally {
        Scope.exit();
    }
}
// Build a Word2Vec model from a pre-trained frame and Scope-track it.
// NOTE(review): fragment — `p` and `pretrained` are defined outside this view.
p._pre_trained = pretrained._key; Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
// Train a multinomial GBM and Scope-track the resulting model.
// NOTE(review): fragment — `parms` is defined outside this view.
parms._distribution = DistributionFamily.multinomial; GBMModel gbm = (GBMModel) Scope.track_generic(new GBM(parms).trainModel().get());