@Test public void testCheckGradientBinomial() { try { Scope.enter(); Frame trainBinomialEnum = parse_test_file("smalldata/glm_ordinal_logit/ordinal_binomial_training_set_enum_small.csv"); convert2Enum(trainBinomialEnum, new int[]{0, 1, 2, 3, 4, 5, 6, 34}); // convert enum columns Frame trainBinomial = parse_test_file("smalldata/glm_ordinal_logit/ordinal_binomial_training_set_small.csv"); convert2Enum(trainBinomial, new int[]{34}); Scope.track(trainBinomialEnum); Scope.track(trainBinomial); checkGradientWithBinomial(trainBinomial, 34, "C35"); // only numerical columns checkGradientWithBinomial(trainBinomialEnum, 34, "C35"); // with enum and numerical columns } finally { Scope.exit(); } }
@Test public void testIrisScoreWarning() throws InterruptedException, ExecutionException { PCAModel model = null; Frame fr = null, fr2= null; Frame tr = null, te= null; Scope.enter(); try { fr = parse_test_file("smalldata/iris/iris_wheader.csv"); tr = parse_test_file("smalldata/iris/iris_wheader_bad_cnames.csv"); Scope.track(fr); Scope.track(tr); PCAModel.PCAParameters parms = new PCAModel.PCAParameters(); parms._train=fr._key; // parms._valid = tr._key; parms._k = 4; parms._max_iterations = 1000; parms._pca_method = PCAParameters.Method.GramSVD; model = new PCA(parms).trainModel().get(); Scope.track_generic(model); // Done building model; produce a score column with cluster choices fr2 = model.score(tr); Scope.track(fr2); } finally { Scope.exit(); } }
private void doCleanup() { try { if(_parms._lambda_search && _parms._is_cv_model) Scope.untrack(removeLater(_dinfo.getWeightsVec()._key)); if(!_cv && _model!=null) _model.unlock(_job); } catch(Throwable t){ // nada } } private transient Cholesky _chol;
@Test public void testBasicGBMFamily() { Scope.enter(); // Classification with Bernoulli family basicGBM("./smalldata/logreg/prostate.csv", new PrepData() { int prep(Frame fr) { fr.remove("ID").remove(); // Remove not-predictive ID int ci = fr.find("RACE"); // Change RACE to categorical Scope.track(fr.replace(ci,fr.vecs()[ci].toCategoricalVec())); return fr.find("CAPSULE"); // Prostate: predict on CAPSULE } }, false, DistributionFamily.bernoulli); Scope.exit(); }
/**
 * Builds a {@code CompressedTree} with the constructor arguments
 * {@code (new byte[0], 123, 42, 17)}, stores it in the DKV and verifies that the
 * coordinates reported by {@code getTreeCoords()} carry tree id 42 and class 17.
 */
@Test
public void testMakeTreeKey() {
  try {
    Scope.enter();

    CompressedTree tree = new CompressedTree(new byte[0], 123, 42, 17);
    Scope.track_generic(tree);
    DKV.put(tree);

    CompressedTree.TreeCoords coords = tree.getTreeCoords();
    assertEquals(42, coords._treeId);
    assertEquals(17, coords._clazz);
  } finally {
    Scope.exit();
  }
}
/**
 * Conditionally replaces the mapping for a key in the store: delegates to
 * {@code STORE.putIfMatchUnlocked(key, val, old)} and treats the update as successful
 * only when that call hands back the very same {@code old} reference.
 *
 * @param key key to update; replaced by {@code old._key} when {@code old} is non-null
 * @param val new value, or {@code null}; when non-null its {@code _key} is set to the
 *            key actually used
 * @param old value expected to be currently mapped, or {@code null}
 * @return {@code old} on success; otherwise whatever the store returned instead
 */
public static Value putIfMatch( Key key, Value val, Value old ) {
  if( old != null ) // Have an old value?
    key = old._key; // Use prior key
  if( val != null )
    val._key = key; // Keep the value's own key reference in sync with the key used below

  // Insert into the K/V store
  Value res = STORE.putIfMatchUnlocked(key,val,old);
  if( res != old ) return res; // Return the failure cause
  // Persistence-tickle.
  // If the K/V mapping is going away, remove the old guy.
  // If the K/V mapping is changing, let the store cleaner just overwrite.
  // If the K/V mapping is new, let the store cleaner just create
  if( old != null && val == null ) old.removeIce(); // Remove the old guy
  if( val != null ) {
    dirty_store(); // Start storing the new guy
    // Register the key with the current Scope (runs for every successful non-null put).
    Scope.track(key);
  }
  return old; // Return success
}
/**
 * Trains a GBM model on the prostate data set with {@code CAPSULE} converted to a
 * categorical response and the {@code ID} column ignored.
 *
 * <p>Both the parsed frame and the resulting model are registered with the current
 * {@link Scope}.
 *
 * @param ntrees value assigned to the {@code _ntrees} parameter
 * @return the trained model
 */
private GBMModel trainGbm(final int ntrees) {
  final String response = "CAPSULE";

  Frame prostate = Scope.track(parse_test_file("smalldata/logreg/prostate.csv"));

  // Swap the response column for its categorical version, delete the column that
  // replace() hands back, then re-publish the modified frame under the same key.
  final int responseIdx = prostate.find(response);
  prostate.replace(responseIdx, prostate.vec(response).toCategoricalVec()).remove();
  DKV.put(prostate._key, prostate);

  GBMModel.GBMParameters params = new GBMModel.GBMParameters();
  params._seed = 123;
  params._train = prostate._key;
  params._ignored_columns = new String[]{"ID"};
  params._response_column = response;
  params._ntrees = ntrees;
  params._score_each_iteration = true;

  GBMModel trained = new GBM(params).trainModel().get();
  return (GBMModel) Scope.track_generic(trained);
}
/**
 * Registers {@code keyed} with the current {@link Scope}, stores it in the DKV and
 * hands back its key.
 *
 * @param keyed object to track and store
 * @param <T>   keyed type
 * @return the {@code _key} of {@code keyed}
 */
private static <T extends Keyed> Key<T> publish(Keyed<T> keyed) {
  // Track first, then store, same order as before.
  Scope.track_generic(keyed);
  DKV.put(keyed);

  final Key<T> publishedKey = keyed._key;
  return publishedKey;
}
@Test public void testBasicGBMFamily() { Scope.enter(); // Classification with Bernoulli family basicGBM("./smalldata/logreg/prostate.csv","prostate.hex", new PrepData() { int prep(Frame fr) { assertEquals(380,fr.numRows()); // Remove patient ID vector UKV.remove(fr.remove("ID")._key); // Change CAPSULE and RACE to categoricals Scope.track(fr.factor(fr.find("CAPSULE"))._key); Scope.track(fr.factor(fr.find("RACE" ))._key); // Prostate: predict on CAPSULE return fr.find("CAPSULE"); } }, Family.bernoulli); Scope.exit(); }
/**
 * Builds a tracked two-column frame ({@code "x"}, {@code "y"}) with {@code len} rows.
 *
 * <p>For global row index {@code r}: {@code x = r / len * PI / 2} and
 * {@code y = sin(x) + noise}, where {@code noise} is {@code |nextDouble()| * 0.1} drawn
 * from the generator returned by {@code RandomUtils.getRNG(r)}.
 *
 * @param len number of rows
 * @return the generated frame, registered with the current {@link Scope}
 */
private static Frame makeSinFrame(final int len) {
  // Zero vector that only provides the row layout for the task below.
  Vec blueprint = Scope.track(Vec.makeZero(len));

  final byte[] outputTypes = new byte[]{Vec.T_NUM, Vec.T_NUM};
  final String[] columnNames = new String[]{"x", "y"};

  Frame train = new MRTask() {
    @Override
    public void map(Chunk c, NewChunk ncX, NewChunk ncY) {
      final long chunkStart = c.start();
      for (int i = 0; i < c._len; i++) {
        final long row = chunkStart + i;
        Random rng = RandomUtils.getRNG(row);
        double x = row / (double) len * Math.PI / 2;
        double noise = Math.abs(rng.nextDouble()) * 0.1;
        ncX.addNum(x);
        ncY.addNum(Math.sin(x) + noise);
      }
    }
  }.doAll(outputTypes, blueprint)
   .outputFrame(Key.<Frame>make(), columnNames, null);

  Scope.track(train);
  return train;
}
/**
 * Creates a {@code DataInfo} for {@code fr} using {@code DEMEAN} and {@code NONE} as the
 * two transform types, calls {@code disableIntercept()} on it, then tracks the result
 * in the current {@link Scope} and stores it in the DKV.
 *
 * @param fr         frame the {@code DataInfo} is built from
 * @param nResponses value passed as the response-count argument of the constructor
 * @return the tracked and stored {@code DataInfo}
 */
private static DataInfo makeDataInfo(Frame fr, int nResponses) {
  DataInfo withIntercept = new DataInfo(
      fr, null, nResponses, false,
      DataInfo.TransformType.DEMEAN, DataInfo.TransformType.NONE,
      true, false, false, false, false, false, null);
  // Keep whatever disableIntercept() hands back, as the original chained call did.
  DataInfo dinfo = withIntercept.disableIntercept();

  Scope.track_generic(dinfo);
  DKV.put(dinfo);
  return dinfo;
}
/**
 * Removes a freshly built frame's key from the DKV, passes the frame to
 * {@code register} and asserts that the key of the returned frame is not the same
 * object as the key the frame had before.
 */
@Test
public void registerTest() {
  Scope.enter();
  try {
    Frame original = new TestFrameBuilder().withName("testFrame").build();
    Scope.track(original);

    // Remember the key, then drop its DKV entry before re-registering.
    Key<Frame> keyBefore = original._key;
    DKV.remove(keyBefore);

    Frame registered = register(original);
    Scope.track(registered);

    assertNotSame(registered._key, keyBefore);
  } finally {
    Scope.exit();
  }
}
}
@Test public void testPUBDEV3500NoLeakage() throws InterruptedException, ExecutionException { Scope.enter(); Frame train = null; try { train = parse_test_file(Key.make("prostate_cat.hex"), "smalldata/prostate/prostate_cat.csv"); Scope.track(train); pcaParameters._train = train._key; pcaParameters._k = 3; pcaParameters._transform = DataInfo.TransformType.NONE; pcaParameters._pca_method = PCAModel.PCAParameters.Method.Randomized; pcaParameters._impute_missing = true; // Don't skip rows with NA entries, but impute using mean of column pcaParameters._seed = 12345; pcaParameters._use_all_factor_levels=true; PCAModel pca = null; pca = new PCA(pcaParameters).trainModel().get(); Scope.track_generic(pca); Assert.assertTrue(pca._parms._k == pca._output._std_deviation.length); } finally { Scope.exit(); } }
@Test public void testToSharedTreeSubgraph() throws IOException { int ntrees = 5; try { Scope.enter(); GBMModel model = trainGbm(ntrees); GbmMojoModel mojo = (GbmMojoModel) model.toMojo(); SharedTreeGraph expectedGraph = mojo._computeGraph(-1); assertEquals(5, expectedGraph.subgraphArray.size()); // sanity check the MOJO created graph for (int i = 0; i < ntrees; i++) { CompressedTree tree = model._output._treeKeys[i][0].get(); assertNotNull(tree); CompressedTree auxTreeInfo = model._output._treeKeysAux[i][0].get(); SharedTreeSubgraph sg = tree.toSharedTreeSubgraph(auxTreeInfo, model._output._names, model._output._domains); assertEquals(expectedGraph.subgraphArray.get(i), sg); } } finally { Scope.exit(); } }
// Submit the task `rb` and wait on it via join() before reading its result.
H2O.submitTask(rb).join();
// Look up the frame stored under `newKey` in the DKV.
// NOTE(review): presumably `rb` is what publishes the frame under `newKey` - confirm against the code above.
Frame rebalanced_fr = DKV.get(newKey).get();
// Register the frame with the current Scope before handing it to the caller.
Scope.track(rebalanced_fr);
return rebalanced_fr;
protected Frame buildPredictionsForBaseModel(Model model, Frame frame) { Key<Frame> predsKey = buildPredsKey(model, frame); Frame preds = DKV.getGet(predsKey); if (preds == null) { preds = model.score(frame, predsKey.toString()); Scope.untrack(preds.keysList()); } if (_model._output._base_model_predictions_keys == null) _model._output._base_model_predictions_keys = new Key[0]; if (!ArrayUtils.contains(_model._output._base_model_predictions_keys, predsKey)){ _model._output._base_model_predictions_keys = ArrayUtils.append(_model._output._base_model_predictions_keys, predsKey); } //predictions are cleaned up by metalearner if necessary return preds; }
/**
 * Trains a GBM on the frame from {@code makeSinFrame} with the monotone constraint
 * {@code KeyValue("x", 1)} and asserts that the predictions, read in row order from the
 * first column of the scored frame, never decrease.
 *
 * @param depth value assigned to the {@code _max_depth} parameter
 */
private static void checkMonotonic(int depth) {
  try {
    Scope.enter();

    final int len = 10000;
    Frame train = makeSinFrame(len);

    GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
    parms._response_column = "y";
    parms._train = train._key;
    parms._seed = 42;
    parms._max_depth = depth;
    parms._monotone_constraints = new KeyValue[]{new KeyValue("x", 1)};

    GBMModel gbm = new GBM(parms).trainModel().get();
    Scope.track_generic(gbm);

    Frame scored = Scope.track(gbm.score(train));

    // Start below any value the first comparison is expected to see.
    double last = -1;
    int row = 0;
    while (row < len) {
      double pred = scored.vec(0).at(row);
      assertTrue("pred = " + pred + " > " + last, pred >= last);
      last = pred;
      row++;
    }
  } finally {
    Scope.exit();
  }
}
@Test public void testRemoteBitSet() throws Exception { Scope.enter(); // Issue a slew of remote key puts Key[] keys = new Key[32]; for( int i = 0; i < keys.length; ++i ) { Key k = keys[i] = Key.make("key"+i); byte[] bits = new byte[4]; bits[0] = (byte)i; // Each value holds a shift-count Value val = new Value(k,bits); DKV.put(k,val); } DKV.write_barrier(); RemoteBitSet r = new RemoteBitSet(); r.invoke(keys); assertEquals((int)((1L<<keys.length)-1), r._x); //for( Key k : keys ) DKV.remove(k); Scope.exit(); }
// Data-preparation callback: checks the row count, removes the ID column, turns
// CAPSULE and RACE into categoricals and returns the column index of CAPSULE.
int prep(Frame fr) {
  // The data set is expected to have exactly 380 rows.
  assertEquals(380,fr.numRows());
  // Remove patient ID vector
  UKV.remove(fr.remove("ID")._key);
  // Change CAPSULE and RACE to categoricals; the keys of what factor() returns are
  // registered with the current Scope.
  Scope.track(fr.factor(fr.find("CAPSULE"))._key);
  Scope.track(fr.factor(fr.find("RACE" ))._key);
  // Prostate: predict on CAPSULE
  return fr.find("CAPSULE");
}
}, Family.bernoulli);