/** @return true if this Key's home node is the local node ({@code H2O.SELF}). */
public boolean home() { return home_node()==H2O.SELF; }
// NOTE(review): home_node()'s body continues beyond this chunk — header only visible here.
public H2ONode home_node( ) {
public final RPC<Atomic<T>> fork(Key key) { _key = key; if( key.home() ) { // Key is home? compute2(); // Also, run it blocking/now return null; } else { // Else run it remotely return RPC.call(key.home_node(),this); } }
/** Figure the number of trees to make locally, so the total hits ntrees. * Divide equally amongst all the nodes that actually have data. First: * compute how many nodes have data. Give each Node ntrees/#nodes worth of * trees. Round down for later nodes, and round up for earlier nodes. */
private int howManyTrees() {
  Frame fr = _fr;
  final long num_chunks = fr.anyVec().nChunks();
  final int num_nodes = H2O.CLOUD.size();
  // Collect the distinct home nodes of all chunks of the frame.
  HashSet<H2ONode> nodes = new HashSet<H2ONode>();
  for( int i=0; i<num_chunks; i++ ) {
    nodes.add(fr.anyVec().chunkKey(i).home_node());
    if( nodes.size() == num_nodes ) // All of nodes covered?
      break;                        // That means we are done.
  }
  // Sort the data-holding nodes so every node computes the same ordering,
  // and hence agrees on which nodes get the "round up" extra tree.
  H2ONode[] array = nodes.toArray(new H2ONode[nodes.size()]);
  Arrays.sort(array);
  // Give each H2ONode ntrees/#nodes worth of trees. Round down for later nodes,
  // and round up for earlier nodes
  int ntrees = _params.num_trees / nodes.size();
  // Our rank among the data-holding nodes decides whether we take one of the
  // leftover (num_trees mod #nodes) extra trees.
  if( Arrays.binarySearch(array, H2O.SELF) < _params.num_trees - ntrees*nodes.size() )
    ++ntrees;
  return ntrees;
}
private final RPC<T> remote_compute( ArrayList<Key> keys ) { if( keys.size() == 0 ) return null; DRemoteTask rpc = clone(); rpc.setCompleter(null); rpc._keys = keys.toArray(new Key[keys.size()]); addToPendingCount(1); // Block until the RPC returns // Set self up as needing completion by this RPC: when the ACK comes back // we'll get a wakeup. return new RPC(keys.get(0).home_node(), rpc).addCompleter(this).call(); }
public ChunkProgressJob(long chunksTotal, Key destinationKey) { destination_key = destinationKey; _progress = Key.make(Key.make()._kb, (byte) 0, Key.DFJ_INTERNAL_USER, destinationKey.home_node()); UKV.put(_progress, new ChunkProgress(chunksTotal)); }
/**
 * Fetch the producer key for {@code key}: run locally when the key is
 * homed here, otherwise execute on the key's home node and block for the
 * result.
 */
public static Key fetch(Key key) {
  FetchProducer producer = new FetchProducer(key);
  if( key.home() ) {
    producer.compute2();    // Key lives here: run inline, no network hop
  } else {
    // Remote key: run the task on its home node and wait for the answer.
    producer = RPC.call(key.home_node(), producer).get();
  }
  return producer._producer;
}

/** Private: instances are created only via {@link #fetch(Key)}. */
private FetchProducer(Key k) { _key = k; }
sb.append("<tr>"); // Row header
// Leading cell: the chunk's home node plus the chunk's starting row index.
sb.append("<td>").append(c0.chunkKey(j).home_node())
  .append(", ").append(c0.chunk2StartElem(j)).append("</td>");
// One cell per displayed column follows; loop body continues past this chunk.
for( int i=0; i<cols.length; i++ ) {
/** Pretty print the Vec: [#elems, min/mean/max]{chunks,...} */ @Override public String toString() { String s = "["+length()+(_naCnt<0 ? ", {" : ","+_min+"/"+_mean+"/"+_max+", "+PrettyPrint.bytes(_size)+", {"); int nc = nChunks(); for( int i=0; i<nc; i++ ) { s += chunkKey(i).home_node()+":"+chunk2StartElem(i)+":"; // CNC: Bad plan to load remote data during a toString... messes up debug printing // Stupidly chunkForChunkIdx loads all data locally // s += chunkForChunkIdx(i).getClass().getSimpleName().replaceAll("Chunk","")+", "; } return s+"}]"; }
/**
 * Build a grid of GLM2 jobs, one per alpha value, to be run with at most
 * maxP of them in flight at once.
 */
public GLMGridSearch(int maxP, GLM2 glm2, Key destKey){
  super(glm2.self(), destKey);
  _glm2 = glm2;
  description = "GLM Grid on data " + glm2._srcDinfo.toString() ;
  _maxParallelism = maxP;
  _jobs = new GLM2[glm2.alpha.length];
  // Counter starts at the parallelism limit; presumably decremented as jobs
  // launch to cap concurrency — TODO confirm against the run loop.
  _idx = new AtomicInteger(_maxParallelism);
  for(int i = 0; i < _jobs.length; ++i) {
    // Each grid entry is a clone of the template GLM2 specialized to a
    // single alpha, with per-entry destination/progress/job keys derived
    // by suffixing the index.
    _jobs[i] = (GLM2)_glm2.clone();
    _jobs[i]._grid = true;
    _jobs[i].alpha = new double[]{glm2.alpha[i]};
    _jobs[i].destination_key = Key.make(glm2.destination_key + "_" + i);
    // Hidden progress key homed on the grid's destination node.
    _jobs[i]._progressKey = Key.make(dest().toString() + "_progress_" + i, (byte) 1, Key.HIDDEN_USER_KEY, dest().home_node());
    _jobs[i].job_key = Key.make(glm2.job_key + "_" + i);
  }
}
// onCompletion must be empty here, may be invoked twice (on remote and local)
@Override public final void compute2() {
  if( !_key.home() ) {
    // Remote key: ship this task to its home node, registering ourself as
    // completer so we wake up when the remote side finishes.
    new RPC(_key.home_node(), this).addCompleter(this).call();
    return;
  }
  // Local key: apply the user's map() to the value, if one exists.
  Value val = H2O.get(_key);
  if( val != null ) {
    V v = val.get();
    map(v);
  }
  tryComplete();
}
/**
 * Distributed compare-and-swap PUT: atomically replace {@code old} with
 * {@code val} under {@code key}, then propagate the update across the cloud
 * (invalidate remote caches if we are the key's home, or push to the home
 * node otherwise). Returns {@code old} on success, or the conflicting
 * current Value on failure.
 */
static public Value DputIfMatch( Key key, Value val, Value old, Futures fs, boolean dontCache ) {
  // First: I must block repeated remote PUTs to the same Key until all prior
  // ones complete - the home node needs to see these PUTs in order.
  // Repeated PUTs on the home node are already ordered.
  if( old != null && !key.home() ) old.startRemotePut();
  // local update first, since this is a weak update
  Value res = H2O.putIfMatch(key,val,old);
  if( res != old )              // Failed?
    return res;                 // Return fail value
  // Check for trivial success: no need to invalidate remotes if the new
  // value equals the old.
  if( old != null && old == val ) return old; // Trivial success?
  if( old != null && val != null && val.equals(old) )
    return old;                 // Less trivial success, but no network i/o
  // Before we start doing distributed writes... block until the cloud
  // stablizes. After we start doing distrubuted writes, it is an error to
  // change cloud shape - the distributed writes will be in the wrong place.
  Paxos.lockCloud();
  // The 'D' part of DputIfMatch: do Distribution.
  // If PUT is on HOME, invalidate remote caches
  // If PUT is on non-HOME, replicate/push to HOME
  if( key.home() ) {            // On HOME?
    if( old != null ) old.lockAndInvalidate(H2O.SELF,fs);
  } else {                      // On non-HOME?
    // Start a write, but do not block for it
    TaskPutKey.put(key.home_node(),key,val,fs, dontCache);
  }
  return old;
}
/**
 * Run the user's atomic update on the key's home node, using an optimistic
 * compare-and-swap retry loop: read the current Value, compute a new one,
 * and attempt to install it; on conflict, retry from the freshly observed
 * Value until success or the user's atomic() aborts by returning null.
 */
@Override public final void compute2( ) {
  assert _key.home() : "Atomic on wrong node; SELF="+H2O.SELF+ ", key_home="+_key.home_node()+", key_is_home="+_key.home()+", class="+getClass();
  Futures fs = new Futures();   // Must block on all invalidates eventually
  Value val1 = DKV.get(_key);
  while( true ) {
    // Run users' function. This is supposed to read-only from val1 and
    // return new val2 to atomically install.
    Value val2 = atomic(val1);
    if( val2 == null ) break;   // ABORT: they gave up
    assert val1 != val2;        // No returning the same Value
    // Attempt atomic update
    Value res = DKV.DputIfMatch(_key,val2,val1,fs);
    if( res == val1 ) {         // Success?
      onSuccess(val1);          // Call user's post-XTN function
      fs.blockForPending();     // Block for any pending invalidates on the atomic update
      break;
    }
    val1 = res;                 // Otherwise try again with the current value
  }                             // and retry
  _key = null;                  // No need for key no more, don't send it back
  tryComplete();                // Tell F/J this task is done
}
dp.exec(new Frame(vec));
// Record, for each chunk of the parsed vec, the index of the node that
// homes that chunk's key (offset into the global chunk->enum table).
for( int i = 0; i < vec.nChunks(); ++i )
  _chunk2Enum[chunkOff + i] = vec.chunkKey(i).home_node().index();
} else {
  // NOTE(review): surrounding method not visible in this chunk; the else
  // branch continues beyond it.
  ParseProgressMonitor pmon = new ParseProgressMonitor(_progress);
/**
 * Launch this GLM2 as an asynchronous job: set up the progress object in
 * the K/V store, clone self so the stored job instance stays separate from
 * the mutating worker, and submit the work to the F/J pool.
 */
public GLM2 fork(H2OCountedCompleter cc){
  // Grid children share the source lock taken by the grid; only a
  // standalone run read-locks the source frame itself.
  if(!_grid)source.read_lock(self());
  // keep *this* separate from what's stored in K/V as job (will be changing it!)
  Futures fs = new Futures();
  // Hidden progress key homed on the destination key's node.
  _progressKey = Key.make(dest().toString() + "_progress", (byte) 1, Key.HIDDEN_USER_KEY, dest().home_node());
  int total = max_iter;
  if(lambda_search) total = MAX_ITERATIONS_PER_LAMBDA*nlambdas;
  // Cross-validation multiplies the work: n folds plus the final model.
  GLM2_Progress progress = new GLM2_Progress(total*(n_folds > 1?(n_folds+1):1));
  LogInfo("created progress " + progress);
  DKV.put(_progressKey,progress,fs);
  fs.blockForPending();
  _fjtask = new H2O.H2OEmptyCompleter(cc);
  H2OCountedCompleter fjtask = new GLMJobCompleter(_fjtask);
  GLM2 j = (GLM2)clone();
  j.start(_fjtask); // modifying GLM2 object, don't want job object to be the same instance
  H2O.submitTask(fjtask);
  return j;
}
// Tally rows per node: credit each chunk's row count to the node that
// homes the chunk's key. (Loop body continues beyond this chunk.)
for(int ci=0; ci<chunksCount; ci++) {
  Key cKey = _data.anyVec().chunkKey(ci);
  _rowsPerNode[cKey.home_node().index()] += _data.anyVec().chunkLen(ci);
/** * Finds what nodes actually do carry some of data of a given Frame * @param fr frame to find nodes for * @return FrameNodes */ public static FrameNodes findFrameNodes(Frame fr) { // Count on how many nodes the data resides boolean[] nodesHoldingFrame = new boolean[H2O.CLOUD.size()]; Vec vec = fr.anyVec(); for(int chunkNr = 0; chunkNr < vec.nChunks(); chunkNr++) { int home = vec.chunkKey(chunkNr).home_node().index(); if (! nodesHoldingFrame[home]) nodesHoldingFrame[home] = true; } return new FrameNodes(fr, nodesHoldingFrame); }