/**
 * Consumes the next entries from two parallel iterators.
 *
 * @param it the left iterator.
 * @param it2 the right iterator.
 * @return a tuple whose entries can be null once the end of the
 *         corresponding iterator has been reached. If both iterators are
 *         exhausted, it simply returns null.
 */
public static <K, V> Tuple<K, V> consumeNext(final Iterator<K> it,
    final Iterator<V> it2) {
  K nextK = null;
  V nextV = null;
  if (it.hasNext()) {
    nextK = it.next();
  }
  if (it2.hasNext()) {
    nextV = it2.next();
  }
  if (nextK == null && nextV == null) {
    return null;
  } else {
    return new Tuple<>(nextK, nextV);
  }
}
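// A minimal usage sketch (not from the original source): walking two lists
// of different lengths in lock step. consumeNext is called via Iterables,
// as it is elsewhere in this code; the sample data is invented.
Iterator<String> keys = Arrays.asList("a", "b", "c").iterator();
Iterator<Integer> values = Arrays.asList(1, 2).iterator();
Tuple<String, Integer> next;
while ((next = Iterables.consumeNext(keys, values)) != null) {
  // the third tuple is ("c", null), because the right iterator is exhausted
  System.out.println(next.getFirst() + " -> " + next.getSecond());
}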
return new Tuple3<>(docList, new DenseDoubleVector(prediction.toArray()), nameMapping);
@Override
public void train(Iterable<DoubleVector> features,
    Iterable<DoubleVector> outcome) {
  // zip the streams and construct the kd tree
  Stream<Tuple<DoubleVector, DoubleVector>> stream = StreamUtils.zip(
      StreamSupport.stream(features.spliterator(), false),
      StreamSupport.stream(outcome.spliterator(), false),
      (l, r) -> new Tuple<>(l, r));
  tree.constructWithPayload(stream);
}
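// A minimal usage sketch. The owning class is not shown in this snippet, so
// "learner" is a hypothetical instance of it; the data is invented. Since
// the two iterables are zipped pairwise, they must be equally long.
List<DoubleVector> features = Arrays.asList(
    new DenseDoubleVector(new double[] { 1d, 2d }),
    new DenseDoubleVector(new double[] { 3d, 4d }));
List<DoubleVector> outcome = Arrays.asList(
    new DenseDoubleVector(new double[] { 0d }),
    new DenseDoubleVector(new double[] { 1d }));
learner.train(features, outcome);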
/**
 * @return the normalized matrix (0 mean and stddev of 1) as well as the mean
 *         and the stddev.
 */
public static Tuple3<DoubleMatrix, DoubleVector, DoubleVector> meanNormalizeColumns(
    DoubleMatrix x) {
  DenseDoubleMatrix toReturn = new DenseDoubleMatrix(x.getRowCount(),
      x.getColumnCount());
  final int length = x.getColumnCount();
  DoubleVector meanVector = new DenseDoubleVector(length);
  DoubleVector stddevVector = new DenseDoubleVector(length);
  for (int col = 0; col < length; col++) {
    DoubleVector column = x.getColumnVector(col);
    double mean = column.sum() / column.getLength();
    meanVector.set(col, mean);
    double var = column.subtract(mean).pow(2).sum() / column.getLength();
    stddevVector.set(col, Math.sqrt(var));
  }
  for (int col = 0; col < length; col++) {
    DoubleVector column = x.getColumnVector(col)
        .subtract(meanVector.get(col)).divide(stddevVector.get(col));
    toReturn.setColumn(col, column.toArray());
  }
  return new Tuple3<>(toReturn, meanVector, stddevVector);
}
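// A minimal usage sketch with verifiable numbers (assumes Tuple3 exposes
// getFirst/getSecond/getThird and that DenseDoubleMatrix accepts a
// double[][]; the values are invented). Column one has mean 2 and stddev 1,
// column two has mean 20 and stddev 10.
DoubleMatrix x = new DenseDoubleMatrix(new double[][] {
    { 1d, 10d },
    { 3d, 30d } });
Tuple3<DoubleMatrix, DoubleVector, DoubleVector> result = meanNormalizeColumns(x);
DoubleMatrix normalized = result.getFirst(); // every column: mean 0, stddev 1
DoubleVector means = result.getSecond();     // [2, 20]
DoubleVector stddevs = result.getThird();    // [1, 10]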
/**
 * Constructs the similarity aggregation from the seed tokens to expand and a
 * given bipartite graph. The bipartite graph is represented as a tuple: its
 * first item holds the vertices (called (candidate-) terms or entities), and
 * its second item holds the edges between those terms and the context
 * vertices as an NxM matrix, where N is the number of entity tokens and M is
 * the number of context vertices. Alpha is the constant weighting factor
 * used throughout the paper (usually 0.5). The distance measurer to be used
 * must also be supplied.
 */
public IterativeSimilarityAggregation(String[] seedTokens,
    Tuple<String[], DoubleMatrix> bipartiteGraph, double alpha,
    DistanceMeasurer distance) {
  this.seedTokens = seedTokens;
  this.termNodes = bipartiteGraph.getFirst();
  // make sure we transpose to have a better distance lookup
  this.weightMatrix = bipartiteGraph.getSecond().transpose();
  this.alpha = alpha;
  this.similarityMeasurer = new SimilarityMeasurer(distance);
  init();
}
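// A minimal construction sketch: one seed, two candidate terms, two context
// vertices. The 2x2 weight matrix and all strings are invented, and
// CosineDistance stands in for whatever DistanceMeasurer implementation is
// available (an assumption, not confirmed by this source).
String[] seeds = new String[] { "berlin" };
String[] terms = new String[] { "berlin", "paris" };
DoubleMatrix weights = new DenseDoubleMatrix(new double[][] {
    { 1d, 0d },    // edges of "berlin" to the two context vertices
    { 0d, 1d } }); // edges of "paris" to the two context vertices
IterativeSimilarityAggregation isa = new IterativeSimilarityAggregation(
    seeds, new Tuple<>(terms, weights), 0.5, new CosineDistance());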
/**
 * Filters out all examples whose feature at the given index does not equal
 * the given nominal value, so the returned lists contain only vectors where
 * that feature has the specific value.
 *
 * @return a new tuple of two new lists (features and their outcome).
 */
private Tuple<List<DoubleVector>, List<DoubleVector>> filterNominal(
    List<DoubleVector> features, List<DoubleVector> outcome,
    int bestSplitIndex, int nominalValue) {
  List<DoubleVector> newFeatures = Lists.newArrayList();
  List<DoubleVector> newOutcomes = Lists.newArrayList();
  Iterator<DoubleVector> featureIterator = features.iterator();
  Iterator<DoubleVector> outcomeIterator = outcome.iterator();
  while (featureIterator.hasNext()) {
    DoubleVector feature = featureIterator.next();
    DoubleVector out = outcomeIterator.next();
    if (((int) feature.get(bestSplitIndex)) == nominalValue) {
      newFeatures.add(feature);
      newOutcomes.add(out);
    }
  }
  return new Tuple<>(newFeatures, newOutcomes);
}
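// A small worked example on hypothetical data: with features
// [[1, 5], [2, 7], [1, 9]], outcomes [[0], [1], [0]], bestSplitIndex = 0
// and nominalValue = 1, the first and third rows survive the filter, so the
// result is the tuple ([[1, 5], [1, 9]], [[0], [0]]).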
// (snippet start reconstructed from the loop below)
Tuple<DoubleVector, DoubleVector> first = Iterables.consumeNext(
    featureIterator, outcomeIterator);
int numDistinctClasses = first.getSecond().getDimension();
// truncated in the source: ... .getFirst().getDimension());
observe(first.getFirst(), first.getSecond(), numDistinctClasses,
    tokenPerClass, numDocumentsPerClass);
int numDocumentsSeen = 1;
while ((first = Iterables.consumeNext(featureIterator, outcomeIterator)) != null) {
  observe(first.getFirst(), first.getSecond(), numDistinctClasses,
      tokenPerClass, numDocumentsPerClass);
  numDocumentsSeen++;
return new Tuple<>(newFeatures, newOutcomes);
node.children[cIndex] = build(filtered.getFirst(), filtered.getSecond(),
    newPossibleFeatures, level + 1);
cIndex++;
// truncated in the source: ... bestSplit.getNumericalSplitValue(), false);
if (filterNumeric.getFirst().isEmpty()
    || filterNumericHigher.getFirst().isEmpty()) {
  newPossibleFeatures.remove(bestSplitIndex);
} else {
  AbstractTreeNode lower = build(filterNumeric.getFirst(),
      filterNumeric.getSecond(), new TIntHashSet(newPossibleFeatures),
      level + 1);
  AbstractTreeNode higher = build(filterNumericHigher.getFirst(),
      filterNumericHigher.getSecond(), new TIntHashSet(newPossibleFeatures),
      level + 1);
return new Tuple<>(features, outcome);
// truncated in the source; presumably the tail of an IntStream.range over
// all indices, pairing each point with its index:
points.size()).mapToObj(i -> new Tuple<>(points.get(i), i));
// truncated in the source; presumably the tail of an IntStream.range over
// all indices, pairing each value with its index:
values.size()).mapToObj(i -> new Tuple<>(values.get(i), i));
DoubleMatrix featuresWithBias = sparse
    ? new SparseDoubleRowMatrix(bias, featureMatrix)
    : new DenseDoubleMatrix(bias, featureMatrix);
batches.add(new Tuple<>(featuresWithBias, outcomeMat));
@Override
public Tuple<Double, DoubleVector> call() throws Exception {
  // loop over all particles and calculate new positions
  for (int particleIndex = range.getStart(); particleIndex < range.getEnd(); particleIndex++) {
    DoubleVector currentPosition = particlePositions[particleIndex];
    DoubleVector currentBest = particlePersonalBestPositions[particleIndex];
    DenseDoubleVector vec = new DenseDoubleVector(dim);
    for (int index = 0; index < vec.getDimension(); index++) {
      double value = (phi * currentPosition.get(index)) // inertia
          + (alpha * random.nextDouble()
              * (currentBest.get(index) - currentPosition.get(index))) // personal memory
          + (beta * random.nextDouble()
              * (globalBestPosition.get(index) - currentPosition.get(index))); // group memory
      vec.set(index, value);
    }
    particlePositions[particleIndex] = vec;
    double cost = f.evaluateCost(vec).getCost();
    // check if we have a personal best
    if (cost < particlePersonalBestCost[particleIndex]) {
      particlePersonalBestCost[particleIndex] = cost;
      particlePersonalBestPositions[particleIndex] = vec;
      // if we had a personal best, do we have a better global?
      if (cost < globalCost) {
        globalCost = cost;
        globalBestPosition = vec;
      }
    }
  }
  return new Tuple<>(globalCost, globalBestPosition);
}
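// Note on the update above: it is the classic particle swarm rule applied
// directly to the position (no separate velocity state is kept),
//   x_i <- phi * x_i + alpha * r1 * (pbest_i - x_i) + beta * r2 * (gbest - x_i)
// with r1, r2 uniform in [0, 1), phi the inertia weight, alpha the personal
// memory weight, and beta the group memory weight.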
return new Tuple<>(matrix, meanVector);
return new Tuple<>(mean, stdVector);