static Vector average(List<Vector> list) { // find sum double[] sum = new double[list.get(0).size()]; for (Vector v : list) { for (int i = 0; i < sum.length; i++) { sum[i] += v.apply(i); } } // find averages... int numOfVectors = list.size(); for (int i = 0; i < sum.length; i++) { sum[i] = sum[i] / numOfVectors; } return new DenseVector(sum); }
static Vector average(Vector vec, Integer numVectors) { double[] avg = new double[vec.size()]; for (int i = 0; i < avg.length; i++) { // avg[i] = vec.apply(i) * (1.0 / numVectors); avg[i] = vec.apply(i) / ((double) numVectors); } return new DenseVector(avg); }
static Vector average(List<Vector> list) { // find sum double[] sum = new double[list.get(0).size()]; for (Vector v : list) { for (int i = 0; i < sum.length; i++) { sum[i] += v.apply(i); } } // find averages... int numOfVectors = list.size(); for (int i = 0; i < sum.length; i++) { sum[i] = sum[i] / numOfVectors; } return new DenseVector(sum); }
static Vector average(Vector vec, Integer numVectors) { double[] avg = new double[vec.size()]; for (int i = 0; i < avg.length; i++) { // avg[i] = vec.apply(i) * (1.0 / numVectors); avg[i] = vec.apply(i) / ((double) numVectors); } return new DenseVector(avg); }
// Fragment — the enclosing loop over k and the closing braces lie outside
// this view. For class k > 0, accumulate weights of the active features
// into score[k] and track the best-scoring class seen so far.
if (k > 0) {
  // Only non-zero feature indices contribute, via the sparse representation.
  // NOTE(review): assumes weights is laid out as contiguous blocks of d
  // per-class feature weights, class k starting at (k-1) * d — confirm
  // against the surrounding code.
  for (int j : features.toSparse().indices())
    score[k] += weights.apply((k-1) * d + j);
  if (score[k] > maxScore) {
    maxScore = score[k];
// Fragment — the else-branch body and closing brace lie outside this view.
// Interprets a sample as a feature vector; when labels are present, the
// last component is split off as the label and the remaining prefix is
// copied out as the feature array.
Vector vector = (Vector) sample;
if (mHasLabels) {
  // Last element is the label; copy everything before it as features.
  label = vector.apply(vector.size() - 1);
  features = Arrays.copyOf(vector.toArray(), vector.size() - 1);
} else {
@Test public void tfIdf() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }
@Test public void tfIdfMinimumDocumentFrequency() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(2); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }
@Test public void tfIdfMinimumDocumentFrequency() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(2); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }
@Test public void tfIdf() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }
@Test public void tfIdf() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }
@Test public void tfIdfMinimumDocumentFrequency() { // The tests are to check Java compatibility. HashingTF tf = new HashingTF(); @SuppressWarnings("unchecked") JavaRDD<List<String>> documents = jsc.parallelize(Arrays.asList( Arrays.asList("this is a sentence".split(" ")), Arrays.asList("this is another sentence".split(" ")), Arrays.asList("this is still a sentence".split(" "))), 2); JavaRDD<Vector> termFreqs = tf.transform(documents); termFreqs.collect(); IDF idf = new IDF(2); JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs); List<Vector> localTfIdfs = tfIdfs.collect(); int indexOfThis = tf.indexOf("this"); for (Vector v : localTfIdfs) { Assert.assertEquals(0.0, v.apply(indexOfThis), 1e-15); } }