/**
 * Runs {@code doTestEnumeration} with the same five expected values against a vector view,
 * a dense vector, a random-access sparse vector and a sequential-access sparse vector.
 */
@Test
public void testEnumeration() {
  double[] apriori = {0, 1, 2, 3, 4};

  // View of length 5 starting at offset 2 of a longer dense backing vector.
  DenseVector backing = new DenseVector(new double[]{ -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
  doTestEnumeration(apriori, new VectorView(backing, 2, 5));

  // Plain dense vector holding exactly the expected values.
  doTestEnumeration(apriori, new DenseVector(new double[]{0, 1, 2, 3, 4}));

  // Random-access sparse vector populated so that entry i holds the value i.
  Vector randomAccess = new RandomAccessSparseVector(5);
  for (int i = 0; i < 5; i++) {
    randomAccess.set(i, i);
  }
  doTestEnumeration(apriori, randomAccess);

  // Sequential-access sparse vector populated the same way.
  Vector sequentialAccess = new SequentialAccessSparseVector(5);
  for (int i = 0; i < 5; i++) {
    sequentialAccess.set(i, i);
  }
  doTestEnumeration(apriori, sequentialAccess);
}
RandomAccessSparseVector randomAccessLeft = new RandomAccessSparseVector(3); Vector sequentialAccessLeft = new SequentialAccessSparseVector(3); Vector right = new DenseVector(3); randomAccessLeft.setQuick(0, 1); randomAccessLeft.setQuick(1, 2); randomAccessLeft.setQuick(2, 3); sequentialAccessLeft.setQuick(0,1); sequentialAccessLeft.setQuick(1,2); sequentialAccessLeft.setQuick(2,3); Vector rightBar = new RandomAccessSparseVector(3); assertFalse(randomAccessLeft.equals(right)); right = new DenseVector(4); assertFalse(randomAccessLeft.equals(right)); randomAccessLeft = new RandomAccessSparseVector(2); randomAccessLeft.setQuick(0, 1); randomAccessLeft.setQuick(1, 2); assertFalse(randomAccessLeft.equals(right)); RandomAccessSparseVector sparse = new RandomAccessSparseVector(3); randomAccessLeft = new RandomAccessSparseVector(3); sparse.setQuick(0, 1); sparse.setQuick(1, 2); sparse.setQuick(2, 3); randomAccessLeft.setQuick(0, 1); randomAccessLeft.setQuick(1, 2); randomAccessLeft.setQuick(2, 3);
/**
 * Copies this vector into a {@link RandomAccessSparseVector} for fast operations.
 *
 * @return the result of assigning this vector to a new {@link RandomAccessSparseVector}
 *         created with this vector's size
 */
@Override
protected Vector createOptimizedCopy() {
  RandomAccessSparseVector copy = new RandomAccessSparseVector(size());
  return copy.assign(this);
}
/**
 * Builds a new vector from this vector's size and a clone of its {@code values} storage.
 *
 * @return the newly constructed {@link RandomAccessSparseVector}
 */
@Override public RandomAccessSparseVector clone() { return new RandomAccessSparseVector(size(), values.clone()); }
/**
 * For every non-zero element of the incoming vector, writes a one-entry vector keyed by that
 * element's index. The entry sits at the position given by the incoming key and carries the
 * element's value.
 *
 * @param r   incoming key; its value is read once and the object is then reused as the output key
 * @param v   incoming vector wrapper
 * @param ctx context the output pairs are written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
  // Read the source row before the loop: r is overwritten below to serve as the output key.
  final int sourceRow = r.get();
  Vector input = v.get();
  for (Vector.Element entry : input.nonZeroes()) {
    RandomAccessSparseVector single = new RandomAccessSparseVector(newNumCols, 1);
    single.setQuick(sourceRow, entry.get());
    r.set(entry.index());
    ctx.write(r, new VectorWritable(single));
  }
}
}
/**
 * Samples down and normalizes the incoming row, then writes one single-entry partial column
 * vector per non-zero element. Also records the row's norm and, when a threshold is
 * configured, its non-zero count and the largest value seen.
 *
 * @param row            key holding the row index
 * @param vectorWritable wrapper around the row vector
 * @param ctx            context used for output and for the {@code ROWS} counter
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
    throws IOException, InterruptedException {
  final int rowIndex = row.get();

  Vector sampledRowVector = sampleDown(vectorWritable.get(), ctx);
  Vector rowVector = similarity.normalize(sampledRowVector);

  int numNonZeroEntries = 0;
  // NOTE(review): Double.MIN_VALUE is the smallest positive double, not the most negative one,
  // so a row whose non-zero entries are all negative records MIN_VALUE as its max.
  // Confirm this is what the threshold logic expects.
  double maxValue = Double.MIN_VALUE;

  for (Vector.Element element : rowVector.nonZeroes()) {
    double value = element.get();

    // One-entry vector: position = this row, value = the element's value; keyed by column.
    RandomAccessSparseVector partialColumnVector = new RandomAccessSparseVector(Integer.MAX_VALUE);
    partialColumnVector.setQuick(rowIndex, value);
    ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

    numNonZeroEntries++;
    if (value > maxValue) {
      maxValue = value;
    }
  }

  // Per-row statistics are only stored when a threshold is configured.
  if (threshold != NO_THRESHOLD) {
    nonZeroEntries.setQuick(rowIndex, numNonZeroEntries);
    maxValues.setQuick(rowIndex, maxValue);
  }

  norms.setQuick(rowIndex, similarity.norm(rowVector));
  ctx.getCounter(Counters.ROWS).increment(1);
}
/**
 * Adds the incoming instance to the running per-feature weights and adds the instance's
 * {@code zSum()} to the running weight of its label.
 *
 * @param index key holding the label index
 * @param value wrapper around the instance vector
 * @param ctx   mapper context (not used in this method)
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void map(IntWritable index, VectorWritable value, Context ctx)
    throws IOException, InterruptedException {
  Vector instance = value.get();

  // Create the per-feature accumulator on first use, sized from the first instance seen.
  if (weightsPerFeature == null) {
    weightsPerFeature =
        new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements());
  }
  weightsPerFeature.assign(instance, Functions.PLUS);

  int label = index.get();
  double updatedLabelWeight = weightsPerLabel.get(label) + instance.zSum();
  weightsPerLabel.set(label, updatedLabelWeight);
}
/**
 * Writes a two-dimensional double array to a
 * {@code SequenceFile<IntWritable,VectorWritable>}: one record per row, keyed by the row's
 * position in {@code entries}.
 *
 * @param entries matrix values; {@code entries[n]} becomes the vector stored under key {@code n}
 * @param fs      file system handed to the sequence file writer
 * @param conf    configuration handed to the sequence file writer
 * @param path    location of the sequence file
 * @throws IOException if creating, appending to, or closing the writer fails
 */
public static void writeDistributedRowMatrix(double[][] entries, FileSystem fs, Configuration conf, Path path)
    throws IOException {
  SequenceFile.Writer writer = null;
  try {
    writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
    int rowIndex = 0;
    for (double[] rowValues : entries) {
      Vector rowVector = new RandomAccessSparseVector(rowValues.length);
      for (int col = 0; col < rowValues.length; col++) {
        rowVector.setQuick(col, rowValues[col]);
      }
      writer.append(new IntWritable(rowIndex), new VectorWritable(rowVector));
      rowIndex++;
    }
  } finally {
    Closeables.close(writer, false);
  }
}
/**
 * Gathers all matrix entries received for one row into a sparse vector and writes it out
 * as a {@link SequentialAccessSparseVector} under the same row key.
 *
 * @param row     key identifying the row
 * @param values  matrix entries for this row
 * @param context context supplying the configuration and receiving the output
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values, Context context)
    throws IOException, InterruptedException {
  // Vector cardinality comes from the job configuration, defaulting to Integer.MAX_VALUE.
  int dimensions = context.getConfiguration().getInt(Keys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
  RandomAccessSparseVector accumulated = new RandomAccessSparseVector(dimensions, 100);

  for (DistributedRowMatrix.MatrixEntryWritable entry : values) {
    accumulated.setQuick(entry.getCol(), entry.getVal());
    if (log.isDebugEnabled()) {
      log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]",
          row.get(), entry.getCol(), entry.getVal());
    }
  }

  // Convert to a sequential-access vector before writing.
  context.write(row, new VectorWritable(new SequentialAccessSparseVector(accumulated)));
}
}
/**
 * Maps the named vector (2, 2) against the seeds (1, 1) and (2, 1) using a Euclidean distance
 * measure, and expects one write keyed by the vector's name carrying (sqrt(2), 1).
 */
@Test
public void testVectorDistanceInvertedMapper() throws Exception {
  // Record the single expected write on the mock, then switch it to replay mode.
  Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);
  Vector expectVec = new DenseVector(new double[]{Math.sqrt(2.0), 1.0});
  context.write(new Text("other"), new VectorWritable(expectVec));
  EasyMock.replay(context);

  // Input vector (2, 2), named "other".
  Vector input = new NamedVector(new RandomAccessSparseVector(2), "other");
  input.set(0, 2);
  input.set(1, 2);

  VectorDistanceInvertedMapper mapper = new VectorDistanceInvertedMapper();
  setField(mapper, "measure", new EuclideanDistanceMeasure());

  // Two seeds: "foo" = (1, 1) and "foo2" = (2, 1).
  Collection<NamedVector> seedVectors = Lists.newArrayList();
  Vector firstSeed = new RandomAccessSparseVector(2);
  firstSeed.set(0, 1);
  firstSeed.set(1, 1);
  Vector secondSeed = new RandomAccessSparseVector(2);
  secondSeed.set(0, 2);
  secondSeed.set(1, 1);
  seedVectors.add(new NamedVector(firstSeed, "foo"));
  seedVectors.add(new NamedVector(secondSeed, "foo2"));
  setField(mapper, "seedVectors", seedVectors);

  mapper.map(new IntWritable(123), new VectorWritable(input), context);

  EasyMock.verify(context);
}
@Override protected void map(VarLongWritable rowIndex, VectorWritable vectorWritable, Context ctx) throws IOException, InterruptedException { Vector userRatings = vectorWritable.get(); int column = TasteHadoopUtils.idToIndex(rowIndex.get()); itemVectorWritable.setWritesLaxPrecision(true); Vector itemVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1); for (Vector.Element elem : userRatings.nonZeroes()) { itemID.set(elem.index()); itemVector.setQuick(column, elem.get()); itemVectorWritable.set(itemVector); ctx.write(itemID, itemVectorWritable); // reset vector for reuse itemVector.setQuick(elem.index(), 0.0); } }
Vector v1 = v0.assign(new Normal(0, 1, gen)); assertEquals(v0.get(12), v1.get(12), 0); v0.set(12, gen.nextDouble()); assertEquals(v0.get(12), v1.get(12), 0); Vector v2 = vectorToTest(20).assign(new Normal(0, 1, gen)); Vector dv1 = new DenseVector(v1); Vector dv2 = new DenseVector(v2); Vector sv1 = new RandomAccessSparseVector(v1); Vector sv2 = new RandomAccessSparseVector(v2); assertEquals(0, dv1.plus(dv2).getDistanceSquared(v1.plus(v2)), FUZZ);
Vector toSave = new DenseVector(VECTOR); DummyRecordWriter<IntWritable, VectorWritable> writer = new DummyRecordWriter<IntWritable, VectorWritable>(); Vector v = new RandomAccessSparseVector(MATRIX[i].length); v.assign(MATRIX[i]); mapper.map(new IntWritable(i), new VectorWritable(v), context); List<VectorWritable> list = writer.getValue(new IntWritable(i)); assertEquals("Only one vector per key", 1, list.size()); Vector v = list.get(0).get(); for (int j = 0; j < MATRIX[i].length; j++) { double total = Math.sqrt(VECTOR[i]) * Math.sqrt(VECTOR[j]) * MATRIX[i][j]; assertEquals("Product matrix elements", total, v.get(j),EPSILON);
RandomAccessSparseVector toAdd = new RandomAccessSparseVector(RAW_DIMENSIONS); toAdd.assign(RAW[i]); mapper.map(new IntWritable(i), new VectorWritable(toAdd), mapContext); Vector v = list.get(0).get(); for (int i = 0; i < v.size(); i++) { assertEquals("Element sum is correct", rowSum(RAW[i]), v.get(i),0.01);
/**
 * Counts how many times each second item index occurs for the given first item index and
 * writes the counts as one sparse vector keyed by the first item index.
 *
 * @param itemIndex1  key: the first item index
 * @param itemIndex2s the second item indices received for this key
 * @param context     context the resulting row is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(IntWritable itemIndex1, Iterable<IntWritable> itemIndex2s, Context context)
    throws IOException, InterruptedException {
  Vector counts = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (IntWritable other : itemIndex2s) {
    int otherIndex = other.get();
    double previous = counts.get(otherIndex);
    counts.set(otherIndex, previous + 1.0);
  }
  context.write(itemIndex1, new VectorWritable(counts));
}
}
/**
 * Tests {@link SimilarityMatrixRowWrapperMapper}: maps a row with entries at indices 12, 34 and
 * 56 under key 12 and expects one write keyed by 12 whose vector matches the entries at 34
 * and 56.
 */
@Test
public void testSimilarityMatrixRowWrapperMapper() throws Exception {
  Mapper<IntWritable,VectorWritable,VarIntWritable,VectorOrPrefWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

  // Expected output: the entry at index 12 (the row's own key) is not part of the matcher.
  context.write(
      EasyMock.eq(new VarIntWritable(12)),
      vectorOfVectorOrPrefWritableMatches(MathHelper.elem(34, 0.5), MathHelper.elem(56, 0.7)));
  EasyMock.replay(context);

  RandomAccessSparseVector similarityRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  similarityRow.set(12, 1.0);
  similarityRow.set(34, 0.5);
  similarityRow.set(56, 0.7);

  SimilarityMatrixRowWrapperMapper mapper = new SimilarityMatrixRowWrapperMapper();
  mapper.map(new IntWritable(12), new VectorWritable(similarityRow), context);

  EasyMock.verify(context);
}
@Test public void testMatrixDiagonalizeMapper() throws Exception { MatrixDiagonalizeMapper mapper = new MatrixDiagonalizeMapper(); Configuration conf = getConfiguration(); conf.setInt(Keys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS); // set up the dummy writers DummyRecordWriter<NullWritable, IntDoublePairWritable> writer = new DummyRecordWriter<NullWritable, IntDoublePairWritable>(); Mapper<IntWritable, VectorWritable, NullWritable, IntDoublePairWritable>.Context context = DummyRecordWriter.build(mapper, conf, writer); // perform the mapping for (int i = 0; i < RAW_DIMENSIONS; i++) { RandomAccessSparseVector toAdd = new RandomAccessSparseVector(RAW_DIMENSIONS); toAdd.assign(RAW[i]); mapper.map(new IntWritable(i), new VectorWritable(toAdd), context); } // check the number of the results assertEquals("Number of map results", RAW_DIMENSIONS, writer.getValue(NullWritable.get()).size()); }
@Test public void testMax() { Vector vec1 = new RandomAccessSparseVector(3); double max = vec1.maxValue(); assertEquals(0, idx); vec1 = new RandomAccessSparseVector(3); vec1.setQuick(0, -1); assertEquals(1, idx); vec1 = new DenseVector(3); vec1.setQuick(0, -1); assertEquals(1, idx); vec1 = new RandomAccessSparseVector(3); max = vec1.maxValue(); assertEquals(0.0, max, EPSILON); assertEquals(0.0, max, EPSILON); vec1 = new RandomAccessSparseVector(0); max = vec1.maxValue(); assertEquals(Double.NEGATIVE_INFINITY, max, EPSILON);
/**
 * Returns a new vector one element longer than {@code vector}: the non-zero entries of
 * {@code vector} are copied to the same indices and {@code number} is stored in the last
 * position.
 *
 * <p>The result has the same concrete type as the input when the input is a
 * {@link DenseVector}, {@link RandomAccessSparseVector} or
 * {@link SequentialAccessSparseVector}. Any other {@code Vector} implementation (for example
 * a wrapper or a view) previously left the result unset and failed with a
 * {@code NullPointerException}. It now yields a {@link DenseVector} when the input reports
 * itself as dense, and a {@link RandomAccessSparseVector} otherwise.
 *
 * @param vector the vector to extend; must not be {@code null}
 * @param number the value stored at the new last index
 * @return a new vector of size {@code vector.size() + 1}
 */
public static Vector concatenate(Vector vector, double number) {
  int extendedSize = vector.size() + 1;

  Vector con;
  if (vector instanceof DenseVector) {
    con = new DenseVector(extendedSize);
  } else if (vector instanceof RandomAccessSparseVector) {
    con = new RandomAccessSparseVector(extendedSize);
  } else if (vector instanceof SequentialAccessSparseVector) {
    con = new SequentialAccessSparseVector(extendedSize);
  } else {
    // Fallback for other implementations, so the copy loop never dereferences null.
    con = vector.isDense()
        ? new DenseVector(extendedSize)
        : new RandomAccessSparseVector(extendedSize);
  }

  for (Vector.Element nonZero : vector.nonZeroes()) {
    con.set(nonZero.index(), nonZero.get());
  }
  con.set(extendedSize - 1, number);
  return con;
}