@Override protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx) throws IOException, InterruptedException { Vector similarities = similaritiesWritable.get(); // For performance, the creation of transposedPartial is moved out of the while loop and it is reused inside Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1); TopElementsQueue topKQueue = new TopElementsQueue(maxSimilaritiesPerRow); for (Element nonZeroElement : similarities.nonZeroes()) { MutableElement top = topKQueue.top(); double candidateValue = nonZeroElement.get(); if (candidateValue > top.get()) { top.setIndex(nonZeroElement.index()); top.set(candidateValue); topKQueue.updateTop(); } transposedPartial.setQuick(row.get(), candidateValue); ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial)); transposedPartial.setQuick(row.get(), 0.0); } Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow); for (Vector.Element topKSimilarity : topKQueue.getTopElements()) { topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get()); } ctx.write(row, new VectorWritable(topKSimilarities)); } }
@Override protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx) throws IOException, InterruptedException { Vector similarities = similaritiesWritable.get(); // For performance, the creation of transposedPartial is moved out of the while loop and it is reused inside Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1); TopElementsQueue topKQueue = new TopElementsQueue(maxSimilaritiesPerRow); for (Element nonZeroElement : similarities.nonZeroes()) { MutableElement top = topKQueue.top(); double candidateValue = nonZeroElement.get(); if (candidateValue > top.get()) { top.setIndex(nonZeroElement.index()); top.set(candidateValue); topKQueue.updateTop(); } transposedPartial.setQuick(row.get(), candidateValue); ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial)); transposedPartial.setQuick(row.get(), 0.0); } Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow); for (Vector.Element topKSimilarity : topKQueue.getTopElements()) { topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get()); } ctx.write(row, new VectorWritable(topKSimilarities)); } }
@Override protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx) throws IOException, InterruptedException { Vector similarities = similaritiesWritable.get(); // For performance, the creation of transposedPartial is moved out of the while loop and it is reused inside Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1); TopElementsQueue topKQueue = new TopElementsQueue(maxSimilaritiesPerRow); for (Element nonZeroElement : similarities.nonZeroes()) { MutableElement top = topKQueue.top(); double candidateValue = nonZeroElement.get(); if (candidateValue > top.get()) { top.setIndex(nonZeroElement.index()); top.set(candidateValue); topKQueue.updateTop(); } transposedPartial.setQuick(row.get(), candidateValue); ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial)); transposedPartial.setQuick(row.get(), 0.0); } Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow); for (Vector.Element topKSimilarity : topKQueue.getTopElements()) { topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get()); } ctx.write(row, new VectorWritable(topKSimilarities)); } }
/**
 * Reduces a vector to the entries kept by a {@code TopElementsQueue} of capacity {@code k}.
 *
 * <p>When {@code original} has no more than {@code k} non-default elements it is returned
 * as-is (the very same instance, not a copy). Otherwise each non-zero entry replaces the
 * queue's current top whenever its value is strictly greater, and the elements the queue
 * finally reports are copied into a new sparse vector with the same size as
 * {@code original}.
 *
 * @param k capacity handed to the queue and threshold for the early return
 * @param original vector to prune; it is not modified by this method
 * @return {@code original} itself if it already has at most {@code k} non-default elements,
 *     otherwise a new {@code RandomAccessSparseVector} holding the queue's top elements
 */
public static Vector topKElements(int k, Vector original) {
  // Nothing to prune: hand back the caller's own instance.
  if (k >= original.getNumNondefaultElements()) {
    return original;
  }

  TopElementsQueue retained = new TopElementsQueue(k);
  for (Element entry : original.nonZeroes()) {
    final double value = entry.get();

    // Overwrite the queue's current top in place when this entry is strictly greater, then
    // let the queue restore its ordering.
    // NOTE(review): presumably top() is the weakest retained element - confirm in
    // TopElementsQueue.
    MutableElement head = retained.top();
    if (head.get() < value) {
      head.setIndex(entry.index());
      head.set(value);
      retained.updateTop();
    }
  }

  // Copy whatever the queue kept into a fresh sparse vector of the original cardinality.
  Vector pruned = new RandomAccessSparseVector(original.size(), k);
  for (Vector.Element kept : retained.getTopElements()) {
    pruned.setQuick(kept.index(), kept.get());
  }
  return pruned;
}
/**
 * Reduces a vector to the entries kept by a {@code TopElementsQueue} of capacity {@code k}.
 *
 * <p>When {@code original} has no more than {@code k} non-default elements it is returned
 * as-is (the very same instance, not a copy). Otherwise each non-zero entry replaces the
 * queue's current top whenever its value is strictly greater, and the elements the queue
 * finally reports are copied into a new sparse vector with the same size as
 * {@code original}.
 *
 * @param k capacity handed to the queue and threshold for the early return
 * @param original vector to prune; it is not modified by this method
 * @return {@code original} itself if it already has at most {@code k} non-default elements,
 *     otherwise a new {@code RandomAccessSparseVector} holding the queue's top elements
 */
public static Vector topKElements(int k, Vector original) {
  // Nothing to prune: hand back the caller's own instance.
  if (k >= original.getNumNondefaultElements()) {
    return original;
  }

  TopElementsQueue retained = new TopElementsQueue(k);
  for (Element entry : original.nonZeroes()) {
    final double value = entry.get();

    // Overwrite the queue's current top in place when this entry is strictly greater, then
    // let the queue restore its ordering.
    // NOTE(review): presumably top() is the weakest retained element - confirm in
    // TopElementsQueue.
    MutableElement head = retained.top();
    if (head.get() < value) {
      head.setIndex(entry.index());
      head.set(value);
      retained.updateTop();
    }
  }

  // Copy whatever the queue kept into a fresh sparse vector of the original cardinality.
  Vector pruned = new RandomAccessSparseVector(original.size(), k);
  for (Vector.Element kept : retained.getTopElements()) {
    pruned.setQuick(kept.index(), kept.get());
  }
  return pruned;
}
/**
 * Reduces a vector to the entries kept by a {@code TopElementsQueue} of capacity {@code k}.
 *
 * <p>When {@code original} has no more than {@code k} non-default elements it is returned
 * as-is (the very same instance, not a copy). Otherwise each non-zero entry replaces the
 * queue's current top whenever its value is strictly greater, and the elements the queue
 * finally reports are copied into a new sparse vector with the same size as
 * {@code original}.
 *
 * @param k capacity handed to the queue and threshold for the early return
 * @param original vector to prune; it is not modified by this method
 * @return {@code original} itself if it already has at most {@code k} non-default elements,
 *     otherwise a new {@code RandomAccessSparseVector} holding the queue's top elements
 */
public static Vector topKElements(int k, Vector original) {
  // Nothing to prune: hand back the caller's own instance.
  if (k >= original.getNumNondefaultElements()) {
    return original;
  }

  TopElementsQueue retained = new TopElementsQueue(k);
  for (Element entry : original.nonZeroes()) {
    final double value = entry.get();

    // Overwrite the queue's current top in place when this entry is strictly greater, then
    // let the queue restore its ordering.
    // NOTE(review): presumably top() is the weakest retained element - confirm in
    // TopElementsQueue.
    MutableElement head = retained.top();
    if (head.get() < value) {
      head.setIndex(entry.index());
      head.set(value);
      retained.updateTop();
    }
  }

  // Copy whatever the queue kept into a fresh sparse vector of the original cardinality.
  Vector pruned = new RandomAccessSparseVector(original.size(), k);
  for (Vector.Element kept : retained.getTopElements()) {
    pruned.setQuick(kept.index(), kept.get());
  }
  return pruned;
}