/**
 * {@inheritDoc}
 *
 * <p>A cell with no stored entry is reported as {@code 0}.
 */
public double get(int row, int col) {
    // Validate the requested cell before touching the backing map.
    checkIndices(row, col, false);

    Double stored = matrixEntries.get(new Entry(row, col));
    if (stored == null) {
        // Nothing has been recorded for this cell, so it is implicitly zero.
        return 0;
    }
    return stored;
}
/** * {@inheritDoc} */ public Vector getVector(String word) { Integer index = termToIndex.get(word); if (index == null) return null; // If the matrix hasn't had columns dropped then the returned vector // will be the combination of the word's row and column else if (reduced == null) { // NOTE: the matrix could be asymmetric if the a word has only // appeared on one side of a context (its row or column vector would // never have been set). Therefore, check the index with the matrix // size first. SparseDoubleVector rowVec = (index < cooccurrenceMatrix.rows()) ? cooccurrenceMatrix.getRowVectorUnsafe(index) : new CompactSparseVector(termToIndex.size()); SparseDoubleVector colVec = (index < cooccurrenceMatrix.columns()) ? cooccurrenceMatrix.getColumnVectorUnsafe(index) : new CompactSparseVector(termToIndex.size()); return new ConcatenatedSparseDoubleVector(rowVec, colVec); } // The co-occurrence matrix has had columns dropped so the vector is // just the word's row else { return reduced.getRowVector(index); } }
/** * Returns the column vector, locking the data if {@code shouldLock} is * {@code true}. */ private SparseDoubleVector getColumnVector(int column, boolean shouldLock) { int r = rows.get(); if (shouldLock) lockColumn(column, r); // Ensure that the column data is up to date while (lastVectorCacheUpdate.get() != modifications.get()) updateVectorCache(); int[] rowArr = colToRowsCache[column]; SparseDoubleVector colVec = new SparseHashDoubleVector(r); for (int row : rowArr) colVec.set(row, matrixEntries.get(new Entry(row, column))); if (shouldLock) unlockColumn(column, r); return colVec; }
/** * {@inheritDoc} */ public double[][] toDenseArray() { int r = rows.get(); int c = cols.get(); for (int i = 0; i < r; ++i) lockRow(i, c); double[][] m = new double[r][0]; for (int i = 0; i < r; ++i) { DoubleVector row = getRowVector(i); // Ensure that we see a consistent length for all the rows if (row.length() != c) row = Vectors.subview(row, 0, c); m[i] = row.toArray(); } for (int i = 0; i < r; ++i) unlockRow(i, c); return m; }
/** * {@inheritDoc} */ public void setColumn(int column, DoubleVector rowValues) { checkIndices(rowValues.length(), column, true); int r = rows.get(); lockColumn(column, r); boolean modified = false; for (int row = 0; row < r; ++row) { double val = rowValues.get(row); Entry e = new Entry(row, column); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockColumn(column, r); }
/** * {@inheritDoc} */ public void setRow(int row, DoubleVector colValues) { checkIndices(row, colValues.length(), true); int c = cols.get(); lockRow(row, c); boolean modified = false; for (int col = 0; col < c; ++col) { double val = colValues.get(col); Entry e = new Entry(row, col); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockRow(row, c); }
/**
 * Reads the specified column without locking it.  Because access is not
 * atomic, the returned vector may reflect an inconsistent view of the data
 * if this matrix is being modified concurrently.  Use this method only in
 * special cases where the matrix (or this particular column) is known not
 * to be modified while the vector is being accessed.
 *
 * @param column the index of the column to read
 * @return the column's values as a sparse vector
 */
public SparseDoubleVector getColumnVectorUnsafe(int column) {
    final boolean acquireLock = false;
    return getColumnVector(column, acquireLock);
}
/**
 * {@inheritDoc}  The returned row vector's length reflects the size of the
 * matrix at the time of this call, which may differ from what earlier calls
 * to {@link #columns()} reported.
 *
 * <p>This overload always requests locking of the row while it is read.
 */
public SparseDoubleVector getRowVector(int row) {
    final boolean acquireLock = true;
    return getRowVector(row, acquireLock);
}
/**
 * Constructs a new instance, reading its configuration from the provided
 * properties.
 *
 * <p>The window size is parsed from {@code WINDOW_SIZE_PROPERTY} and the
 * weighting function is loaded from {@code WEIGHTING_FUNCTION_PROPERTY};
 * when either property is absent the corresponding default is used.
 *
 * @param properties the properties to read the configuration from
 * @throws NumberFormatException if the window size property is set but is
 *         not a parsable integer
 */
public HyperspaceAnalogueToLanguage(Properties properties) {
    // Start from an empty, unreduced semantic space.
    cooccurrenceMatrix = new AtomicGrowingSparseHashMatrix();
    reduced = null;
    termToIndex = new ConcurrentHashMap<String,Integer>();
    wordIndexCounter = 0;

    // Window size: explicit setting wins, otherwise the default.
    String configuredWindow = properties.getProperty(WINDOW_SIZE_PROPERTY);
    if (configuredWindow == null) {
        windowSize = DEFAULT_WINDOW_SIZE;
    } else {
        windowSize = Integer.parseInt(configuredWindow);
    }

    // Weighting function: explicit setting wins, otherwise the default.
    String configuredWeighting =
        properties.getProperty(WEIGHTING_FUNCTION_PROPERTY);
    if (configuredWeighting != null) {
        weighting = loadWeightingFunction(configuredWeighting);
    } else {
        weighting = DEFAULT_WEIGHTING;
    }
}
cooccurrenceMatrix.addAndGet(p.x, p.y, e.getValue());
SparseDoubleVector sv = cooccurrenceMatrix.getRowVector(row); for (int col : sv.getNonZeroIndices()) { double v = cooccurrenceMatrix.get(row, col);
/** * {@inheritDoc} */ public void processSpace(Properties properties) { // Ensure that the bottom right corner of the matrix has a valid value // so that we always create a 2 * n set of dimensions in the default // case. if (cooccurrenceMatrix.get(termToIndex.numDimensions() - 1, termToIndex.numDimensions() - 1) == 0d) cooccurrenceMatrix.set(termToIndex.numDimensions() - 1, termToIndex.numDimensions() - 1, 0d); if (columnThreshold > -1d) thresholdColumns(columnThreshold); if (retainColumns > 0) retainOnly(retainColumns); }
/** * {@inheritDoc} */ public Vector getVector(String word) { Integer index = termToIndex.getDimension(word); if (index == null) return null; // If the matrix hasn't had columns dropped then the returned vector // will be the combination of the word's row and column else if (reduced == null) { // NOTE: the matrix could be asymmetric if the a word has only // appeared on one side of a context (its row or column vector would // never have been set). Therefore, check the index with the matrix // size first. SparseDoubleVector rowVec = (index < cooccurrenceMatrix.rows()) ? cooccurrenceMatrix.getRowVectorUnsafe(index) : new CompactSparseVector(termToIndex.numDimensions()); SparseDoubleVector colVec = (index < cooccurrenceMatrix.columns()) ? cooccurrenceMatrix.getColumnVectorUnsafe(index) : new CompactSparseVector(termToIndex.numDimensions()); return new ConcatenatedSparseDoubleVector(rowVec, colVec); } // The co-occurrence matrix has had columns dropped so the vector is // just the word's row return reduced.getRowVector(index); }
/** * Returns the column vector, locking the data if {@code shouldLock} is * {@code true}. */ private SparseDoubleVector getColumnVector(int column, boolean shouldLock) { int r = rows.get(); if (shouldLock) lockColumn(column, r); // Ensure that the column data is up to date while (lastVectorCacheUpdate.get() != modifications.get()) updateVectorCache(); int[] rowArr = colToRowsCache[column]; SparseDoubleVector colVec = new SparseHashDoubleVector(r); for (int row : rowArr) colVec.set(row, matrixEntries.get(new Entry(row, column))); if (shouldLock) unlockColumn(column, r); return colVec; }
/** * {@inheritDoc} */ public double[][] toDenseArray() { int r = rows.get(); int c = cols.get(); for (int i = 0; i < r; ++i) lockRow(i, c); double[][] m = new double[r][0]; for (int i = 0; i < r; ++i) { DoubleVector row = getRowVector(i); // Ensure that we see a consistent length for all the rows if (row.length() != c) row = Vectors.subview(row, 0, c); m[i] = row.toArray(); } for (int i = 0; i < r; ++i) unlockRow(i, c); return m; }
/** * {@inheritDoc} */ public void setColumn(int column, DoubleVector rowValues) { checkIndices(rowValues.length(), column, true); int r = rows.get(); lockColumn(column, r); boolean modified = false; for (int row = 0; row < r; ++row) { double val = rowValues.get(row); Entry e = new Entry(row, column); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockColumn(column, r); }
/** * {@inheritDoc} */ public void setRow(int row, DoubleVector colValues) { checkIndices(row, colValues.length(), true); int c = cols.get(); lockRow(row, c); boolean modified = false; for (int col = 0; col < c; ++col) { double val = colValues.get(col); Entry e = new Entry(row, col); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockRow(row, c); }