org.apache.mahout.math.hadoop.stochasticsvd.qr.GivensThinSolver java code examples

/**
 * Moves every row still held in {@code yLookahead} into the QR solver,
 * flushes the last block and emits the outputs via {@link #flushQBlocks()}.
 * {@code closeables} is always handed to {@code IOUtils.close} on the way
 * out, even when an exception is thrown.
 *
 * @throws IOException propagated from the flush, write or close calls
 */
protected void cleanup() throws IOException {
 try {
  boolean solverMissing = qSolver == null;
  if (solverMissing && yLookahead.isEmpty()) {
   // no solver and no buffered rows: nothing to emit
   return;
  }
  if (solverMissing) {
   qSolver = new GivensThinSolver(yLookahead.size(), kp);
  }
  // resize the solver to its current row count plus the buffered rows
  int pendingRows = yLookahead.size();
  qSolver.adjust(qSolver.getCnt() + pendingRows);
  for (int moved = 0; moved < pendingRows; moved++) {
   // always take the head so rows reach the solver in arrival order
   qSolver.appendRow(yLookahead.remove(0));
  }
  assert qSolver.isFull();
  blockCnt++;
  if (blockCnt > 1) {
   // multi-block case: persist the final block, then close the temp writer
   // ourselves and unregister it so it is not closed again in finally
   flushSolver();
   assert tempQw != null;
   closeables.remove(tempQw);
   Closeables.close(tempQw, false);
  }
  flushQBlocks();
 } finally {
  IOUtils.close(closeables);
 }
}

 // NOTE(review): excerpt only -- the enclosing method header, the declarations
 // of vARow / vQtRow / cs, and whatever loop defines i are not visible here.
 // Fill cs from the leading entry of the incoming row (vARow) and of R row 0,
 // then apply cs[0], cs[1] in place to the pair (vARow, R row 0) over n entries
 // and to the pair (vQtRow, Qt row 0) over m entries.
 givens(vARow[0], getRRow(0)[0], cs);
 applyGivensInPlace(cs[0], cs[1], vARow, getRRow(0), 0, n);
 applyGivensInPlace(cs[0], cs[1], vQtRow, getQtRow(0), 0, m);
 // Same pattern for the adjacent rows i-1 and i: coefficients come from column
 // i of both R rows; the R rows are updated from column i for n - i entries,
 // the Qt rows over all m entries.
 // NOTE(review): presumably this runs inside a loop over i -- confirm.
 givens(getRRow(i - 1)[i], getRRow(i)[i], cs);
 applyGivensInPlace(cs[0], cs[1], getRRow(i - 1), getRRow(i), i,
   n - i);
 applyGivensInPlace(cs[0], cs[1], getQtRow(i - 1), getQtRow(i), 0,
   m);
// After pushQtDown(), install vQtRow as Qt row 0 and keep the array that was
// there as the new vQtRow scratch buffer.
pushQtDown();
double[] swap = getQtRow(0);
setQtRow(0, vQtRow);
vQtRow = swap;
// Same swap for R: after pushRDown(), vARow becomes R row 0 and the array
// previously at row 0 becomes the new vARow scratch buffer.
pushRDown();
swap = getRRow(0);
setRRow(0, vARow);
vARow = swap;

/**
 * Combines two square row-major blocks in place.
 * <p>
 * For every offset {@code v} and every column {@code u >= v}, coefficients
 * are computed by {@code givens} from {@code r1[u][u]} and
 * {@code r2[u - v][u]} and applied by {@code applyGivensInPlace} to rows
 * {@code r1[u]} and {@code r2[u - v]}, starting at column {@code u} and
 * covering the remaining {@code kp - u} entries.
 * <p>
 * NOTE(review): presumably the merged factor ends up in {@code r1} while
 * {@code r2} is zeroed out -- confirm against the helpers.
 *
 * @param r1 first block, updated in place
 * @param r2 second block, updated in place; must have the same row width
 */
public static void mergeR(double[][] r1, double[][] r2) {
 final int width = r1[0].length;
 assert width == r2[0].length;
 final double[] coeffs = new double[2];
 for (int offset = 0; offset < width; offset++) {
  for (int col = offset; col < width; col++) {
   double[] upperRow = r1[col];
   double[] lowerRow = r2[col - offset];
   givens(upperRow[col], lowerRow[col], coeffs);
   applyGivensInPlace(coeffs[0], coeffs[1], upperRow, lowerRow, col,
     width - col);
  }
 }
}

/**
 * Takes the solver's current R and thin Q' blocks, remembers R in
 * {@code rSubseq}, appends the Q' block to the temporary Q writer and resets
 * the solver.
 *
 * @throws IOException propagated from obtaining or appending to the writer
 */
private void flushSolver() throws IOException {
 UpperTriangular rBlock = qSolver.getRTilde();
 double[][] qtBlock = qSolver.getThinQtTilde();
 rSubseq.add(rBlock);
 /*
  * The block is written as a dense array. A sparse row matrix might look
  * like the better fit, but the compressor should take care of it on disk,
  * and in memory dense is what we want anyway: sparse random-access
  * implementations would mostly be a memory management disaster of rehashes
  * and GC thrashing. (IMHO)
  */
 value.setBlock(qtBlock);
 getTempQw().append(tempKey, value);
 // detach the block from the shared writable once it has been written
 value.setBlock(null);
 qSolver.reset();
}

/**
 * Runs {@code GivensThinSolver} on a fixed 3x3 matrix, checks the transposed
 * thin Q' block with {@code assertOrthonormality} and prints the product
 * Q * R for inspection.
 */
@Test
public void testGivensQR() throws Exception {
 // Fixed input. The matrix used to be filled with random values first, but
 // every cell was then overwritten, so the random fill was dead work.
 Matrix m = new DenseMatrix(new double[][] {
  {1, 2, 3},
  {4, 5, 6},
  {7, 8, 9}
 });
 GivensThinSolver qrSolver =
  new GivensThinSolver(m.rowSize(), m.columnSize());
 qrSolver.solve(m);
 Matrix qtm = new DenseMatrix(qrSolver.getThinQtTilde());
 assertOrthonormality(qtm.transpose(), false, SVD_EPSILON);
 // Rebuild the input from the factors; the result is only printed.
 Matrix aClone =
  new DenseMatrix(qrSolver.getThinQtTilde()).transpose()
                       .times(qrSolver.getRTilde());
 System.out.println("aclone : " + aClone);
}

/**
 * Emits the final Q-hat and R outputs. With exactly one block the solver's
 * results are written directly; otherwise {@link #secondPass()} is run.
 *
 * @throws IOException propagated from the output calls or the second pass
 */
private void flushQBlocks() throws IOException {
 if (blockCnt != 1) {
  secondPass();
  return;
 }
 /*
  * Single block: no temp file and no second pass. This should be the
  * default mode for efficiency in most cases. Sure, the mapper should be
  * able to load the entire split in memory -- and we don't even require
  * that.
  */
 value.setBlock(qSolver.getThinQtTilde());
 outputQHat(value);
 DenseVector packedR = new DenseVector(qSolver.getRTilde().getData(), true);
 outputR(new VectorWritable(packedR));
}

/**
 * Buffers one incoming Y row. While fewer than {@code kp} rows are buffered
 * a fresh array is allocated; once {@code kp} rows are buffered the oldest
 * one is handed to the solver (flushing the solver first if it is full) and
 * its array is reused for the incoming row.
 *
 * @param incomingYRow the row to buffer
 * @throws IOException propagated from flushing the solver
 */
protected void map(Vector incomingYRow) throws IOException {
 final double[] buffer;
 if (yLookahead.size() != kp) {
  buffer = new double[kp];
 } else {
  if (qSolver.isFull()) {
   flushSolver();
   blockCnt++;
  }
  // NOTE(review): the array is overwritten below right after appendRow, so
  // this relies on appendRow not retaining a reference to it -- confirm.
  buffer = yLookahead.remove(0);
  qSolver.appendRow(buffer);
 }
 if (!incomingYRow.isDense()) {
  // clear stale values (the array may be recycled), then set non-zeroes
  Arrays.fill(buffer, 0);
  for (Element nonZero : incomingYRow.nonZeroes()) {
   buffer[nonZero.index()] = nonZero.get();
  }
 } else {
  for (int col = 0; col < kp; col++) {
   buffer[col] = incomingYRow.get(col);
  }
 }
 yLookahead.add(buffer);
}

/**
 * Re-reads the Q' blocks written to the temporary file, converts each one
 * with {@code GivensThinSolver.computeQtHat} against the collected R blocks
 * and emits it, then emits the single remaining R block.
 *
 * @throws IOException propagated from reading the temp file or writing output
 */
private void secondPass() throws IOException {
 qSolver = null; // release mem
 SequenceFile.Reader tempQr =
  new SequenceFile.Reader(FileSystem.getLocal(jobConf), tempQPath, jobConf);
 // registered so that it gets closed together with the other closeables
 closeables.addFirst(tempQr);
 int qCnt = 0;
 while (tempQr.next(tempKey, value)) {
  value.setBlock(
   GivensThinSolver.computeQtHat(
    value.getBlock(),
    qCnt,
    new CopyConstructorIterator<UpperTriangular>(rSubseq.iterator())));
  if (qCnt != 1) {
   qCnt++;
  } else {
   /*
    * fold r[1] into r[0] right away so that later computeQtHat iterators
    * do not have to repeat that merge
    */
   GivensThinSolver.mergeR(rSubseq.get(0), rSubseq.remove(1));
  }
  outputQHat(value);
 }
 assert rSubseq.size() == 1;
 outputR(new VectorWritable(new DenseVector(rSubseq.get(0).getData(), true)));
}

/**
 * Reads the row block size, {@code k} and {@code p} from the job
 * configuration, then creates the lookahead buffer, the QR solver and the
 * multiple-outputs helper. The helper is registered in {@code closeables}.
 */
protected void setup() {
 int rowBlockSize = Integer.parseInt(jobConf.get(PROP_AROWBLOCK_SIZE));
 kp = Integer.parseInt(jobConf.get(PROP_K))
   + Integer.parseInt(jobConf.get(PROP_P));
 yLookahead = Lists.newArrayListWithCapacity(kp);
 qSolver = new GivensThinSolver(rowBlockSize, kp);
 outputs = new MultipleOutputs(new JobConf(jobConf));
 // adapter: lets the outputs helper be closed along with the other closeables
 Closeable outputsCloser = new Closeable() {
  @Override
  public void close() throws IOException {
   outputs.close();
  }
 };
 closeables.addFirst(outputsCloser);
}

/**
 * Builds a new {@code UpperTriangular} of size {@code n} and fills it row by
 * row from the solver's R rows.
 *
 * @return a newly allocated packed copy of R
 */
public UpperTriangular getRTilde() {
 final UpperTriangular result = new UpperTriangular(n);
 int row = 0;
 while (row < n) {
  result.assignNonZeroElementsInRow(row, getRRow(row));
  row++;
 }
 return result;
}

/**
 * Feeds every row of {@code a} to {@code appendRow}, top to bottom.
 *
 * @param a input matrix; asserted to have {@code m} rows and {@code n} columns
 */
public void solve(Matrix a) {
 assert a.rowSize() == m;
 assert a.columnSize() == n;
 // one scratch array is reused for all rows
 final double[] rowBuffer = new double[n];
 for (int rowIdx = 0; rowIdx < m; rowIdx++) {
  final Vector rowView = a.viewRow(rowIdx);
  int col = 0;
  while (col < n) {
   rowBuffer[col] = rowView.getQuick(col);
   col++;
  }
  appendRow(rowBuffer);
 }
}

/**
 * Calls {@code adjust(cnt)}.
 * NOTE(review): presumably this shrinks the solver to the number of rows
 * appended so far -- confirm against adjust().
 */
public void trim() {
 adjust(cnt);
}

/**
 * Pulls the next Q-hat block from {@code qHatInput}, if any, converts it
 * with {@code GivensThinSolver.computeQtHat} and refreshes {@code r},
 * {@code kp} and (on first use) {@code qRow}.
 *
 * @return {@code true} if a block was loaded, {@code false} if the input is
 *         exhausted
 */
private boolean loadNextQt() {
 if (!qHatInput.hasNext()) {
  return false;
 }
 DenseBlockWritable block = qHatInput.next();
 // the first block is passed 0, every later block is passed 1
 int rStart;
 if (blockNum == 0) {
  rStart = 0;
 } else {
  rStart = 1;
 }
 mQt =
  GivensThinSolver
   .computeQtHat(block.getBlock(),
          rStart,
          new CopyConstructorIterator<>(mRs.iterator()));
 r = mQt[0].length;
 kp = mQt.length;
 if (qRow == null) {
  // allocated once, sized from the first block
  qRow = new DenseVector(kp);
 }
 return true;
}

/**
 * Combines two square row-major blocks in place.
 * <p>
 * For every offset {@code v} and every column {@code u >= v}, coefficients
 * are computed by {@code givens} from {@code r1[u][u]} and
 * {@code r2[u - v][u]} and applied by {@code applyGivensInPlace} to rows
 * {@code r1[u]} and {@code r2[u - v]}, starting at column {@code u} and
 * covering the remaining {@code kp - u} entries.
 * <p>
 * NOTE(review): presumably the merged factor ends up in {@code r1} while
 * {@code r2} is zeroed out -- confirm against the helpers.
 *
 * @param r1 first block, updated in place
 * @param r2 second block, updated in place; must have the same row width
 */
public static void mergeR(double[][] r1, double[][] r2) {
 final int width = r1[0].length;
 assert width == r2[0].length;
 final double[] coeffs = new double[2];
 for (int offset = 0; offset < width; offset++) {
  for (int col = offset; col < width; col++) {
   double[] upperRow = r1[col];
   double[] lowerRow = r2[col - offset];
   givens(upperRow[col], lowerRow[col], coeffs);
   applyGivensInPlace(coeffs[0], coeffs[1], upperRow, lowerRow, col,
     width - col);
  }
 }
}

/**
 * Takes the solver's current R and thin Q' blocks, remembers R in
 * {@code rSubseq}, appends the Q' block to the temporary Q writer and resets
 * the solver.
 *
 * @throws IOException propagated from obtaining or appending to the writer
 */
private void flushSolver() throws IOException {
 UpperTriangular rBlock = qSolver.getRTilde();
 double[][] qtBlock = qSolver.getThinQtTilde();
 rSubseq.add(rBlock);
 /*
  * The block is written as a dense array. A sparse row matrix might look
  * like the better fit, but the compressor should take care of it on disk,
  * and in memory dense is what we want anyway: sparse random-access
  * implementations would mostly be a memory management disaster of rehashes
  * and GC thrashing. (IMHO)
  */
 value.setBlock(qtBlock);
 getTempQw().append(tempKey, value);
 // detach the block from the shared writable once it has been written
 value.setBlock(null);
 qSolver.reset();
}

/**
 * Emits the final Q-hat and R outputs. With exactly one block the solver's
 * results are written directly; otherwise {@link #secondPass()} is run.
 *
 * @throws IOException propagated from the output calls or the second pass
 */
private void flushQBlocks() throws IOException {
 if (blockCnt != 1) {
  secondPass();
  return;
 }
 /*
  * Single block: no temp file and no second pass. This should be the
  * default mode for efficiency in most cases. Sure, the mapper should be
  * able to load the entire split in memory -- and we don't even require
  * that.
  */
 value.setBlock(qSolver.getThinQtTilde());
 outputQHat(value);
 DenseVector packedR = new DenseVector(qSolver.getRTilde().getData(), true);
 outputR(new VectorWritable(packedR));
}

/**
 * Buffers one incoming Y row. While fewer than {@code kp} rows are buffered
 * a fresh array is allocated; once {@code kp} rows are buffered the oldest
 * one is handed to the solver (flushing the solver first if it is full) and
 * its array is reused for the incoming row.
 *
 * @param incomingYRow the row to buffer
 * @throws IOException propagated from flushing the solver
 */
protected void map(Vector incomingYRow) throws IOException {
 final double[] buffer;
 if (yLookahead.size() != kp) {
  buffer = new double[kp];
 } else {
  if (qSolver.isFull()) {
   flushSolver();
   blockCnt++;
  }
  // NOTE(review): the array is overwritten below right after appendRow, so
  // this relies on appendRow not retaining a reference to it -- confirm.
  buffer = yLookahead.remove(0);
  qSolver.appendRow(buffer);
 }
 if (!incomingYRow.isDense()) {
  // clear stale values (the array may be recycled), then set non-zeroes
  Arrays.fill(buffer, 0);
  for (Element nonZero : incomingYRow.nonZeroes()) {
   buffer[nonZero.index()] = nonZero.get();
  }
 } else {
  for (int col = 0; col < kp; col++) {
   buffer[col] = incomingYRow.get(col);
  }
 }
 yLookahead.add(buffer);
}

/**
 * Re-reads the Q' blocks written to the temporary file, converts each one
 * with {@code GivensThinSolver.computeQtHat} against the collected R blocks
 * and emits it, then emits the single remaining R block.
 *
 * @throws IOException propagated from reading the temp file or writing output
 */
private void secondPass() throws IOException {
 qSolver = null; // release mem
 SequenceFile.Reader tempQr =
  new SequenceFile.Reader(FileSystem.getLocal(jobConf), tempQPath, jobConf);
 // registered so that it gets closed together with the other closeables
 closeables.addFirst(tempQr);
 int qCnt = 0;
 while (tempQr.next(tempKey, value)) {
  value.setBlock(
   GivensThinSolver.computeQtHat(
    value.getBlock(),
    qCnt,
    new CopyConstructorIterator<>(rSubseq.iterator())));
  if (qCnt != 1) {
   qCnt++;
  } else {
   /*
    * fold r[1] into r[0] right away so that later computeQtHat iterators
    * do not have to repeat that merge
    */
   GivensThinSolver.mergeR(rSubseq.get(0), rSubseq.remove(1));
  }
  outputQHat(value);
 }
 assert rSubseq.size() == 1;
 outputR(new VectorWritable(new DenseVector(rSubseq.get(0).getData(), true)));
}

/**
 * Reads the row block size, {@code k} and {@code p} from the job
 * configuration, then creates the lookahead buffer, the QR solver and the
 * multiple-outputs helper. The helper is registered in {@code closeables}.
 */
protected void setup() {
 int rowBlockSize = Integer.parseInt(jobConf.get(PROP_AROWBLOCK_SIZE));
 kp = Integer.parseInt(jobConf.get(PROP_K))
   + Integer.parseInt(jobConf.get(PROP_P));
 yLookahead = Lists.newArrayListWithCapacity(kp);
 qSolver = new GivensThinSolver(rowBlockSize, kp);
 outputs = new MultipleOutputs(new JobConf(jobConf));
 // adapter: lets the outputs helper be closed along with the other closeables
 Closeable outputsCloser = new Closeable() {
  @Override
  public void close() throws IOException {
   outputs.close();
  }
 };
 closeables.addFirst(outputsCloser);
}

/**
 * Builds a new {@code UpperTriangular} of size {@code n} and fills it row by
 * row from the solver's R rows.
 *
 * @return a newly allocated packed copy of R
 */
public UpperTriangular getRTilde() {
 final UpperTriangular result = new UpperTriangular(n);
 int row = 0;
 while (row < n) {
  result.assignNonZeroElementsInRow(row, getRRow(row));
  row++;
 }
 return result;
}

Javadoc

Givens thin solver. Standard Givens operations are reordered in a way that helps us push them through MapReduce operations in a block fashion.

Most used methods

Popular in Java

Making http post requests using okhttp
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
notifyDataSetChanged (ArrayAdapter)
scheduleAtFixedRate (Timer)
PrintStream (java.io)
Fake signature of an existing Java class.
URL (java.net)
A Uniform Resource Locator that identifies the location of an Internet resource as specified by RFC
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split.> // Legacy code: StringTo
LogFactory (org.apache.commons.logging)
Factory for creating Log instances, with discovery and configuration features similar to that employ
Table (com.google.common.collect)
A collection that associates an ordered pair of keys, called a row key and a column key, with a sing
Best IntelliJ plugins

How to use GivensThinSolver in org.apache.mahout.math.hadoop.stochasticsvd.qr

Best Java code snippets using org.apache.mahout.math.hadoop.stochasticsvd.qr.GivensThinSolver (Showing top 20 results out of 315)

How to use
GivensThinSolver
in
org.apache.mahout.math.hadoop.stochasticsvd.qr