/**
 * Command-line entry point.
 *
 * <p>Creates a new {@code DistributedLanczosSolver}, obtains its job via {@code job()}, and
 * passes it to {@code ToolRunner.run} together with {@code args}. The value returned by
 * {@code ToolRunner.run} is discarded.
 *
 * <p>NOTE(review): the final closing brace on this line ends an enclosing scope whose opening
 * is not visible here (presumably the class body).
 *
 * @param args command-line arguments forwarded unchanged to {@code ToolRunner.run}
 * @throws Exception anything propagated from {@code ToolRunner.run}
 */
public static void main(String[] args) throws Exception { ToolRunner.run(new DistributedLanczosSolver().job(), args); } }
/**
 * Solves an already-constructed Lanczos state and serializes the result.
 *
 * <p>The corpus held by {@code state} is cast to {@code Configurable} and given a fresh copy of
 * {@code originalConfig}; this solver receives {@code originalConfig} itself.
 *
 * @param originalConfig configuration copied onto the corpus and set on this solver
 * @param state state to solve; the same instance is returned
 * @param desiredRank rank forwarded to {@code solve}
 * @param isSymmetric symmetry flag forwarded to {@code solve}
 * @param outputEigenVectorPathString path string wrapped in a {@code Path} and passed to
 *     {@code serializeOutput}
 * @return the {@code state} argument
 * @throws IOException declared by the signature; presumably raised by the solve or
 *     serialization steps
 */
public LanczosState runJob(Configuration originalConfig,
                           LanczosState state,
                           int desiredRank,
                           boolean isSymmetric,
                           String outputEigenVectorPathString) throws IOException {
  // Cast first, then copy the configuration, matching the original evaluation order.
  Configurable configurableCorpus = (Configurable) state.getCorpus();
  Configuration corpusConf = new Configuration(originalConfig);
  configurableCorpus.setConf(corpusConf);

  setConf(originalConfig);
  solve(state, desiredRank, isSymmetric);

  Path eigenVectorPath = new Path(outputEigenVectorPathString);
  serializeOutput(state, eigenVectorPath);
  return state;
}
/**
 * Returns whatever configuration the enclosing {@code DistributedLanczosSolver} reports.
 *
 * @return the result of the enclosing solver's {@code getConf()}
 */
@Override
public Configuration getConf() {
  DistributedLanczosSolver enclosingSolver = DistributedLanczosSolver.this;
  return enclosingSolver.getConf();
}
int desiredRank) throws Exception { DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols); matrix.setConf(new Configuration(getConf() != null ? getConf() : new Configuration())); state = new LanczosState(matrix, desiredRank, getInitialVector(matrix)); } else { HdfsBackedLanczosState hState = new HdfsBackedLanczosState(matrix, desiredRank, getInitialVector(matrix), workingDirPath); hState.setConf(matrix.getConf()); state = hState; solve(state, desiredRank, isSymmetric); serializeOutput(state, outputEigenVectorPath); return 0;
/**
 * Checks that a solve resumed at a higher rank yields basis vectors close to a single run.
 *
 * <p>Steps: solve at rank 10 with an {@code HdfsBackedLanczosState}; double the rank, build a
 * second {@code HdfsBackedLanczosState} from the same path expression and solve again with a
 * new solver; then compare each normalized basis vector with the one produced by
 * {@code doTestDistributedLanczosSolver(symmetric, rank, false)}. The 2-norm of each
 * difference must be below 0.1.
 *
 * @param symmetric forwarded to {@code getCorpus}, {@code suf} and {@code solve}
 * @throws IOException declared by the signature
 */
public void doTestResumeIteration(boolean symmetric) throws IOException {
  DistributedRowMatrix matrix = getCorpus(symmetric);
  Configuration configuration = getConfiguration();
  matrix.setConf(configuration);

  DistributedLanczosSolver lanczos = new DistributedLanczosSolver();
  int rank = 10;
  Vector startVector = DistributedLanczosSolver.getInitialVector(matrix);

  // First pass: solve at the initial rank.
  Path firstStateDir =
      new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter);
  HdfsBackedLanczosState firstPass =
      new HdfsBackedLanczosState(matrix, rank, startVector, firstStateDir);
  lanczos.solve(firstPass, rank, symmetric);

  // Second pass: doubled rank, state directory built from the same expression, new solver.
  rank *= 2;
  Path resumedStateDir =
      new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter);
  HdfsBackedLanczosState resumed =
      new HdfsBackedLanczosState(matrix, rank, startVector, resumedStateDir);
  lanczos = new DistributedLanczosSolver();
  lanczos.solve(resumed, rank, symmetric);

  LanczosState allAtOnceState = doTestDistributedLanczosSolver(symmetric, rank, false);

  int i = 0;
  while (i < resumed.getIterationNumber()) {
    Vector v = resumed.getBasisVector(i).normalize();
    Vector w = allAtOnceState.getBasisVector(i).normalize();
    double diff = v.minus(w).norm(2);
    assertTrue("basis " + i + " is too long: " + diff, diff < 0.1);
    i++;
  }
  counter++;
}
// Build a LanczosState over L with rank `overshoot` and the solver's initial vector for L, then
// run the job with `true` as the symmetric flag and the "eigenvectors" child of outputCalc
// (as a string) as the output path argument.
DistributedLanczosSolver solver = new DistributedLanczosSolver(); LanczosState state = new LanczosState(L, overshoot, DistributedLanczosSolver.getInitialVector(L)); Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors"); solver.runJob(conf, state, overshoot, true, lanczosSeqFiles.toString());
/**
 * Programmatic entry point that builds the matrix and the Lanczos state from raw paths and
 * dimensions, then delegates to the {@code runJob} overload that takes a {@code LanczosState}.
 *
 * @param originalConfig configuration copied onto the matrix and forwarded to the delegate
 * @param inputPath first constructor argument of the {@code DistributedRowMatrix}
 * @param outputTmpPath second constructor argument of the {@code DistributedRowMatrix}
 * @param numRows row count given to the matrix
 * @param numCols column count given to the matrix
 * @param isSymmetric symmetry flag forwarded to the delegate
 * @param desiredRank rank used for the state and forwarded to the delegate
 * @param outputEigenVectorPathString output path string forwarded to the delegate
 * @return the state returned by the delegate
 * @throws IOException declared by the signature and by the delegate
 */
public LanczosState runJob(Configuration originalConfig,
                           Path inputPath,
                           Path outputTmpPath,
                           int numRows,
                           int numCols,
                           boolean isSymmetric,
                           int desiredRank,
                           String outputEigenVectorPathString) throws IOException {
  DistributedRowMatrix corpus =
      new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
  Configuration corpusConf = new Configuration(originalConfig);
  corpus.setConf(corpusConf);

  LanczosState initialState =
      new LanczosState(corpus, desiredRank, getInitialVector(corpus));
  return runJob(originalConfig,
                initialState,
                desiredRank,
                isSymmetric,
                outputEigenVectorPathString);
}
// Hand the job of a new DistributedLanczosSolver to ToolRunner with args; the value returned
// by ToolRunner.run is not used.
ToolRunner.run(new DistributedLanczosSolver().job(), args);
/**
 * Reads the job options from {@code parsedArgs} and dispatches to one of the {@code run}
 * overloads.
 *
 * <p>The {@code strings} parameter is not read here; every option comes from
 * {@code parsedArgs}. When {@code --cleansvd} parses to {@code true}, the additional options
 * {@code --maxError}, {@code --minEigenvalue} and {@code --inMemory} are read and passed to the
 * longer overload; otherwise the shorter overload is used.
 *
 * @param strings unused by this method
 * @return the value returned by the selected {@code run} overload
 * @throws Exception anything raised while parsing options or by the selected overload
 */
@Override
public int run(String[] strings) throws Exception {
  Path input = new Path(AbstractJob.getOption(parsedArgs, "--input"));
  Path output = new Path(AbstractJob.getOption(parsedArgs, "--output"));
  Path tempDir = new Path(AbstractJob.getOption(parsedArgs, "--tempDir"));

  // --workingDir is optional: a missing option leaves the path null.
  Path workingDir = null;
  if (AbstractJob.getOption(parsedArgs, "--workingDir") != null) {
    workingDir = new Path(AbstractJob.getOption(parsedArgs, "--workingDir"));
  }

  int rows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
  int cols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
  boolean symmetric = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--symmetric"));
  int rank = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--rank"));
  boolean cleanSvd = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--cleansvd"));

  if (!cleanSvd) {
    return run(input, output, tempDir, workingDir, rows, cols, symmetric, rank);
  }

  double maxError = Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"));
  double minEigenvalue =
      Double.parseDouble(AbstractJob.getOption(parsedArgs, "--minEigenvalue"));
  boolean inMemory = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--inMemory"));
  return run(input, output, tempDir, workingDir, rows, cols, symmetric, rank,
             maxError, minEigenvalue, inMemory);
}
/**
 * Forwards the given configuration to the enclosing {@code DistributedLanczosSolver}.
 *
 * @param conf configuration passed unchanged to the enclosing solver's {@code setConf}
 */
@Override
public void setConf(Configuration conf) {
  DistributedLanczosSolver enclosingSolver = DistributedLanczosSolver.this;
  enclosingSolver.setConf(conf);
}
int desiredRank) throws Exception { DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols); matrix.setConf(new Configuration(getConf() != null ? getConf() : new Configuration())); state = new LanczosState(matrix, desiredRank, getInitialVector(matrix)); } else { HdfsBackedLanczosState hState = new HdfsBackedLanczosState(matrix, desiredRank, getInitialVector(matrix), workingDirPath); hState.setConf(matrix.getConf()); state = hState; solve(state, desiredRank, isSymmetric); serializeOutput(state, outputEigenVectorPath); return 0;
/**
 * Runs the solver on the test corpus and checks the result.
 *
 * <p>Uses either a plain {@code LanczosState} or an {@code HdfsBackedLanczosState} (which is
 * also given the test configuration). After solving, asserts orthonormality via
 * {@code SolverTest.assertOrthonormal} and checks the first {@code desiredRank / 2} right
 * singular vectors with {@code SolverTest.assertEigen} at tolerance 0.1. Increments
 * {@code counter} before returning.
 *
 * @param symmetric forwarded to {@code getCorpus}, {@code suf}, {@code solve} and
 *     {@code assertEigen}
 * @param desiredRank rank used for the state and the solve
 * @param hdfsBackedState whether to use {@code HdfsBackedLanczosState}
 * @return the solved state
 * @throws IOException declared by the signature
 */
private LanczosState doTestDistributedLanczosSolver(boolean symmetric,
                                                    int desiredRank,
                                                    boolean hdfsBackedState) throws IOException {
  DistributedRowMatrix matrix = getCorpus(symmetric);
  Configuration configuration = getConfiguration();
  matrix.setConf(configuration);

  DistributedLanczosSolver lanczos = new DistributedLanczosSolver();
  Vector startVector = DistributedLanczosSolver.getInitialVector(matrix);

  LanczosState result;
  if (!hdfsBackedState) {
    result = new LanczosState(matrix, desiredRank, startVector);
  } else {
    Path stateDir =
        new Path(getTestTempDirPath(), "lanczosStateDir" + suf(symmetric) + counter);
    HdfsBackedLanczosState hdfsState =
        new HdfsBackedLanczosState(matrix, desiredRank, startVector, stateDir);
    hdfsState.setConf(configuration);
    result = hdfsState;
  }

  lanczos.solve(result, desiredRank, symmetric);
  SolverTest.assertOrthonormal(result);

  // Only the first half of the requested rank is checked.
  int vectorsToCheck = desiredRank / 2;
  for (int index = 0; index < vectorsToCheck; index++) {
    SolverTest.assertEigen(index, result.getRightSingularVector(index), matrix, 0.1, symmetric);
  }

  counter++;
  return result;
}
// Build a LanczosState over L with rank `overshoot` and the solver's initial vector for L, then
// run the job with `true` as the symmetric flag and the "eigenvectors" child of outputCalc
// (as a string) as the output path argument.
DistributedLanczosSolver solver = new DistributedLanczosSolver(); LanczosState state = new LanczosState(L, overshoot, DistributedLanczosSolver.getInitialVector(L)); Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors"); solver.runJob(conf, state, overshoot, true, lanczosSeqFiles.toString());
/**
 * Programmatic entry point that builds the matrix and the Lanczos state from raw paths and
 * dimensions, then delegates to the {@code runJob} overload that takes a {@code LanczosState}.
 *
 * @param originalConfig configuration copied onto the matrix and forwarded to the delegate
 * @param inputPath first constructor argument of the {@code DistributedRowMatrix}
 * @param outputTmpPath second constructor argument of the {@code DistributedRowMatrix}
 * @param numRows row count given to the matrix
 * @param numCols column count given to the matrix
 * @param isSymmetric symmetry flag forwarded to the delegate
 * @param desiredRank rank used for the state and forwarded to the delegate
 * @param outputEigenVectorPathString output path string forwarded to the delegate
 * @return the state returned by the delegate
 * @throws IOException declared by the signature and by the delegate
 */
public LanczosState runJob(Configuration originalConfig,
                           Path inputPath,
                           Path outputTmpPath,
                           int numRows,
                           int numCols,
                           boolean isSymmetric,
                           int desiredRank,
                           String outputEigenVectorPathString) throws IOException {
  DistributedRowMatrix corpus =
      new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols);
  Configuration corpusConf = new Configuration(originalConfig);
  corpus.setConf(corpusConf);

  LanczosState initialState =
      new LanczosState(corpus, desiredRank, getInitialVector(corpus));
  return runJob(originalConfig,
                initialState,
                desiredRank,
                isSymmetric,
                outputEigenVectorPathString);
}
"--workingDir", workingDir.toString() }; ToolRunner.run(getConfiguration(), new DistributedLanczosSolver().new DistributedLanczosSolverJob(), args); "--workingDir", workingDir.toString() }; ToolRunner.run(getConfiguration(), new DistributedLanczosSolver().new DistributedLanczosSolverJob(), args);
/**
 * Reads the job options from {@code parsedArgs} and dispatches to one of the {@code run}
 * overloads.
 *
 * <p>The {@code strings} parameter is not read here; every option comes from
 * {@code parsedArgs}. When {@code --cleansvd} parses to {@code true}, the additional options
 * {@code --maxError}, {@code --minEigenvalue} and {@code --inMemory} are read and passed to the
 * longer overload; otherwise the shorter overload is used.
 *
 * @param strings unused by this method
 * @return the value returned by the selected {@code run} overload
 * @throws Exception anything raised while parsing options or by the selected overload
 */
@Override
public int run(String[] strings) throws Exception {
  Path input = new Path(AbstractJob.getOption(parsedArgs, "--input"));
  Path output = new Path(AbstractJob.getOption(parsedArgs, "--output"));
  Path tempDir = new Path(AbstractJob.getOption(parsedArgs, "--tempDir"));

  // --workingDir is optional: a missing option leaves the path null.
  Path workingDir = null;
  if (AbstractJob.getOption(parsedArgs, "--workingDir") != null) {
    workingDir = new Path(AbstractJob.getOption(parsedArgs, "--workingDir"));
  }

  int rows = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numRows"));
  int cols = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--numCols"));
  boolean symmetric = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--symmetric"));
  int rank = Integer.parseInt(AbstractJob.getOption(parsedArgs, "--rank"));
  boolean cleanSvd = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--cleansvd"));

  if (!cleanSvd) {
    return run(input, output, tempDir, workingDir, rows, cols, symmetric, rank);
  }

  double maxError = Double.parseDouble(AbstractJob.getOption(parsedArgs, "--maxError"));
  double minEigenvalue =
      Double.parseDouble(AbstractJob.getOption(parsedArgs, "--minEigenvalue"));
  boolean inMemory = Boolean.parseBoolean(AbstractJob.getOption(parsedArgs, "--inMemory"));
  return run(input, output, tempDir, workingDir, rows, cols, symmetric, rank,
             maxError, minEigenvalue, inMemory);
}
/**
 * Forwards the given configuration to the enclosing {@code DistributedLanczosSolver}.
 *
 * @param conf configuration passed unchanged to the enclosing solver's {@code setConf}
 */
@Override
public void setConf(Configuration conf) {
  DistributedLanczosSolver enclosingSolver = DistributedLanczosSolver.this;
  enclosingSolver.setConf(conf);
}
int desiredRank) throws Exception { DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath, outputTmpPath, numRows, numCols); matrix.setConf(new Configuration(getConf() != null ? getConf() : new Configuration())); state = new LanczosState(matrix, desiredRank, getInitialVector(matrix)); } else { HdfsBackedLanczosState hState = new HdfsBackedLanczosState(matrix, desiredRank, getInitialVector(matrix), workingDirPath); hState.setConf(matrix.getConf()); state = hState; solve(state, desiredRank, isSymmetric); serializeOutput(state, outputEigenVectorPath); return 0;