/**
 * Command-line entry point.
 *
 * <p>Creates an {@code EigenVerificationJob} and hands it, together with the raw
 * arguments, to {@code ToolRunner.run}. The status value returned by
 * {@code ToolRunner.run} is not inspected.
 *
 * @param args command-line arguments, passed through unchanged
 * @throws Exception if {@code ToolRunner.run} throws
 */
public static void main(String[] args) throws Exception {
  EigenVerificationJob job = new EigenVerificationJob();
  ToolRunner.run(job, args);
}
/**
 * Registers the command-line options of this job and parses {@code args} against them.
 *
 * <p>Options registered, in order: the output option, {@code eigenInput} (default
 * {@code null}), {@code corpusInput}, the default output and help options,
 * {@code inMemory} (default "false"), {@code maxError} (default "0.05"),
 * {@code minEigenvalue} (default "0.0") and {@code maxEigens} (default "0").
 *
 * <p>NOTE(review): the output option appears to be registered both by
 * {@code addOutputOption()} and by the explicit {@code DefaultOptionCreator.outputOption()}
 * call below; confirm whether the second registration is needed.
 *
 * @param args raw command-line arguments
 * @return whatever {@code parseArguments} returns for {@code args}
 * @throws IOException declared by the signature; presumably propagated from argument parsing
 */
private Map<String,List<String>> handleArgs(String[] args) throws IOException {
  // Shared tail of the two input-path descriptions (kept exactly as emitted at runtime).
  final String sequenceFileHint = "(SequenceFile<WritableComparable,VectorWritable>.";

  addOutputOption();
  addOption("eigenInput",
            "ei",
            "The Path for purported eigenVector input files " + sequenceFileHint,
            null);
  addOption("corpusInput",
            "ci",
            "The Path for corpus input files " + sequenceFileHint);
  addOption(DefaultOptionCreator.outputOption().create());
  addOption(DefaultOptionCreator.helpOption());
  addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
  addOption("maxError", "err", "Maximum acceptable error", "0.05");
  addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
  addOption("maxEigens", "max", "Maximum number of eigenvectors to keep (0 means all)", "0");

  Map<String,List<String>> parsedArgs = parseArguments(args);
  return parsedArgs;
}
@Override public int run(String[] args) throws Exception { Map<String,List<String>> argMap = handleArgs(args); if (argMap == null) { return -1; } if (argMap.isEmpty()) { return 0; } // parse out the arguments runJob(getConf(), new Path(getOption("eigenInput")), new Path(getOption("corpusInput")), getOutputPath(), getOption("inMemory") != null, Double.parseDouble(getOption("maxError")), // Double.parseDouble(getOption("minEigenvalue")), Integer.parseInt(getOption("maxEigens"))); return 0; }
/** * Progammatic invocation of run() * * @param eigenInput * Output of LanczosSolver * @param corpusInput * Input of LanczosSolver */ public void runJob(Configuration conf, Path eigenInput, Path corpusInput, Path output, boolean inMemory, double maxError, int maxEigens) throws IOException { // no need to handle command line arguments outPath = output; tmpOut = new Path(outPath, "tmp"); maxEigensToKeep = maxEigens; this.maxError = maxError; if (eigenInput != null && eigensToVerify == null) { prepareEigens(new Configuration(conf), eigenInput, inMemory); } DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1); c.setConf(new Configuration(conf)); corpus = c; eigenVerifier = new SimpleEigenVerifier(); Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens(); List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData); saveCleanEigens(conf, prunedEigenMeta); } }
// Run EigenVerificationJob programmatically on the Lanczos output, writing under the
// "eigenverifier" child of outputCalc. By position in runJob's signature, the trailing
// arguments are inMemory = true, maxError = 1.0 and maxEigens = clusters.
// The cleaned eigenvector path is then wrapped in a DistributedRowMatrix whose temporary
// path is the "tmp" child of that path.
// NOTE(review): the last two constructor arguments (clusters, numDims) are presumably
// the row and column counts - confirm against DistributedRowMatrix.
EigenVerificationJob verifier = new EigenVerificationJob(); Path verifiedEigensPath = new Path(outputCalc, "eigenverifier"); verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath, true, 1.0, clusters); Path cleanedEigens = verifier.getCleanedEigensPath(); DistributedRowMatrix W = new DistributedRowMatrix(cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
return new EigenVerificationJob().run(inputPath, rawEigenVectorPath, outputPath,
/** * Progammatic invocation of run() * * @param eigenInput * Output of LanczosSolver * @param corpusInput * Input of LanczosSolver */ public void runJob(Configuration conf, Path eigenInput, Path corpusInput, Path output, boolean inMemory, double maxError, int maxEigens) throws IOException { // no need to handle command line arguments outPath = output; tmpOut = new Path(outPath, "tmp"); maxEigensToKeep = maxEigens; this.maxError = maxError; if (eigenInput != null && eigensToVerify == null) { prepareEigens(new Configuration(conf), eigenInput, inMemory); } DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1); c.setConf(new Configuration(conf)); corpus = c; eigenVerifier = new SimpleEigenVerifier(); Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens(); List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData); saveCleanEigens(conf, prunedEigenMeta); } }
// Run EigenVerificationJob programmatically on the Lanczos output, writing under the
// "eigenverifier" child of outputCalc. By position in runJob's signature, the trailing
// arguments are inMemory = true, maxError = 1.0 and maxEigens = clusters.
// The cleaned eigenvector path is then wrapped in a DistributedRowMatrix whose temporary
// path is the "tmp" child of that path.
// NOTE(review): the last two constructor arguments (clusters, numDims) are presumably
// the row and column counts - confirm against DistributedRowMatrix.
EigenVerificationJob verifier = new EigenVerificationJob(); Path verifiedEigensPath = new Path(outputCalc, "eigenverifier"); verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath, true, 1.0, clusters); Path cleanedEigens = verifier.getCleanedEigensPath(); DistributedRowMatrix W = new DistributedRowMatrix(cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
return new EigenVerificationJob().run(inputPath, rawEigenVectorPath, outputPath,
/** * Progammatic invocation of run() * * @param eigenInput * Output of LanczosSolver * @param corpusInput * Input of LanczosSolver */ public void runJob(Configuration conf, Path eigenInput, Path corpusInput, Path output, boolean inMemory, double maxError, int maxEigens) throws IOException { // no need to handle command line arguments outPath = output; tmpOut = new Path(outPath, "tmp"); maxEigensToKeep = maxEigens; this.maxError = maxError; if (eigenInput != null && eigensToVerify == null) { prepareEigens(new Configuration(conf), eigenInput, inMemory); } DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1, 1); c.setConf(new Configuration(conf)); corpus = c; eigenVerifier = new SimpleEigenVerifier(); Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens(); List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData); saveCleanEigens(conf, prunedEigenMeta); } }
@Override public int run(String[] args) throws Exception { Map<String,List<String>> argMap = handleArgs(args); if (argMap == null) { return -1; } if (argMap.isEmpty()) { return 0; } // parse out the arguments runJob(getConf(), new Path(getOption("eigenInput")), new Path(getOption("corpusInput")), getOutputPath(), getOption("inMemory") != null, Double.parseDouble(getOption("maxError")), // Double.parseDouble(getOption("minEigenvalue")), Integer.parseInt(getOption("maxEigens"))); return 0; }
/**
 * Registers the command-line options of this job and parses {@code args} against them.
 *
 * <p>Options registered, in order: the output option, {@code eigenInput} (default
 * {@code null}), {@code corpusInput}, the default output and help options,
 * {@code inMemory} (default "false"), {@code maxError} (default "0.05"),
 * {@code minEigenvalue} (default "0.0") and {@code maxEigens} (default "0").
 *
 * <p>NOTE(review): the output option appears to be registered both by
 * {@code addOutputOption()} and by the explicit {@code DefaultOptionCreator.outputOption()}
 * call below; confirm whether the second registration is needed.
 *
 * @param args raw command-line arguments
 * @return whatever {@code parseArguments} returns for {@code args}
 * @throws IOException declared by the signature; presumably propagated from argument parsing
 */
private Map<String,List<String>> handleArgs(String[] args) throws IOException {
  // Shared tail of the two input-path descriptions (kept exactly as emitted at runtime).
  final String sequenceFileHint = "(SequenceFile<WritableComparable,VectorWritable>.";

  addOutputOption();
  addOption("eigenInput",
            "ei",
            "The Path for purported eigenVector input files " + sequenceFileHint,
            null);
  addOption("corpusInput",
            "ci",
            "The Path for corpus input files " + sequenceFileHint);
  addOption(DefaultOptionCreator.outputOption().create());
  addOption(DefaultOptionCreator.helpOption());
  addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
  addOption("maxError", "err", "Maximum acceptable error", "0.05");
  addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
  addOption("maxEigens", "max", "Maximum number of eigenvectors to keep (0 means all)", "0");

  Map<String,List<String>> parsedArgs = parseArguments(args);
  return parsedArgs;
}
return new EigenVerificationJob().run(inputPath, rawEigenVectorPath, outputPath,
/**
 * Command-line entry point.
 *
 * <p>Creates an {@code EigenVerificationJob} and hands it, together with the raw
 * arguments, to {@code ToolRunner.run}. The status value returned by
 * {@code ToolRunner.run} is not inspected.
 *
 * @param args command-line arguments, passed through unchanged
 * @throws Exception if {@code ToolRunner.run} throws
 */
public static void main(String[] args) throws Exception {
  EigenVerificationJob job = new EigenVerificationJob();
  ToolRunner.run(job, args);
}
// Prepare the eigenvectors, verify them, prune the verification results and save what remains.
prepareEigens(conf, eigenInput, inMemory);
Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens();
List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData);
// Save with the same configuration that was used to prepare the eigenvectors instead of
// a freshly constructed default one, so caller-supplied settings are not dropped.
saveCleanEigens(conf, prunedEigenMeta);
return 0;
@Override public int run(String[] args) throws Exception { Map<String,List<String>> argMap = handleArgs(args); if (argMap == null) { return -1; } if (argMap.isEmpty()) { return 0; } // parse out the arguments runJob(getConf(), new Path(getOption("eigenInput")), new Path(getOption("corpusInput")), getOutputPath(), getOption("inMemory") != null, Double.parseDouble(getOption("maxError")), // Double.parseDouble(getOption("minEigenvalue")), Integer.parseInt(getOption("maxEigens"))); return 0; }
/**
 * Registers the command-line options of this job and parses {@code args} against them.
 *
 * <p>Options registered, in order: the output option, {@code eigenInput} (default
 * {@code null}), {@code corpusInput}, the default output and help options,
 * {@code inMemory} (default "false"), {@code maxError} (default "0.05"),
 * {@code minEigenvalue} (default "0.0") and {@code maxEigens} (default "0").
 *
 * <p>NOTE(review): the output option appears to be registered both by
 * {@code addOutputOption()} and by the explicit {@code DefaultOptionCreator.outputOption()}
 * call below; confirm whether the second registration is needed.
 *
 * @param args raw command-line arguments
 * @return whatever {@code parseArguments} returns for {@code args}
 * @throws IOException declared by the signature; presumably propagated from argument parsing
 */
private Map<String,List<String>> handleArgs(String[] args) throws IOException {
  // Shared tail of the two input-path descriptions (kept exactly as emitted at runtime).
  final String sequenceFileHint = "(SequenceFile<WritableComparable,VectorWritable>.";

  addOutputOption();
  addOption("eigenInput",
            "ei",
            "The Path for purported eigenVector input files " + sequenceFileHint,
            null);
  addOption("corpusInput",
            "ci",
            "The Path for corpus input files " + sequenceFileHint);
  addOption(DefaultOptionCreator.outputOption().create());
  addOption(DefaultOptionCreator.helpOption());
  addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
  addOption("maxError", "err", "Maximum acceptable error", "0.05");
  addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
  addOption("maxEigens", "max", "Maximum number of eigenvectors to keep (0 means all)", "0");

  Map<String,List<String>> parsedArgs = parseArguments(args);
  return parsedArgs;
}
/**
 * Command-line entry point.
 *
 * <p>Creates an {@code EigenVerificationJob} and hands it, together with the raw
 * arguments, to {@code ToolRunner.run}. The status value returned by
 * {@code ToolRunner.run} is not inspected.
 *
 * @param args command-line arguments, passed through unchanged
 * @throws Exception if {@code ToolRunner.run} throws
 */
public static void main(String[] args) throws Exception {
  EigenVerificationJob job = new EigenVerificationJob();
  ToolRunner.run(job, args);
}
// Prepare the eigenvectors, verify them, prune the verification results and save what remains.
prepareEigens(conf, eigenInput, inMemory);
Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens();
List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData);
// Save with the same configuration that was used to prepare the eigenvectors instead of
// a freshly constructed default one, so caller-supplied settings are not dropped.
saveCleanEigens(conf, prunedEigenMeta);
return 0;
// Prepare the eigenvectors, verify them, prune the verification results and save what remains.
prepareEigens(conf, eigenInput, inMemory);
Map<MatrixSlice,EigenStatus> eigenMetaData = verifyEigens();
List<Map.Entry<MatrixSlice,EigenStatus>> prunedEigenMeta = pruneEigens(eigenMetaData);
// Save with the same configuration that was used to prepare the eigenvectors instead of
// a freshly constructed default one, so caller-supplied settings are not dropped.
saveCleanEigens(conf, prunedEigenMeta);
return 0;