// Registers this job's command-line options, one statement per option.
// Registration order is kept exactly as originally written.
// NOTE(review): the four-argument addOption calls end in either a boolean or a String;
// presumably a "required" flag versus a default value -- confirm against the addOption overloads.
// NOTE(review): this same sequence of calls appears on several adjacent lines of the file as
// seen in review; confirm the repetition is intended.

// Input, output, iteration limit, convergence delta and overwrite options.
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(
    DefaultOptionCreator.CONVERGENCE_DELTA_OPTION,
    "cd",
    "The convergence delta value",
    String.valueOf(DEFAULT_CONVERGENCE_DELTA));
addOption(DefaultOptionCreator.overwriteOption().create());

// Topic count, vocabulary size and the two smoothing values.
addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
addOption(NUM_TERMS, "nt", "Vocabulary size", false);
addOption(
    DOC_TOPIC_SMOOTHING,
    "a",
    "Smoothing for document/topic distribution",
    String.valueOf(DEFAULT_DOC_TOPIC_SMOOTHING));
addOption(
    TERM_TOPIC_SMOOTHING,
    "e",
    "Smoothing for topic/term distribution",
    String.valueOf(DEFAULT_TERM_TOPIC_SMOOTHING));

// Auxiliary paths: dictionary, doc/topic output, intermediate model directory.
addOption(
    DICTIONARY,
    "dict",
    "Path to term-dictionary file(s) (glob expression supported)",
    false);
addOption(
    DOC_TOPIC_OUTPUT,
    "dt",
    "Output path for the training doc/topic distribution",
    false);
addOption(
    MODEL_TEMP_DIR,
    "mt",
    "Path to intermediate model path (useful for restarting)",
    false);

// Perplexity-check block size, random seed and held-out fraction.
addOption(
    ITERATION_BLOCK_SIZE,
    "block",
    "Number of iterations per perplexity check",
    String.valueOf(DEFAULT_ITERATION_BLOCK_SIZE));
addOption(RANDOM_SEED, "seed", "Random seed", false);
addOption(
    TEST_SET_FRACTION,
    "tf",
    "Fraction of data to hold out for testing",
    String.valueOf(DEFAULT_TEST_SET_FRACTION));

// Per-mapper thread counts, per-document iteration cap and reducer count.
addOption(
    NUM_TRAIN_THREADS,
    "ntt",
    "number of threads per mapper to train with",
    String.valueOf(DEFAULT_NUM_TRAIN_THREADS));
addOption(
    NUM_UPDATE_THREADS,
    "nut",
    "number of threads per mapper to update the model with",
    String.valueOf(DEFAULT_NUM_UPDATE_THREADS));
addOption(
    MAX_ITERATIONS_PER_DOC,
    "mipd",
    "max number of iterations per doc for p(topic|doc) learning",
    String.valueOf(DEFAULT_MAX_ITERATIONS_PER_DOC));
// Unlike every other call here, the second argument is null
// (presumably no short flag -- confirm).
addOption(
    NUM_REDUCE_TASKS,
    null,
    "number of reducers to use during model estimation",
    String.valueOf(DEFAULT_NUM_REDUCE_TASKS));
// Registers this job's command-line options, one statement per option.
// Registration order is kept exactly as originally written.
// NOTE(review): the four-argument addOption calls end in either a boolean or a String;
// presumably a "required" flag versus a default value -- confirm against the addOption overloads.
// NOTE(review): this same sequence of calls appears on several adjacent lines of the file as
// seen in review; confirm the repetition is intended.

// Input, output, iteration limit, convergence delta and overwrite options.
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(
    DefaultOptionCreator.CONVERGENCE_DELTA_OPTION,
    "cd",
    "The convergence delta value",
    String.valueOf(DEFAULT_CONVERGENCE_DELTA));
addOption(DefaultOptionCreator.overwriteOption().create());

// Topic count, vocabulary size and the two smoothing values.
addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
addOption(NUM_TERMS, "nt", "Vocabulary size", false);
addOption(
    DOC_TOPIC_SMOOTHING,
    "a",
    "Smoothing for document/topic distribution",
    String.valueOf(DEFAULT_DOC_TOPIC_SMOOTHING));
addOption(
    TERM_TOPIC_SMOOTHING,
    "e",
    "Smoothing for topic/term distribution",
    String.valueOf(DEFAULT_TERM_TOPIC_SMOOTHING));

// Auxiliary paths: dictionary, doc/topic output, intermediate model directory.
addOption(
    DICTIONARY,
    "dict",
    "Path to term-dictionary file(s) (glob expression supported)",
    false);
addOption(
    DOC_TOPIC_OUTPUT,
    "dt",
    "Output path for the training doc/topic distribution",
    false);
addOption(
    MODEL_TEMP_DIR,
    "mt",
    "Path to intermediate model path (useful for restarting)",
    false);

// Perplexity-check block size, random seed and held-out fraction.
addOption(
    ITERATION_BLOCK_SIZE,
    "block",
    "Number of iterations per perplexity check",
    String.valueOf(DEFAULT_ITERATION_BLOCK_SIZE));
addOption(RANDOM_SEED, "seed", "Random seed", false);
addOption(
    TEST_SET_FRACTION,
    "tf",
    "Fraction of data to hold out for testing",
    String.valueOf(DEFAULT_TEST_SET_FRACTION));

// Per-mapper thread counts, per-document iteration cap and reducer count.
addOption(
    NUM_TRAIN_THREADS,
    "ntt",
    "number of threads per mapper to train with",
    String.valueOf(DEFAULT_NUM_TRAIN_THREADS));
addOption(
    NUM_UPDATE_THREADS,
    "nut",
    "number of threads per mapper to update the model with",
    String.valueOf(DEFAULT_NUM_UPDATE_THREADS));
addOption(
    MAX_ITERATIONS_PER_DOC,
    "mipd",
    "max number of iterations per doc for p(topic|doc) learning",
    String.valueOf(DEFAULT_MAX_ITERATIONS_PER_DOC));
// Unlike every other call here, the second argument is null
// (presumably no short flag -- confirm).
addOption(
    NUM_REDUCE_TASKS,
    null,
    "number of reducers to use during model estimation",
    String.valueOf(DEFAULT_NUM_REDUCE_TASKS));
// Registers this job's command-line options, one statement per option.
// Registration order is kept exactly as originally written.
// NOTE(review): the four-argument addOption calls end in either a boolean or a String;
// presumably a "required" flag versus a default value -- confirm against the addOption overloads.
// NOTE(review): this same sequence of calls appears on several adjacent lines of the file as
// seen in review; confirm the repetition is intended.

// Input, output, iteration limit, convergence delta and overwrite options.
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(
    DefaultOptionCreator.CONVERGENCE_DELTA_OPTION,
    "cd",
    "The convergence delta value",
    String.valueOf(DEFAULT_CONVERGENCE_DELTA));
addOption(DefaultOptionCreator.overwriteOption().create());

// Topic count, vocabulary size and the two smoothing values.
addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
addOption(NUM_TERMS, "nt", "Vocabulary size", false);
addOption(
    DOC_TOPIC_SMOOTHING,
    "a",
    "Smoothing for document/topic distribution",
    String.valueOf(DEFAULT_DOC_TOPIC_SMOOTHING));
addOption(
    TERM_TOPIC_SMOOTHING,
    "e",
    "Smoothing for topic/term distribution",
    String.valueOf(DEFAULT_TERM_TOPIC_SMOOTHING));

// Auxiliary paths: dictionary, doc/topic output, intermediate model directory.
addOption(
    DICTIONARY,
    "dict",
    "Path to term-dictionary file(s) (glob expression supported)",
    false);
addOption(
    DOC_TOPIC_OUTPUT,
    "dt",
    "Output path for the training doc/topic distribution",
    false);
addOption(
    MODEL_TEMP_DIR,
    "mt",
    "Path to intermediate model path (useful for restarting)",
    false);

// Perplexity-check block size, random seed and held-out fraction.
addOption(
    ITERATION_BLOCK_SIZE,
    "block",
    "Number of iterations per perplexity check",
    String.valueOf(DEFAULT_ITERATION_BLOCK_SIZE));
addOption(RANDOM_SEED, "seed", "Random seed", false);
addOption(
    TEST_SET_FRACTION,
    "tf",
    "Fraction of data to hold out for testing",
    String.valueOf(DEFAULT_TEST_SET_FRACTION));

// Per-mapper thread counts, per-document iteration cap and reducer count.
addOption(
    NUM_TRAIN_THREADS,
    "ntt",
    "number of threads per mapper to train with",
    String.valueOf(DEFAULT_NUM_TRAIN_THREADS));
addOption(
    NUM_UPDATE_THREADS,
    "nut",
    "number of threads per mapper to update the model with",
    String.valueOf(DEFAULT_NUM_UPDATE_THREADS));
addOption(
    MAX_ITERATIONS_PER_DOC,
    "mipd",
    "max number of iterations per doc for p(topic|doc) learning",
    String.valueOf(DEFAULT_MAX_ITERATIONS_PER_DOC));
// Unlike every other call here, the second argument is null
// (presumably no short flag -- confirm).
addOption(
    NUM_REDUCE_TASKS,
    null,
    "number of reducers to use during model estimation",
    String.valueOf(DEFAULT_NUM_REDUCE_TASKS));