/**
 * Sets how often the evolutionary optimization of learning parameters occurs,
 * using the same value for both the minimum and maximum interval.
 *
 * @param interval Number of training examples to use in each epoch of optimization.
 */
public void setInterval(int interval) {
  // Delegate to the two-argument form with identical lower and upper bounds.
  setInterval(interval, interval);
}
/**
 * Trains on a single example that has no group key.
 *
 * @param trackingKey key identifying this training example
 * @param actual      the observed category of the example
 * @param instance    the feature vector of the example
 */
@Override
public void train(long trackingKey, int actual, Vector instance) {
  // No group key for this overload; pass null through to the full form.
  train(trackingKey, null, actual, instance);
}
/**
 * Computes the record number at which the next optimization epoch should run.
 *
 * @param recordNumber the current record count
 * @return the next scheduled cutoff, never earlier than the current schedule
 */
public int nextStep(int recordNumber) {
  // Clamp the proposed step size into [minInterval, maxInterval].
  int step = Math.min(maxInterval, Math.max(minInterval, stepSize(recordNumber, 2.6)));

  // Next multiple of the step size strictly beyond recordNumber.
  int proposedCutoff = step * (recordNumber / step + 1);
  if (proposedCutoff < cutoff + currentStep) {
    // Not yet past the previously scheduled cutoff; keep the old schedule
    // and leave currentStep untouched.
    return cutoff + currentStep;
  }
  // Adopt the new step size going forward.
  this.currentStep = step;
  return proposedCutoff;
}
AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, FEATURES, new L1()); learningAlgorithm.setInterval(800); learningAlgorithm.setAveragingWindow(500); learningAlgorithm.train(actual, v); State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest(); double maxBeta; double nonZeros; norm = beta.aggregate(Functions.PLUS, Functions.ABS); lambda = learningAlgorithm.getBest().getMappedParams()[0]; mu = learningAlgorithm.getBest().getMappedParams()[1]; } else { maxBeta = 0; if (learningAlgorithm.getBest() != null) { ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0)); learningAlgorithm.close(); dissect(newsGroups, learningAlgorithm, files); System.out.println("exiting main"); learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));
AdaptiveLogisticRegression adaptiveLogisticRegression = new AdaptiveLogisticRegression(2, 200, new L1()); adaptiveLogisticRegression.setInterval(1000); adaptiveLogisticRegression.train(r.getKey(), r.getActual(), r.getInstance()); if (i % 1000 == 0 && adaptiveLogisticRegression.getBest() != null) { System.out.printf("%10d %10.4f %10.8f %.3f\n", i, adaptiveLogisticRegression.auc(), Math.log10(adaptiveLogisticRegression.getBest().getMappedParams()[0]), adaptiveLogisticRegression.getBest().getMappedParams()[1]); assertEquals(1, adaptiveLogisticRegression.auc(), 0.1); adaptiveLogisticRegression.close();
@Test
@ThreadLeakLingering(linger = 1000)
public void constantStep() {
  AdaptiveLogisticRegression lr = new AdaptiveLogisticRegression(2, 1000, new L1());
  lr.setInterval(5000);
  // With a fixed 5000-record interval, every record in [15000, 20000)
  // maps to the same next cutoff of 20000.
  for (int record : new int[] {15000, 15001, 16500, 19999}) {
    assertEquals(20000, lr.nextStep(record));
  }
  lr.close();
}
@ThreadLeakLingering(linger = 1000)
@Test
public void adaptiveLogisticRegressionRoundTrip() throws IOException {
  AdaptiveLogisticRegression learner = new AdaptiveLogisticRegression(2, 5, new L1());
  learner.setInterval(200);
  train(learner, 400);
  // Serialize and deserialize; the copy must score identically to the source.
  AdaptiveLogisticRegression olr3 = roundTrip(learner, AdaptiveLogisticRegression.class);
  double auc1 = learner.auc();
  assertTrue(auc1 > 0.85);
  // Both the original and the round-tripped copy must report the same AUC.
  assertEquals(auc1, learner.auc(), 1.0e-6);
  assertEquals(auc1, olr3.auc(), 1.0e-6);
  // NOTE(review): the original learner is trained twice here while the
  // round-tripped copy is trained once — presumably intentional, but confirm;
  // the 0.005-tolerance comparison below relies on the extra 1000 examples
  // not moving the AUC much.
  train(learner, 1000);
  train(learner, 1000);
  train(olr3, 1000);
  // NOTE(review): this assertion compares learner.auc() with itself and is
  // trivially true — possibly intended to compare against olr3, which the
  // next line already does. Confirm intent before removing.
  assertEquals(learner.auc(), learner.auc(), 0.005);
  assertEquals(learner.auc(), olr3.auc(), 0.005);
  double auc2 = learner.auc();
  // Additional training should have improved the AUC.
  assertTrue(String.format("%.3f > %.3f", auc2, auc1), auc2 > auc1);
  learner.close();
  olr3.close();
}
/**
 * Sets the size of the evolutionary pool and rebuilds the optimizer to match.
 *
 * @param poolSize the new pool size
 */
public void setPoolSize(int poolSize) {
  this.poolSize = poolSize;
  // The optimizer must be reconstructed whenever the pool size changes.
  setupOptimizer(this.poolSize);
}
@Override public void train(long trackingKey, String groupKey, int actual, Vector instance) { record++; buffer.add(new TrainingExample(trackingKey, groupKey, actual, instance)); //don't train until we have enough examples if (buffer.size() > bufferSize) { trainWithBufferedExamples(); } }
cutoff = nextStep(record);
/**
 * Constructs an adaptive logistic regression trainer.
 *
 * @param numCategories The number of categories (labels) to train on
 * @param numFeatures The number of features used in creating the vectors (i.e. the cardinality of the vector)
 * @param prior The {@link org.apache.mahout.classifier.sgd.PriorFunction} to use
 * @param threadCount The number of threads to use for training
 * @param poolSize The number of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} to use.
 */
public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior,
                                  int threadCount, int poolSize) {
  this.numFeatures = numFeatures;
  this.threadCount = threadCount;
  this.poolSize = poolSize;
  // Seed state carries two evolvable parameters and a 10x mutation range.
  seed = new State<Wrapper, CrossFoldLearner>(new double[2], 10);
  Wrapper w = new Wrapper(numCategories, numFeatures, prior);
  seed.setPayload(w);
  // Fix: the original called seed.setPayload(w) a second time after
  // Wrapper.setMappings(seed); the duplicate call was redundant and removed.
  Wrapper.setMappings(seed);
  setPoolSize(this.poolSize);
}
// Prints the most influential features of the best model found so far.
private static void dissect(Dictionary newsGroups,
                            AdaptiveLogisticRegression learningAlgorithm,
                            Iterable<File> files) throws IOException {
  // Take the best learner found so far and finalize it before inspection.
  CrossFoldLearner model = learningAlgorithm.getBest().getPayload().getLearner();
  model.close();
  Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
  ModelDissector md = new ModelDissector();
  // Route encoder traces into the shared dictionary so the dissector can
  // attribute weights back to named features.
  encoder.setTraceDictionary(traceDictionary);
  bias.setTraceDictionary(traceDictionary);
  // Sample 500 randomly permuted files; the trace dictionary is reset per file
  // so each update sees only that file's features.
  for (File file : permute(files, rand).subList(0, 500)) {
    traceDictionary.clear();
    Vector v = encodeFeatureVector(file);
    md.update(v, traceDictionary, model);
  }
  List<String> ngNames = Lists.newArrayList(newsGroups.values());
  // Report the 100 heaviest features.
  List<ModelDissector.Weight> weights = md.summary(100);
  for (ModelDissector.Weight w : weights) {
    // NOTE(review): the "+ 1" offset into ngNames looks suspicious — presumably
    // it compensates for an indexing offset in getMaxImpact(), but confirm; an
    // off-by-one here would mislabel every reported category.
    System.out.printf("%s\t%.1f\t%s\t%.1f\t%s\t%.1f\t%s\n",
        w.getFeature(), w.getWeight(), ngNames.get(w.getMaxImpact() + 1),
        w.getCategory(1), w.getWeight(1), w.getCategory(2), w.getWeight(2));
  }
}
@Test @ThreadLeakLingering(linger = 1000) public void growingStep() { AdaptiveLogisticRegression lr = new AdaptiveLogisticRegression(2, 1000, new L1()); lr.setInterval(2000, 10000); // start with minimum step size for (int i = 2000; i < 20000; i+=2000) { assertEquals(i + 2000, lr.nextStep(i)); } // then level up a bit for (int i = 20000; i < 50000; i += 5000) { assertEquals(i + 5000, lr.nextStep(i)); } // and more, but we top out with this step size for (int i = 50000; i < 500000; i += 10000) { assertEquals(i + 10000, lr.nextStep(i)); } lr.close(); } }
/**
 * Sets the number of training threads and rebuilds the optimizer so that the
 * new thread count takes effect.
 *
 * @param threadCount the number of threads to use for training
 */
public void setThreadCount(int threadCount) {
  this.threadCount = threadCount;
  // Rebuild with the existing pool size; only the thread count changed.
  setupOptimizer(poolSize);
}
@Override public void train(long trackingKey, String groupKey, int actual, Vector instance) { record++; buffer.add(new TrainingExample(trackingKey, groupKey, actual, instance)); //don't train until we have enough examples if (buffer.size() > bufferSize) { trainWithBufferedExamples(); } }
cutoff = nextStep(record);
/**
 * Constructs an adaptive logistic regression trainer.
 *
 * @param numCategories The number of categories (labels) to train on
 * @param numFeatures The number of features used in creating the vectors (i.e. the cardinality of the vector)
 * @param prior The {@link org.apache.mahout.classifier.sgd.PriorFunction} to use
 * @param threadCount The number of threads to use for training
 * @param poolSize The number of {@link org.apache.mahout.classifier.sgd.CrossFoldLearner} to use.
 */
public AdaptiveLogisticRegression(int numCategories, int numFeatures, PriorFunction prior,
                                  int threadCount, int poolSize) {
  this.numFeatures = numFeatures;
  this.threadCount = threadCount;
  this.poolSize = poolSize;
  // Seed state carries two evolvable parameters and a 10x mutation range.
  seed = new State<Wrapper, CrossFoldLearner>(new double[2], 10);
  Wrapper w = new Wrapper(numCategories, numFeatures, prior);
  seed.setPayload(w);
  // Fix: the original called seed.setPayload(w) a second time after
  // Wrapper.setMappings(seed); the duplicate call was redundant and removed.
  Wrapper.setMappings(seed);
  setPoolSize(this.poolSize);
}
/**
 * Trains on one example, using the running record count as a synthetic
 * tracking key.
 *
 * @param actual   the observed category of the example
 * @param instance the feature vector of the example
 */
@Override
public void train(int actual, Vector instance) {
  // No caller-supplied key; fall back to the current record counter.
  train(record, null, actual, instance);
}
/**
 * Updates the training thread count and reconstructs the optimizer so the
 * change takes effect immediately.
 *
 * @param threadCount the number of threads to use for training
 */
public void setThreadCount(int threadCount) {
  this.threadCount = threadCount;
  // Pool size is unchanged; the rebuild just picks up the new thread count.
  setupOptimizer(poolSize);
}
/**
 * Controls how frequently the evolutionary optimization of learning
 * parameters runs. A single value fixes both ends of the interval range.
 *
 * @param interval Number of training examples to use in each epoch of optimization.
 */
public void setInterval(int interval) {
  // Same value for minimum and maximum yields a constant interval.
  setInterval(interval, interval);
}