/**
 * Returns the n most important features with their
 * weights, most important category and the top few
 * categories that they affect.
 *
 * @param n How many results to return. Zero or a negative value yields an empty list.
 * @return A list of at most n top variables, sorted in reverse of Weight's natural order.
 */
public List<Weight> summary(int n) {
  // Guard: with n < 0 the trimming step below could never reach size <= n,
  // because poll() on an empty queue simply returns null instead of failing.
  if (n <= 0) {
    return new ArrayList<>();
  }

  // Bounded heap: poll() evicts the least element in natural order, so after
  // each insertion only the n greatest weights remain.
  Queue<Weight> pq = new PriorityQueue<>();
  for (Map.Entry<String, Vector> entry : weightMap.entrySet()) {
    pq.add(new Weight(entry.getKey(), entry.getValue()));
    // The queue grows by exactly one per add, so a single eviction is enough.
    if (pq.size() > n) {
      pq.poll();
    }
  }

  // A PriorityQueue does not iterate in sorted order; sort explicitly.
  List<Weight> r = new ArrayList<>(pq);
  Collections.sort(r, Ordering.natural().reverse());
  return r;
}
/**
 * Returns the n most important features with their
 * weights, most important category and the top few
 * categories that they affect.
 *
 * @param n How many results to return. Zero or a negative value yields an empty list.
 * @return A list of at most n top variables, sorted in reverse of Weight's natural order.
 */
public List<Weight> summary(int n) {
  Queue<Weight> pq = new PriorityQueue<Weight>();

  // Only fill the heap for a positive n. With n < 0 a "trim until size <= n"
  // loop can never finish, because poll() on an empty queue just returns null.
  if (n > 0) {
    for (Map.Entry<String, Vector> entry : weightMap.entrySet()) {
      pq.add(new Weight(entry.getKey(), entry.getValue()));
      // poll() drops the least element, keeping the n greatest seen so far.
      // One add can overflow the bound by at most one entry.
      if (pq.size() > n) {
        pq.poll();
      }
    }
  }

  // A PriorityQueue does not iterate in sorted order; sort explicitly.
  List<Weight> r = Lists.newArrayList(pq);
  Collections.sort(r, Ordering.natural().reverse());
  return r;
}
/**
 * Prints a tab-separated summary of the most heavily weighted features of the
 * best learner currently held by the given learning algorithm.
 *
 * A random sample of the input files is re-encoded with tracing enabled and fed
 * to a {@code ModelDissector}; the top 100 entries of its summary are then
 * written to standard output.
 *
 * @param newsGroups        dictionary whose values supply the category names that are printed
 * @param learningAlgorithm source of the learner to dissect (via {@code getBest()})
 * @param files             candidate files; at most 500 of them, chosen after permutation, are used
 * @throws IOException declared for the file encoding step
 */
private static void dissect(Dictionary newsGroups,
                            AdaptiveLogisticRegression learningAlgorithm,
                            Iterable<File> files) throws IOException {
  CrossFoldLearner model = learningAlgorithm.getBest().getPayload().getLearner();
  // NOTE(review): close() is presumably needed to finalize the learner before
  // its weights are read; confirm against CrossFoldLearner.
  model.close();

  Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
  ModelDissector md = new ModelDissector();

  // Both encoders record into the same trace dictionary.
  encoder.setTraceDictionary(traceDictionary);
  bias.setTraceDictionary(traceDictionary);

  // Clamp the sample size: subList(0, 500) would throw IndexOutOfBoundsException
  // when fewer than 500 files are available.
  List<File> shuffled = permute(files, rand);
  int sampleSize = Math.min(500, shuffled.size());
  for (File file : shuffled.subList(0, sampleSize)) {
    // Start from an empty trace so the update sees only what is recorded
    // while this file is encoded.
    traceDictionary.clear();
    Vector v = encodeFeatureVector(file);
    md.update(v, traceDictionary, model);
  }

  List<String> ngNames = Lists.newArrayList(newsGroups.values());
  List<ModelDissector.Weight> weights = md.summary(100);
  for (ModelDissector.Weight w : weights) {
    // NOTE(review): the format pairs %.1f with getCategory(..) and %s with
    // getWeight(..); confirm the return types match these conversions.
    // NOTE(review): the "+ 1" offset on getMaxImpact() presumably maps a model
    // category to its name index; verify against the dictionary layout.
    System.out.printf("%s\t%.1f\t%s\t%.1f\t%s\t%.1f\t%s\n",
        w.getFeature(), w.getWeight(), ngNames.get(w.getMaxImpact() + 1),
        w.getCategory(1), w.getWeight(1), w.getCategory(2), w.getWeight(2));
  }
}
/**
 * Returns the n most important features with their
 * weights, most important category and the top few
 * categories that they affect.
 *
 * @param n How many results to return. Zero or a negative value yields an empty list.
 * @return A list of at most n top variables, sorted in reverse of Weight's natural order.
 */
public List<Weight> summary(int n) {
  Queue<Weight> pq = new PriorityQueue<Weight>();

  // Only fill the heap for a positive n. With n < 0 a "trim until size <= n"
  // loop can never finish, because poll() on an empty queue just returns null.
  if (n > 0) {
    for (Map.Entry<String, Vector> entry : weightMap.entrySet()) {
      pq.add(new Weight(entry.getKey(), entry.getValue()));
      // poll() drops the least element, keeping the n greatest seen so far.
      // One add can overflow the bound by at most one entry.
      if (pq.size() > n) {
        pq.poll();
      }
    }
  }

  // A PriorityQueue does not iterate in sorted order; sort explicitly.
  List<Weight> r = Lists.newArrayList(pq);
  Collections.sort(r, Ordering.natural().reverse());
  return r;
}