private void applyFilter() { List<Pair> chosen = new ArrayList(); for (int next = 0; chosen.size() < questions * 2 && next < exemplars.size(); next++) { Pair pair = exemplars.get(next); if (testdb.inferLink(pair.id1, pair.id2) != null) continue; // we already know the answer pair.believers = whoThinksThisIsTrue(pair.id1, pair.id2); chosen.add(pair); } exemplars = chosen; }
public LinkKind getLinkKind(String id1, String id2) { Link link = linkdb.inferLink(id1, id2); if (link == null) return LinkKind.DIFFERENT; // we assume missing links are incorrect return link.getKind(); } }
public Pair getNext() { if (exemplars.isEmpty()) return null; // find the candidate pair with the lowest similarity score with // already used pairs double bestscore = 2.0; Pair thebest = exemplars.get(0); // just in case for (Pair candidate : exemplars) { if (testdb.inferLink(candidate.id1, candidate.id2) != null) continue; // we already know the answer double worst = 0.0; for (Pair seen : used) { double score = compare(candidate, seen); if (score > worst) worst = score; } if (worst < bestscore) { bestscore = worst; thebest = candidate; } } // now we know which one to return used.add(thebest); exemplars.remove(thebest); return thebest; }
public LinkKind getLinkKind(String id1, String id2) { Link link = linkdb.inferLink(id1, id2); if (link == null) return LinkKind.DIFFERENT; // we assume missing links are incorrect return link.getKind(); } }
private void askQuestions(ExemplarsTracker tracker) { int count = 0; for (Pair pair : tracker.getExemplars()) { if (testdb.inferLink(pair.id1, pair.id2) != null) continue; // we already know the answer Record r1 = database.findRecordById(pair.id1); if (r1 == null) r1 = secondary.get(pair.id1); Record r2 = database.findRecordById(pair.id2); System.out.println(); PrintMatchListener.prettyCompare(r1, r2, (double) pair.counter, "Possible match", config.getProperties()); LinkKind kind = oracle.getLinkKind(pair.id1, pair.id2); Link link = new Link(pair.id1, pair.id2, LinkStatus.ASSERTED, kind, 1.0); testdb.assertLink(link); count++; if (count == questions) break; } asked += count; }