public LinkKind getLinkKind(String id1, String id2) { Link link = linkdb.inferLink(id1, id2); if (link == null) return LinkKind.DIFFERENT; // we assume missing links are incorrect return link.getKind(); } }
public void checkConsistency() { for (String id : links.keySet()) { // find all IDs which we are *not* equal to Set<String> diff = new HashSet(); for (Link link : links.get(id)) if (link.getKind() == LinkKind.DIFFERENT) diff.add(link.getOtherId(id)); // then, find all IDs which we, implicity or explicitly, are equal to for (String eqid : traverseAll(id, new HashSet<String>())) if (diff.contains(eqid)) System.out.println("Inconsistency: " + id + " <-> " + eqid); } }
/**
 * Builds the collection consisting of the given ID followed by the ID at the
 * other end of every SAME link returned by {@code getAllLinksFor(id)}.
 * The result is a list, so an ID may appear more than once.
 *
 * @param id the ID whose directly linked SAME IDs are wanted
 * @return a new collection; always contains at least {@code id}
 */
private Collection<String> getClass(String id) {
  Collection<String> klass = new ArrayList<String>();
  klass.add(id);
  for (Link link : getAllLinksFor(id)) {
    if (link.getKind() == LinkKind.SAME) {
      klass.add(link.getOtherId(id));
    }
  }
  return klass;
}
/**
 * Adds the given ID, and every ID reachable from it by following SAME links
 * through IDs not already in {@code seen}, to {@code seen}.
 *
 * @param id   the ID to start from
 * @param seen the IDs visited so far; modified in place
 * @return the same set that was passed in as {@code seen}
 */
public Set<String> traverseAll(String id, Set<String> seen) {
  seen.add(id);

  // no links are recorded for this ID, so there is nothing to follow;
  // without this check the loop below would throw a NullPointerException
  if (links.get(id) == null) {
    return seen;
  }

  for (Link link : links.get(id)) {
    String other = link.getOtherId(id);
    if (link.getKind() == LinkKind.SAME && !seen.contains(other)) {
      traverseAll(other, seen);
    }
  }
  return seen;
}
private boolean traverseFrom(String id, String goalid, Set<String> seen) { seen.add(id); if (links.get(id) == null) return false; for (Link link : links.get(id)) { // check that this is a SAME link if (link.getKind() != LinkKind.SAME) continue; // find the ID that is not 'id' (find the ID at the other end) String otherid = link.getID1(); if (otherid.equals(id)) otherid = link.getID2(); // if we haven't seen it, and it's not goalid, keep traversing if (otherid.equals(goalid)) return true; else if (!seen.contains(otherid)) { if (traverseFrom(otherid, goalid, seen)) return true; // found it! // else: keep trying } } return false; }
int wrong = 0; for (Link link : golddb.getAllLinks()) { if (link.getKind() == LinkKind.SAME) correct++; else
/**
 * For every ID returned by {@code getClass(id1)}, passes to {@code addLink2}
 * a copy of each link of {@code id2}, re-pointed so that it runs from that ID
 * to the link's other end. Copies that would link an ID to itself are skipped.
 * Status, kind and confidence are taken from the link being copied.
 *
 * @param id1 the ID whose class receives the copies
 * @param id2 the ID whose links are copied
 */
private void copyall(String id1, String id2) {
  for (String member : getClass(id1)) {
    for (Link source : getAllLinksFor(id2)) {
      String target = source.getOtherId(id2);
      if (!member.equals(target)) {
        Link copy = new Link(member, target, source.getStatus(),
                             source.getKind(), source.getConfidence());
        addLink2(copy);
      }
    }
  }
}
public synchronized void matches(Record r1, Record r2, double confidence) { String id1 = getid(r1); String id2 = getid(r2); Link link = golddb.inferLink(id1, id2); if (link == null) { unknown++; // we don't know if this one is right or not if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nNOT IN TEST FILE", props, pretty); } else if (link.getKind() == LinkKind.SAME) // no counting now; we do that when we're done dukedb.assertLink(new Link(id1, id2, LinkStatus.INFERRED, LinkKind.SAME, confidence)); else if (link.getKind() == LinkKind.DIFFERENT) { wrongfound++; // we found it, but it's not right if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nINCORRECT", props, pretty); } else { unknown++; // we don't know if this one is right or not if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nUNKNOWN LINK TYPE", props, pretty); } }
public synchronized void noMatchFor(Record r) { // we missed all of the correct links for this record (if any). // count, and tell the user. for (Link link : golddb.getAllLinksFor(getid(r))) { if (link.getKind() != LinkKind.SAME) continue; // it's a bad link, so never mind missed++; Record r1 = database.findRecordById(link.getID1()); Record r2 = database.findRecordById(link.getID2()); if (r1 != null && r2 != null) { if (debug && !showmatches) PrintMatchListener.show(r1, r2, processor.compare(r1, r2), "\nNOT FOUND", props, pretty); } else if (debug && !showmatches) { System.out.println("\nIDENTITIES IN TEST FILE NOT FOUND IN DATA"); System.out.println("ID1: " + link.getID1() + " -> " + r1); System.out.println("ID2: " + link.getID2() + " -> " + r2); } } }
public void link(String id1, String id2, double confidence) throws IOException { boolean correct = true; // does this provide new information, or do we know it already? Link inferredlink = null; if (linkdb != null) inferredlink = linkdb.inferLink(id1, id2); // record it if (console != null) { if (inferredlink == null) { correct = console.yesorno(); confidence = 1.0; // the user told us, which is as certain as it gets } else { correct = inferredlink.getKind() == LinkKind.SAME; confidence = inferredlink.getConfidence(); } } // note that we also write inferred links, because the test file // listener does not do inference writer.write(id1, id2, correct, confidence); out.flush(); // make sure we preserve the data if (linkdb != null && inferredlink == null) { Link link = new Link(id1, id2, LinkStatus.ASSERTED, correct ? LinkKind.SAME : LinkKind.DIFFERENT, 1.0); linkdb.assertLink(link); } }
query = "update " + tblprefix + "links set status = " + link.getStatus().getId() + " , kind = " + link.getKind().getId() + " , timestamp = " + dbtype.getNow() + " " + " , confidence = " + link.getConfidence() + " " + link.getID2()); query = "insert into " + tblprefix + "links values ('" + escape(link.getID1()) + "', " + " '" + escape(link.getID2()) + "', " + link.getKind().getId() + " , " + link.getStatus().getId() + ", " + dbtype.getNow() + ", " + link.getConfidence() + ") ";
/**
 * Asserts that two links agree on ID1, ID2, status and kind, and that their
 * confidences differ by no more than 0.0001.
 *
 * @param l1 the link supplying the expected values
 * @param l2 the link supplying the actual values
 */
public static void verifySame(Link l1, Link l2) {
  assertEquals("wrong ID1", l1.getID1(), l2.getID1());
  assertEquals("wrong ID2", l1.getID2(), l2.getID2());
  assertEquals("wrong status", l1.getStatus(), l2.getStatus());
  assertEquals("wrong kind", l1.getKind(), l2.getKind());
  // labelled like the other checks so a failure says which field differed
  assertEquals("wrong confidence", l1.getConfidence(), l2.getConfidence(), 0.0001);
}
public LinkKind getLinkKind(String id1, String id2) { Link link = linkdb.inferLink(id1, id2); if (link == null) return LinkKind.DIFFERENT; // we assume missing links are incorrect return link.getKind(); } }
public void checkConsistency() { for (String id : links.keySet()) { // find all IDs which we are *not* equal to Set<String> diff = new HashSet(); for (Link link : links.get(id)) if (link.getKind() == LinkKind.DIFFERENT) diff.add(link.getOtherId(id)); // then, find all IDs which we, implicity or explicitly, are equal to for (String eqid : traverseAll(id, new HashSet())) if (diff.contains(eqid)) System.out.println("Inconsistency: " + id + " <-> " + eqid); } }
/**
 * Builds the collection consisting of the given ID followed by the ID at the
 * other end of every SAME link returned by {@code getAllLinksFor(id)}.
 * The result is a list, so an ID may appear more than once.
 *
 * @param id the ID whose directly linked SAME IDs are wanted
 * @return a new collection; always contains at least {@code id}
 */
private Collection<String> getClass(String id) {
  Collection<String> klass = new ArrayList<String>();
  klass.add(id);
  for (Link link : getAllLinksFor(id)) {
    if (link.getKind() == LinkKind.SAME) {
      klass.add(link.getOtherId(id));
    }
  }
  return klass;
}
/**
 * Adds the given ID, and every ID reachable from it by following SAME links
 * through IDs not already in {@code seen}, to {@code seen}.
 *
 * @param id   the ID to start from
 * @param seen the IDs visited so far; modified in place
 * @return the same set that was passed in as {@code seen}
 */
public Set<String> traverseAll(String id, Set<String> seen) {
  seen.add(id);

  // no links are recorded for this ID, so there is nothing to follow;
  // without this check the loop below would throw a NullPointerException
  if (links.get(id) == null) {
    return seen;
  }

  for (Link link : links.get(id)) {
    String other = link.getOtherId(id);
    if (link.getKind() == LinkKind.SAME && !seen.contains(other)) {
      traverseAll(other, seen);
    }
  }
  return seen;
}
/**
 * For every ID returned by {@code getClass(id1)}, passes to {@code addLink2}
 * a copy of each link of {@code id2}, re-pointed so that it runs from that ID
 * to the link's other end. Copies that would link an ID to itself are skipped.
 * Status, kind and confidence are taken from the link being copied.
 *
 * @param id1 the ID whose class receives the copies
 * @param id2 the ID whose links are copied
 */
private void copyall(String id1, String id2) {
  for (String member : getClass(id1)) {
    for (Link source : getAllLinksFor(id2)) {
      String target = source.getOtherId(id2);
      if (!member.equals(target)) {
        Link copy = new Link(member, target, source.getStatus(),
                             source.getKind(), source.getConfidence());
        addLink2(copy);
      }
    }
  }
}
public synchronized void noMatchFor(Record r) { // we missed all of the correct links for this record (if any). // count, and tell the user. for (Link link : golddb.getAllLinksFor(getid(r))) { if (link.getKind() != LinkKind.SAME) continue; // it's a bad link, so never mind missed++; Record r1 = database.findRecordById(link.getID1()); Record r2 = database.findRecordById(link.getID2()); if (r1 != null && r2 != null) { if (debug && !showmatches) PrintMatchListener.show(r1, r2, processor.compare(r1, r2), "\nNOT FOUND", props, pretty); } else if (debug && !showmatches) { System.out.println("\nIDENTITIES IN TEST FILE NOT FOUND IN DATA"); System.out.println("ID1: " + link.getID1() + " -> " + r1); System.out.println("ID2: " + link.getID2() + " -> " + r2); } } }