Collection<Link> linkset = links.get(link.getID1()); if (linkset != null) for (Link oldlink : linkset) if (oldlink.equals(link)) { if (link.getKind() == LinkKind.SAME) { copyall(link.getID1(), link.getID2()); copyall(link.getID2(), link.getID1()); } else if (link.getKind() == LinkKind.DIFFERENT) { Collection<String> klass = getClass(link.getID1()); for (String id : klass) addLink2(new Link(id, link.getID2(), link.getStatus(), LinkKind.DIFFERENT, link.getConfidence())); klass = getClass(link.getID2()); for (String id : klass) addLink2(new Link(id, link.getID1(), link.getStatus(), LinkKind.DIFFERENT, link.getConfidence()));
/** * Returns true if the information in this link should take * precedence over the information in the other link. */ public boolean overrides(Link other) { if (other.getStatus() == LinkStatus.ASSERTED && status != LinkStatus.ASSERTED) return false; else if (status == LinkStatus.ASSERTED && other.getStatus() != LinkStatus.ASSERTED) return true; // the two links are from equivalent sources of information, so we // believe the most recent return timestamp > other.getTimestamp(); }
/**
 * For every ID returned by getClass(id1), re-creates each link that
 * getAllLinksFor(id2) returns so that it runs from that ID to the far
 * end of the original link. Status, kind and confidence are carried
 * over unchanged. A link that would connect an ID to itself is skipped.
 *
 * @param id1 the ID whose class members receive the copied links
 * @param id2 the ID whose links are copied
 */
private void copyall(String id1, String id2) {
  for (String member : getClass(id1)) {
    for (Link source : getAllLinksFor(id2)) {
      String target = source.getOtherId(id2);
      // never link an ID to itself
      if (!member.equals(target))
        addLink2(new Link(member, target, source.getStatus(),
                          source.getKind(), source.getConfidence()));
    }
  }
}
/**
 * Asserts that two links agree on ID1, ID2, status, kind and
 * confidence. Confidence is compared with a tolerance of 0.0001.
 * Timestamps are not compared.
 *
 * @param l1 the expected link
 * @param l2 the actual link
 */
public static void verifySame(Link l1, Link l2) {
  assertEquals("wrong ID1", l1.getID1(), l2.getID1());
  assertEquals("wrong ID2", l1.getID2(), l2.getID2());
  assertEquals("wrong status", l1.getStatus(), l2.getStatus());
  assertEquals("wrong kind", l1.getKind(), l2.getKind());
  // labelled like the other checks so a failure says which field differed
  assertEquals("wrong confidence", l1.getConfidence(), l2.getConfidence(), 0.0001);
}
/**
 * Reads a link test file into a map keyed by "id1,id2".
 *
 * Each line is split at its first comma. The first character is the
 * sign: '+' produces a SAME link, any other character a DIFFERENT
 * link. The text between the sign and the comma is id1, and the text
 * after the comma is id2. Every link is created with status ASSERTED
 * and confidence 0.0.
 *
 * Lines are not validated: a line without a comma (including an empty
 * line) makes substring throw StringIndexOutOfBoundsException.
 *
 * @param testfile path of the file to read
 * @return the links found in the file, keyed by "id1,id2"
 * @throws IOException if the file cannot be opened or read
 */
public static Map<String, Link> load(String testfile) throws IOException {
  Map<String, Link> links = new HashMap<String, Link>();
  BufferedReader reader = new BufferedReader(new FileReader(testfile));
  try {
    String line = reader.readLine();
    while (line != null) {
      int pos = line.indexOf(',');
      String id1 = line.substring(1, pos);
      String id2 = line.substring(pos + 1);
      LinkKind kind = line.charAt(0) == '+' ? LinkKind.SAME : LinkKind.DIFFERENT;

      links.put(id1 + "," + id2,
                new Link(id1, id2, LinkStatus.ASSERTED, kind, 0.0));
      line = reader.readLine();
    }
  } finally {
    // release the file handle even when reading or parsing fails
    reader.close();
  }
  return links;
}
} // closes the enclosing type, whose header lies outside this excerpt
private boolean traverseFrom(String id, String goalid, Set<String> seen) { seen.add(id); if (links.get(id) == null) return false; for (Link link : links.get(id)) { // check that this is a SAME link if (link.getKind() != LinkKind.SAME) continue; // find the ID that is not 'id' (find the ID at the other end) String otherid = link.getID1(); if (otherid.equals(id)) otherid = link.getID2(); // if we haven't seen it, and it's not goalid, keep traversing if (otherid.equals(goalid)) return true; else if (!seen.contains(otherid)) { if (traverseFrom(otherid, goalid, seen)) return true; // found it! // else: keep trying } } return false; }
/**
 * Collects every ID reachable from {@code id} by following SAME links,
 * including {@code id} itself.
 *
 * @param id   the ID to start from; it is added to seen
 * @param seen the IDs collected so far; updated in place
 * @return the same {@code seen} set that was passed in
 */
public Set<String> traverseAll(String id, Set<String> seen) {
  seen.add(id);

  // an ID with no recorded links has nothing further to visit; without
  // this guard the loop below would throw a NullPointerException
  if (links.get(id) == null)
    return seen;

  for (Link link : links.get(id)) {
    String other = link.getOtherId(id);
    if (link.getKind() == LinkKind.SAME && !seen.contains(other))
      traverseAll(other, seen);
  }
  return seen;
}
public synchronized void matches(Record r1, Record r2, double confidence) { String id1 = getid(r1); String id2 = getid(r2); Link link = golddb.inferLink(id1, id2); if (link == null) { unknown++; // we don't know if this one is right or not if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nNOT IN TEST FILE", props, pretty); } else if (link.getKind() == LinkKind.SAME) // no counting now; we do that when we're done dukedb.assertLink(new Link(id1, id2, LinkStatus.INFERRED, LinkKind.SAME, confidence)); else if (link.getKind() == LinkKind.DIFFERENT) { wrongfound++; // we found it, but it's not right if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nINCORRECT", props, pretty); } else { unknown++; // we don't know if this one is right or not if (debug && !showmatches) PrintMatchListener.show(r1, r2, confidence, "\nUNKNOWN LINK TYPE", props, pretty); } }
public void link(String id1, String id2, double confidence) throws IOException { boolean correct = true; // does this provide new information, or do we know it already? Link inferredlink = null; if (linkdb != null) inferredlink = linkdb.inferLink(id1, id2); // record it if (console != null) { if (inferredlink == null) { correct = console.yesorno(); confidence = 1.0; // the user told us, which is as certain as it gets } else { correct = inferredlink.getKind() == LinkKind.SAME; confidence = inferredlink.getConfidence(); } } // note that we also write inferred links, because the test file // listener does not do inference writer.write(id1, id2, correct, confidence); out.flush(); // make sure we preserve the data if (linkdb != null && inferredlink == null) { Link link = new Link(id1, id2, LinkStatus.ASSERTED, correct ? LinkKind.SAME : LinkKind.DIFFERENT, 1.0); linkdb.assertLink(link); } }
public Link inferLink(String id1, String id2) { // FIXME: it's possible that we find inconsistencies here. for now we // ignore that. if we've seen a link between these two IDs then that // means we're not going to ask the user about it. Collection<Link> ourlinks = links.get(id1); if (ourlinks != null) { for (Link link : ourlinks) if (link.getID1().equals(id2) || link.getID2().equals(id2)) return link; // if we get here it means we couldn't find a direct link. move on // to see if we can find an indirect one. } // can we prove that these belong to the same equivalence class? // basically, need to traverse graph outwards from ID1 to see if // we ever get to ID2. if (traverseFrom(id1, id2, new HashSet())) return new Link(id1, id2, LinkStatus.ASSERTED, LinkKind.SAME, 0.0); else return null; }
@Test public void testGetSinceForever() { Link l1 = new Link("1", "2", LinkStatus.INFERRED, LinkKind.SAME, 1.0); linkdb.assertLink(l1); Link l2 = new Link("1", "3", LinkStatus.INFERRED, LinkKind.SAME, 1.0); linkdb.assertLink(l2); List<Link> links = linkdb.getChangesSince(0); assertEquals(2, links.size()); // we don't know the order, so must check Link ll1; Link ll2; if (links.get(0).equals(l2)) { ll1 = links.get(1); ll2 = links.get(0); } else { ll1 = links.get(0); ll2 = links.get(1); } assertEquals(l1, ll1); assertEquals(l2, ll2); }
@Test public void testSingleRecordIdempotent() { // we want to verify that seeing the same link twice doesn't cause // the timestamp to be updated in the link database Record r1 = makeRecord("id", "1"); Record r2 = makeRecord("id", "2"); listener.startProcessing(); listener.batchReady(1); listener.matches(r1, r2, 0.95); listener.batchDone(); listener.endProcessing(); Collection<Link> all = linkdb.getAllLinks(); assertEquals(1, all.size()); Link original = all.iterator().next(); TestUtils.verifySame(new Link("1", "2", LinkStatus.INFERRED, LinkKind.SAME, 0.95), original); listener.startProcessing(); listener.batchReady(1); listener.matches(r1, r2, 0.947); listener.batchDone(); listener.endProcessing(); all = linkdb.getAllLinks(); assertEquals(1, all.size()); Link newlink = all.iterator().next(); TestUtils.verifySame(new Link("1", "2", LinkStatus.INFERRED, LinkKind.SAME, 0.95), newlink); assertEquals(original.getTimestamp(), newlink.getTimestamp()); }
public LinkKind getLinkKind(String id1, String id2) { Link link = linkdb.inferLink(id1, id2); if (link == null) return LinkKind.DIFFERENT; // we assume missing links are incorrect return link.getKind(); } }
ResultSet rs = stmt.executeQuery("select * from " + tblprefix + "links where " + "id1 = '" + escape(link.getID1()) + "' and " + "id2 = '" + escape(link.getID2()) + "'"); if (rs.next()) { existing = makeLink(rs); rs.close(); if (!link.overrides(existing)) return; // the existing link rules, so we shut up and go away logger.trace("Updating link for " + link.getID1() + " and " + link.getID2()); query = "update " + tblprefix + "links set status = " + link.getStatus().getId() + " , kind = " + link.getKind().getId() + " , timestamp = " + dbtype.getNow() + " " + " , confidence = " + link.getConfidence() + " " + "where id1 = '" + escape(link.getID1()) + "' " + " and id2 = '" + escape(link.getID2()) + "' "; } else { logger.trace("Inserting link for " + link.getID1() + " and " + link.getID2()); query = "insert into " + tblprefix + "links values ('" + escape(link.getID1()) + "', " + " '" + escape(link.getID2()) + "', " + link.getKind().getId() + " , " + link.getStatus().getId() + ", " + dbtype.getNow() + ", " + link.getConfidence() + ") ";
/**
 * Builds a Link from the current row of a result set. The row is read
 * through the columns id1, id2, status, kind, timestamp and confidence,
 * in that order.
 *
 * @param rs a result set positioned on the row to convert
 * @return the link described by that row
 * @throws SQLException if a column cannot be read
 */
private Link makeLink(ResultSet rs) throws SQLException {
  String id1 = rs.getString("id1");
  String id2 = rs.getString("id2");
  LinkStatus linkStatus = LinkStatus.getbyid(rs.getInt("status"));
  LinkKind linkKind = LinkKind.getbyid(rs.getInt("kind"));
  long millis = rs.getTimestamp("timestamp").getTime();
  double conf = rs.getDouble("confidence");

  return new Link(id1, id2, linkStatus, linkKind, millis, conf);
}