private boolean filterbytype(Record record) { if (types.isEmpty()) // there is no filtering return true; boolean found = false; for (String value : record.getValues(RDF_TYPE)) if (types.contains(value)) return true; return false; }
/**
 * Renders the values of one property of a record as a single display string.
 *
 * <p>Returns the literal text {@code <null>} when the record yields either
 * {@code null} or an empty collection for the property. Otherwise every value
 * is wrapped in single quotes and followed by a comma and a space, so the
 * result ends with a trailing {@code ", "}.
 *
 * <p>NOTE(review): the final closing brace on this line appears to end an
 * enclosing scope that starts outside this view -- confirm before moving
 * this method.
 *
 * @param r the record to read from
 * @param p the name of the property whose values are rendered
 * @return the formatted values, or {@code <null>} if there are none
 */
private static String value(Record r, String p) { Collection<String> vs = r.getValues(p); if (vs == null) return "<null>"; if (vs.isEmpty()) return "<null>"; StringBuffer buf = new StringBuffer(); for (String v : vs) { buf.append("'"); buf.append(v); buf.append("', "); } return buf.toString(); } }
/**
 * Prints a plain-text comparison of two records to standard output.
 *
 * <p>The first line is the heading followed by the confidence. Then, for
 * each property for which at least one of the records has a value, the
 * property name is printed followed by one line per record with that
 * record's values.
 *
 * @param r1 the first record
 * @param r2 the second record
 * @param confidence the confidence printed after the heading
 * @param heading the text that starts the first output line
 * @param props the properties to compare, in output order
 */
public static void prettyCompare(Record r1, Record r2, double confidence,
                                 String heading, List<Property> props) {
  System.out.println(heading + " " + confidence);

  for (Property property : props) {
    String name = property.getName();

    // r2 is only consulted when r1 has nothing for this property
    boolean firstHasValues =
        r1.getValues(name) != null && !r1.getValues(name).isEmpty();
    if (firstHasValues
        || (r2.getValues(name) != null && !r2.getValues(name).isEmpty())) {
      System.out.println(name);
      System.out.println(" " + value(r1, name));
      System.out.println(" " + value(r2, name));
    }
  }
}
/**
 * Prints an HTML comparison of two records to standard output.
 *
 * <p>Emits a paragraph holding the heading and the confidence, followed by
 * a table with one row per property for which at least one of the records
 * has a value. Each row holds the property name and the values of both
 * records. Values are written as-is, without HTML escaping.
 *
 * @param r1 the first record
 * @param r2 the second record
 * @param confidence the confidence printed after the heading
 * @param heading the text that starts the paragraph
 * @param props the properties to compare, in output order
 */
public static void htmlCompare(Record r1, Record r2, double confidence,
                               String heading, List<Property> props) {
  System.out.println("<p>" + heading + " " + confidence + "</p>");
  System.out.println("<table>");

  for (Property property : props) {
    String name = property.getName();

    // r2 is only consulted when r1 has nothing for this property
    boolean firstHasValues =
        r1.getValues(name) != null && !r1.getValues(name).isEmpty();
    if (firstHasValues
        || (r2.getValues(name) != null && !r2.getValues(name).isEmpty())) {
      System.out.println("<tr><td>" + name);
      System.out.println("<td>" + value(r1, name));
      System.out.println("<td>" + value(r2, name));
    }
  }

  System.out.println("</table>");
}
public static String toString(Record r) { StringBuffer buf = new StringBuffer(); for (String p : r.getProperties()) { Collection<String> vs = r.getValues(p); if (vs == null || vs.isEmpty()) continue; buf.append(p + ": "); for (String v : vs) buf.append("'" + v + "', "); } //buf.append(";;; " + r); return buf.toString(); }
public static String toString(Record r, List<Property> props) { StringBuffer buf = new StringBuffer(); for (Property p : props) { Collection<String> vs = r.getValues(p.getName()); if (vs == null || vs.isEmpty()) continue; buf.append(p.getName() + ": "); for (String v : vs) buf.append("'" + v + "', "); } //buf.append(";;; " + r); return buf.toString(); }
/**
 * Tells whether two records share at least one identity value.
 *
 * <p>For every identity property in the configuration, the values of both
 * records are compared; the records are considered the same as soon as one
 * value of {@code r1} also occurs among the values of {@code r2} for the
 * same property. A property for which either record returns {@code null}
 * cannot produce a match and is skipped.
 *
 * @param r1 the first record
 * @param r2 the second record
 * @return {@code true} if a shared identity value exists, {@code false} otherwise
 */
private boolean isSameAs(Record r1, Record r2) {
  for (Property idp : config.getIdentityProperties()) {
    Collection<String> vs1 = r1.getValues(idp.getName());
    Collection<String> vs2 = r2.getValues(idp.getName());
    // guard both sides: a null on either record means no overlap is possible
    if (vs1 == null || vs2 == null) {
      continue;
    }

    for (String v1 : vs1) {
      if (vs2.contains(v1)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Adds the record to the index: every value of every identity property is
 * mapped to the record in {@code idindex}, and the record itself is appended
 * to {@code records}. Identity properties for which the record returns
 * {@code null} are skipped.
 */
public void index(Record record) {
  for (Property idProperty : config.getIdentityProperties()) {
    Collection<String> ids = record.getValues(idProperty.getName());
    if (ids != null) {
      for (String id : ids) {
        idindex.put(id, record);
      }
    }
  }

  records.add(record);
}
/**
 * Registers the record in {@code idmap} under each of its identity values.
 *
 * <p>Every value of every identity property in the configuration becomes a
 * key that maps to the record. Identity properties for which the record
 * returns {@code null} are skipped instead of failing with a
 * {@link NullPointerException}.
 *
 * @param record the record to register
 */
protected void indexById(Record record) {
  for (Property idprop : config.getIdentityProperties()) {
    // fully qualified so that this block adds no import requirement
    java.util.Collection<String> ids = record.getValues(idprop.getName());
    if (ids == null) {
      continue;
    }

    for (String id : ids) {
      idmap.put(id, record);
    }
  }
}
/**
 * Returns the first identity value found in the record.
 *
 * <p>The identity properties of the configuration are tried in order, and
 * the first value of the first property that has any value is returned.
 * Properties for which the record returns {@code null} are skipped, so a
 * record lacking one identity property still resolves through another one.
 *
 * @param r the record to identify
 * @return the first identity value of the record
 * @throws RuntimeException if no identity property of the record has a value
 */
private String getIdentity(Record r) {
  for (Property p : config.getIdentityProperties()) {
    // fully qualified so that this block adds no import requirement
    java.util.Collection<String> vs = r.getValues(p.getName());
    if (vs == null) {
      continue;
    }

    for (String v : vs) {
      return v; // the first value wins
    }
  }

  throw new RuntimeException("No identity found in record [" +
                             PrintMatchListener.toString(r) + "]");
}
/**
 * Checks single-value and multi-value access on a record built from two
 * name/value pairs, including lookups of a property that was never set.
 */
@Test
public void testNormal() {
  Record record = TestUtils.makeRecord("ID", "abc", "NAME", "b");

  // ID: one value, reachable through both accessors
  assertEquals("abc", record.getValue("ID"));
  Collection<String> ids = record.getValues("ID");
  assertEquals(1, ids.size());
  assertEquals("abc", ids.iterator().next());

  // NAME: one value, reachable through both accessors
  assertEquals("b", record.getValue("NAME"));
  Collection<String> names = record.getValues("NAME");
  assertEquals(1, names.size());
  assertEquals("b", names.iterator().next());

  // EMAIL was never set: no single value and an empty collection
  assertEquals(null, record.getValue("EMAIL"));
  assertTrue(record.getValues("EMAIL").isEmpty());
}
/**
 * Finds the identity of a record: the first value of the first identity
 * property (in configuration order) that has any value. Properties for
 * which the record returns {@code null} are ignored.
 *
 * @param r the record to identify
 * @return the first identity value of the record
 * @throws DukeException if none of the identity properties has a value
 */
private String getIdentity(Record r) {
  for (Property idProperty : config.getIdentityProperties()) {
    Collection<String> candidates = r.getValues(idProperty.getName());
    if (candidates != null && !candidates.isEmpty()) {
      return candidates.iterator().next();
    }
  }

  String description = PrintMatchListener.toString(r);
  throw new DukeException("No identity found in record [" + description + "]");
}
/** * Add the record to the index. */ public void index(Record record) { // FIXME: check if record is already indexed // allocate an ID for this record long id = store.makeNewRecordId(); store.registerRecord(id, record); // go through ID properties and register them for (Property p : config.getIdentityProperties()) for (String extid : record.getValues(p.getName())) store.registerId(id, extid); // go through lookup properties and register those for (Property p : config.getLookupProperties()) { String propname = p.getName(); for (String value : record.getValues(propname)) { String[] tokens = StringUtils.split(value); for (int ix = 0; ix < tokens.length; ix++) store.registerToken(id, propname, tokens[ix]); } } }
/**
 * Feeds five triples describing two subjects, the first of which has two
 * name values, and checks that exactly two records come out, that the first
 * one carries both names, and that two pages are reported by the source.
 */
@Test
public void testTwoRecordsDoubleValue() {
  setupTripleConfig1("?uri");

  String[][] triples = new String[][] {
    {"http://a/1", "http://a/name", "1"},
    {"http://a/1", "http://a/name", "one"},
    {"http://a/1", "http://a/age", "32"},
    {"http://a/2", "http://a/name", "2"},
    {"http://a/2", "http://a/age", "23"}};
  source.setRows(triples);

  RecordIterator records = source.getRecords();

  // first record: one ID, one AGE, two NAMEs
  assertTrue("data source contains no records", records.hasNext());
  Record first = records.next();
  assertEquals("wrong ID", "http://a/1", first.getValue("ID"));
  assertEquals("wrong AGE", "32", first.getValue("AGE"));

  Collection<String> names = first.getValues("NAME");
  assertEquals("wrong number of NAMEs", 2, names.size());
  assertTrue("NAMEs doesn't contain '1'", names.contains("1"));
  assertTrue("NAMEs doesn't contain 'one'", names.contains("one"));

  // second record: single-valued throughout
  assertTrue("data source contains only one record", records.hasNext());
  Record second = records.next();
  assertEquals("wrong ID", "http://a/2", second.getValue("ID"));
  assertEquals("wrong NAME", "2", second.getValue("NAME"));
  assertEquals("wrong AGE", "23", second.getValue("AGE"));

  assertFalse("data source contains more than two records", records.hasNext());
  assertEquals("wrong number of pages", 2, source.getPages());
}
/**
 * Reads one data row whose middle column is configured to split on ";" and
 * checks that the column yields three separate values while the other two
 * columns keep their single values.
 */
@Test
public void testSplitting() throws IOException {
  Column splitColumn = new Column("F2", null, null, null);
  splitColumn.setSplitOn(";");

  source.addColumn(new Column("F1", null, null, null));
  source.addColumn(splitColumn);
  source.addColumn(new Column("F3", null, null, null));

  RecordIterator records = read("F1,F2,F3\na,b;d;e,c");
  Record record = records.next();

  assertEquals("a", record.getValue("F1"));
  assertEquals("c", record.getValue("F3"));

  // F2 was split into its three parts
  Collection<String> parts = record.getValues("F2");
  assertEquals(3, parts.size());
  assertTrue(parts.contains("b"));
  assertTrue(parts.contains("d"));
  assertTrue(parts.contains("e"));
}
/**
 * Reads one data row whose split column contains two consecutive
 * separators and checks that the empty piece between them produces no
 * value: only the two non-empty pieces are expected.
 */
@Test
public void testNoValueForEmptySplit() throws IOException {
  Column splitColumn = new Column("F2", null, null, null);
  splitColumn.setSplitOn(";");

  source.addColumn(new Column("F1", null, null, null));
  source.addColumn(splitColumn);
  source.addColumn(new Column("F3", null, null, null));

  RecordIterator records = read("F1,F2,F3\na,b;;e,c");
  Record record = records.next();

  assertEquals("a", record.getValue("F1"));
  assertEquals("c", record.getValue("F3"));

  // the empty piece in "b;;e" must not show up as a value
  Collection<String> parts = record.getValues("F2");
  assertEquals(2, parts.size());
  assertTrue(parts.contains("b"));
  assertTrue(parts.contains("e"));
}
/**
 * Inserts a single row whose name column holds three space-separated words,
 * configures that column to split on a space, and checks that exactly one
 * record with three NAME values is produced.
 */
@Test
public void testSplitting() {
  perform("insert into testdata values (1, 'foo bar baz')");

  Column nameColumn = new Column("NAME", null, null, null);
  nameColumn.setSplitOn(" ");

  source.addColumn(new Column("ID", null, null, null));
  source.addColumn(nameColumn);

  RecordIterator records = source.getRecords();
  assertTrue(records.hasNext());

  Record record = records.next();
  assertEquals("1", record.getValue("ID"));

  // NAME was split into its three words
  Collection<String> words = record.getValues("NAME");
  assertEquals(3, words.size());
  assertTrue(words.contains("foo"));
  assertTrue(words.contains("bar"));
  assertTrue(words.contains("baz"));

  assertFalse(records.hasNext());
}
/**
 * Reads one data row whose split column has whitespace around every piece
 * and a {@code LowerCaseNormalizeCleaner} attached, and checks that the
 * three resulting values come out without the surrounding whitespace.
 */
@Test
public void testSplittingCleaning() throws IOException {
  Column cleanedColumn =
      new Column("F2", null, null, new LowerCaseNormalizeCleaner());
  cleanedColumn.setSplitOn(";");

  source.addColumn(new Column("F1", null, null, null));
  source.addColumn(cleanedColumn);
  source.addColumn(new Column("F3", null, null, null));

  RecordIterator records = read("F1,F2,F3\na, b ; d ; e ,c");
  Record record = records.next();

  assertEquals("a", record.getValue("F1"));
  assertEquals("c", record.getValue("F3"));

  // every piece of F2 is expected in its cleaned form
  Collection<String> parts = record.getValues("F2");
  assertEquals(3, parts.size());
  assertTrue(parts.contains("b"));
  assertTrue(parts.contains("d"));
  assertTrue(parts.contains("e"));
}