/**
 * Builds the one-line summary shown in the command-line output: the
 * threshold, one bracketed entry per property, and finally the
 * mutation and recombination rates.
 */
public String toString() {
  StringBuilder summary = new StringBuilder();
  summary.append("[GeneticConfiguration ").append(shortnum(config.getThreshold()));

  for (Property property : config.getProperties()) {
    if (!property.isIdProperty()) {
      // ordinary property: name, comparator, high and low probability
      summary.append(" [").append(property.getName())
             .append(" ").append(shortname(property.getComparator()))
             .append(" ").append(shortnum(property.getHighProbability()))
             .append(" ").append(shortnum(property.getLowProbability()))
             .append("]");
    } else {
      // id properties are listed by name only
      summary.append(" [").append(property.getName()).append("]");
    }
  }

  summary.append(" mr=").append(mutation_rate)
         .append(" rr=").append(shortnum(recombination_rate));
  summary.append("]");
  return summary.toString();
}
/**
 * Writes a single {@code property} element describing the given property.
 *
 * <p>The {@code type} attribute is set to {@code id} or {@code ignore} for
 * id and ignore properties respectively. For non-id properties whose lookup
 * behaviour is not the default, a {@code lookup} attribute carrying the
 * lower-cased behaviour name is added. The comparator class name and the
 * low/high probabilities are written as child elements, and are left out
 * when the comparator is {@code null} or the probability is exactly 0.0.
 *
 * @param prop the property to serialize
 */
private void writeProperty(Property prop) {
  AttributeListImpl atts = new AttributeListImpl();
  if (prop.isIdProperty()) {
    atts.addAttribute("type", "CDATA", "id");
  } else if (prop.isIgnoreProperty()) {
    atts.addAttribute("type", "CDATA", "ignore");
  }

  if (!prop.isIdProperty() && prop.getLookupBehaviour() != Property.Lookup.DEFAULT) {
    // Lower-case with a fixed locale: with the platform default locale a
    // Turkish JVM would turn the 'I' in REQUIRED into a dotless 'ı',
    // producing an attribute value that does not match the expected name.
    String value =
        prop.getLookupBehaviour().toString().toLowerCase(java.util.Locale.ROOT);
    atts.addAttribute("lookup", "CDATA", value);
  }

  pp.startElement("property", atts);
  writeElement("name", prop.getName());
  if (prop.getComparator() != null) {
    writeElement("comparator", prop.getComparator().getClass().getName());
  }
  if (prop.getLowProbability() != 0.0) {
    writeElement("low", "" + prop.getLowProbability());
  }
  if (prop.getHighProbability() != 0.0) {
    writeElement("high", "" + prop.getHighProbability());
  }
  pp.endElement("property");
}
/**
 * Copies the comparator of this aspect's property from cfg2 into the
 * property of the same name in cfg1.
 */
public void setFromOther(GeneticConfiguration cfg1, GeneticConfiguration cfg2) {
  Configuration targetConfig = cfg1.getConfiguration();
  Configuration sourceConfig = cfg2.getConfiguration();

  Property target = targetConfig.getPropertyByName(prop.getName());
  Property source = sourceConfig.getPropertyByName(prop.getName());

  target.setComparator(source.getComparator());
}
} // closes the enclosing class, whose header is outside this excerpt
/**
 * Copies the low probability of this aspect's property from cfg2 into
 * the property of the same name in cfg1.
 */
public void setFromOther(GeneticConfiguration cfg1, GeneticConfiguration cfg2) {
  Configuration targetConfig = cfg1.getConfiguration();
  Configuration sourceConfig = cfg2.getConfiguration();

  Property target = targetConfig.getPropertyByName(prop.getName());
  Property source = sourceConfig.getPropertyByName(prop.getName());

  target.setLowProbability(source.getLowProbability());
}
/**
 * Copies the high probability of this aspect's property from cfg2 into
 * the property of the same name in cfg1.
 */
public void setFromOther(GeneticConfiguration cfg1, GeneticConfiguration cfg2) {
  Configuration targetConfig = cfg1.getConfiguration();
  Configuration sourceConfig = cfg2.getConfiguration();

  Property target = targetConfig.getPropertyByName(prop.getName());
  Property source = sourceConfig.getPropertyByName(prop.getName());

  target.setHighProbability(source.getHighProbability());
}
} // closes the enclosing class, whose header is outside this excerpt
if (prop == null) continue; // means the property is unknown if (prop.isIdProperty() || prop.isIgnoreProperty()) continue; double p = prop.compare(v1, v2); high = Math.max(high, p); } catch (Exception e) { throw new DukeException("Comparison of values '" + v1 + "' and "+ "'" + v2 + "' with " + prop.getComparator() + " failed", e);
/**
 * Gives the name of the wrapped property.
 *
 * @return whatever {@code prop.getName()} reports
 */
public String getName() {
  return prop.getName();
}
if (!prop.isIdProperty() && !prop.isIgnoreProperty() && prop.getLookupBehaviour() != Property.Lookup.FALSE && prop.getHighProbability() != 0.0) candidates.add(prop); for (int ix = 0; ix < candidates.size(); ix++) { Property prop = candidates.get(ix); prob = Utils.computeBayes(prob, prop.getHighProbability()); if (prob >= threshold) { last = ix; if (p.getLookupBehaviour() != Property.Lookup.TRUE && p.getLookupBehaviour() != Property.Lookup.REQUIRED) continue;
/** * Look up potentially matching records. */ public Collection<Record> findCandidateMatches(Record record) { if (directory == null) init(); // if we have a geoprop it means that's the only way to search if (geoprop != null) { String value = record.getValue(geoprop.getName()); if (value != null) { Filter filter = geoprop.geoSearch(value); return maintracker.doQuery(new MatchAllDocsQuery(), filter); } } // ok, we didn't do a geosearch, so proceed as normal. // first we build the combined query for all lookup properties BooleanQuery query = new BooleanQuery(); for (Property prop : config.getLookupProperties()) { Collection<String> values = record.getValues(prop.getName()); if (values == null) continue; for (String value : values) parseTokens(query, prop.getName(), value, prop.getLookupBehaviour() == Property.Lookup.REQUIRED, prop.getHighProbability()); } // do the query return maintracker.doQuery(query); }
" for which there is no configuration"); if (prop.getComparator() instanceof GeopositionComparator && geoprop != null) { } else { Field.Index ix; if (prop.isIdProperty()) Float boost = getBoostFactor(prop.getHighProbability(), BoostMode.INDEX); for (String v : record.getValues(propname)) { if (v.equals(""))
/**
 * Loads config-default-probs.xml and checks that the FIRSTNAME property
 * has high and low probabilities of 0.5 and the default lookup behaviour.
 */
@Test
public void testDefaultProbs() throws IOException, SAXException {
  Configuration config = ConfigLoader.load("classpath:config-default-probs.xml");
  Property prop = config.getPropertyByName("FIRSTNAME");

  // Compare doubles with an explicit delta: the two-argument double
  // overload in org.junit.Assert is deprecated and always fails. 0.5 is
  // exactly representable, so a zero delta keeps the check exact.
  assertEquals(0.5, prop.getHighProbability(), 0.0);
  assertEquals(0.5, prop.getLowProbability(), 0.0);
  assertEquals(Property.Lookup.DEFAULT, prop.getLookupBehaviour());
}
/**
 * Counts in how many of three aspects (comparator, high probability,
 * low probability) the named property differs between the two
 * configurations.
 *
 * @return a number from 0 to 3
 */
private int checkProperty(String name, Configuration config, Configuration rand) {
  Property randomized = rand.getPropertyByName(name);
  Property original = config.getPropertyByName(name);

  int count = 0;
  if (!randomized.getComparator().equals(original.getComparator())) {
    count += 1;
  }
  if (randomized.getHighProbability() != original.getHighProbability()) {
    count += 1;
  }
  if (randomized.getLowProbability() != original.getLowProbability()) {
    count += 1;
  }
  return count;
}
/**
 * The properties which are used to identify records, rather than
 * compare them.
 *
 * @return a newly created collection holding every property for which
 *         {@code isIdProperty()} is true; empty if there are none
 */
public Collection<Property> getIdentityProperties() {
  // parameterized rather than raw, so the assignment is type-checked
  Collection<Property> ids = new ArrayList<Property>();
  for (Property p : getProperties()) {
    if (p.isIdProperty()) {
      ids.add(p);
    }
  }
  return ids;
}
/** * Creates a new processor, bound to the given database. */ public Processor(Configuration config, Database database) { this.config = config; this.database1 = database; // using this List implementation so that listeners can be removed // while Duke is running (see issue 117) this.listeners = new CopyOnWriteArrayList<MatchListener>(); this.logger = new DummyLogger(); this.threads = 1; // precomputing for later optimizations this.proporder = new ArrayList(); for (Property p : config.getProperties()) if (!p.isIdProperty()) proporder.add(p); Collections.sort(proporder, new PropertyComparator()); // still precomputing double prob = 0.5; accprob = new double[proporder.size()]; for (int ix = proporder.size() - 1; ix >= 0; ix--) { prob = Utils.computeBayes(prob, proporder.get(ix).getHighProbability()); accprob[ix] = prob; } }
/**
 * Loads config-no-comparator.xml and checks that the LASTNAME property
 * has no comparator and the default lookup behaviour.
 */
@Test
public void testDefaultComparator() throws IOException, SAXException {
  Configuration loaded = ConfigLoader.load("classpath:config-no-comparator.xml");
  Property lastName = loaded.getPropertyByName("LASTNAME");

  assertEquals(null, lastName.getComparator());
  assertEquals(Property.Lookup.DEFAULT, lastName.getLookupBehaviour());
}
/**
 * Replaces the comparator of this aspect's property in the given
 * configuration with one drawn at random from {@code comparators}.
 */
public void setRandomly(GeneticConfiguration cfg) {
  Configuration target = cfg.getConfiguration();
  Property chosen = target.getPropertyByName(prop.getName());

  // Math.random() is in [0, 1), so the index stays below size()
  int pick = (int) (comparators.size() * Math.random());
  chosen.setComparator(comparators.get(pick));
}
/** * Checks to see if we need the spatial support, and if so creates * the necessary context objects. */ private void initSpatial() { // FIXME: for now, we only use geosearch if that's the only way to // find suitable records, since we don't know how to combine // geosearch ranking with normal search ranking. if (config.getLookupProperties().size() != 1) return; Property prop = config.getLookupProperties().iterator().next(); if (!(prop.getComparator() instanceof GeopositionComparator)) return; geoprop = new GeoProperty(prop); }
/** * Look up potentially matching records. */ public Collection<Record> findCandidateMatches(Record record) { // if we have a geoprop it means that's the only way to search if (geoprop != null) { String value = record.getValue(geoprop.getName()); if (value != null) { Filter filter = geoprop.geoSearch(value); return maintracker.doQuery(new MatchAllDocsQuery(), filter); } } // ok, we didn't do a geosearch, so proceed as normal. // first we build the combined query for all lookup properties BooleanQuery query = new BooleanQuery(); for (Property prop : config.getLookupProperties()) { Collection<String> values = record.getValues(prop.getName()); if (values == null) continue; for (String value : values) parseTokens(query, prop.getName(), value, prop.getLookupBehaviour() == Property.Lookup.REQUIRED); } // do the query return maintracker.doQuery(query); }
/**
 * Orders properties by descending high probability: returns 1 when p1's
 * high probability is lower than p2's, 0 when they are equal, and -1
 * otherwise.
 */
public int compare(Property p1, Property p2) {
  double first = p1.getHighProbability();
  double second = p2.getHighProbability();

  if (first < second) {
    return 1;
  }
  return (first == second) ? 0 : -1;
}
} // closes the enclosing class, whose header is outside this excerpt
/**
 * Assigns a new high probability to this aspect's property in the given
 * configuration, using the value returned by
 * {@code drift(threshold, 1.0, 0.5)}.
 */
public void setRandomly(GeneticConfiguration cfg) {
  Configuration target = cfg.getConfiguration();
  Property chosen = target.getPropertyByName(prop.getName());
  chosen.setHighProbability(drift(target.getThreshold(), 1.0, 0.5));
}