/**
 * Stores a universal page by issuing one {@code insert} per local entity it contains.
 *
 * <p>For every language in the page's language set, each local entity of that language is
 * written together with the page's namespace id, universal id and algorithm id.
 *
 * @param page the universal page whose local entities are written
 * @throws DaoException declared by the DAO contract (presumably propagated from
 *                      {@code insert}; its body is not visible here)
 */
@Override
public void save(UniversalPage page) throws DaoException {
    final NameSpace pageNameSpace = page.getNameSpace();
    for (Language lang : page.getLanguageSet()) {
        for (LocalId entity : page.getLocalEntities(lang)) {
            insert(
                    lang.getId(),
                    entity.getId(),
                    pageNameSpace.getArbitraryId(),
                    page.getUnivId(),
                    page.getAlgorithmId()
            );
        }
    }
}
/**
 * Picks two concepts at random, computes their similarity in every configured language,
 * and writes the outcome as one row.
 *
 * <p>The similarity for each language is computed exactly once; the same result object is
 * both stored and checked for {@code null}. A {@code null} result is logged as a warning
 * but is still appended to the list, so the row keeps one entry per language.
 *
 * @throws DaoException       declared for the DAO-backed calls made here
 * @throws WikiBrainException if a best English title cannot be resolved for the warning
 *                            message, or from {@code writeRow}
 * @throws IOException        declared for {@code writeRow}
 */
private void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
    UniversalPage c1 = concepts.get(random.nextInt(concepts.size()));
    UniversalPage c2 = concepts.get(random.nextInt(concepts.size()));
    List<SRResult> results = new ArrayList<SRResult>();
    for (Language lang : langs) {
        SRMetric sr = metrics.get(lang);
        // Compute once and reuse: the metric call was previously issued twice per language.
        SRResult result = sr.similarity(c1.getLocalId(lang), c2.getLocalId(lang), false);
        results.add(result);
        if (result == null) {
            // NOTE(review): the null entry stays in the list; confirm writeRow tolerates it.
            LOG.warn(String.format("error calculating SR for universal page %d %s and %d %s",
                    c1.getUnivId(), c1.getBestEnglishTitle(lpDao, true),
                    c2.getUnivId(), c2.getBestEnglishTitle(lpDao, true)));
        }
    }
    writeRow(c1, c2, results);
}
/**
 * Converts a cluster result into a {@link UniversalPage} in the ARTICLE namespace.
 *
 * @param obj the object to convert; it is cast to {@code ClusterResult} unconditionally
 * @return a universal page built from the cluster's universal id, the value of
 *         {@code getId()}, {@code NameSpace.ARTICLE} and the cluster's vertices
 */
@Override
public UniversalPage transform(Object obj) {
    final ClusterResult cluster = (ClusterResult) obj;
    return new UniversalPage(
            cluster.univId,
            getId(),
            NameSpace.ARTICLE,
            cluster.vertices);
}
};
/**
 * Two universal pages are equal when both their universal id and their algorithm id match.
 *
 * @param o the object to compare against
 * @return {@code true} if {@code o} is a {@code UniversalPage} with the same universal id
 *         and algorithm id; {@code false} otherwise (including for {@code null})
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof UniversalPage)) {
        return false;
    }
    UniversalPage that = (UniversalPage) o;
    boolean sameConcept = this.getUnivId() == that.getUnivId();
    return sameConcept && this.getAlgorithmId() == that.getAlgorithmId();
}
/**
 * Fills both lookup indices from the given universal pages.
 *
 * <p>For every local entity of every page, records local id -> universal id in
 * {@code localId2ItemIdIndex} and universal id -> local id in {@code itemId2LocalIdIndex}.
 *
 * @param uPages iterator over the universal pages to index; it is consumed fully
 */
private void buildIndices(Iterator<UniversalPage> uPages) {
    while (uPages.hasNext()) {
        final UniversalPage concept = uPages.next();
        for (LocalId member : concept.getLocalEntities()) {
            localId2ItemIdIndex.put(member, concept.getUnivId());
            itemId2LocalIdIndex.put(concept.getUnivId(), member);
        }
    }
}
private void writeRow(UniversalPage c1, UniversalPage c2, Integer KNNDistance, List<SRResult> results) throws WikiBrainException, IOException { Title t1 = c1.getBestEnglishTitle(lpDao, true); Title t2 = c2.getBestEnglishTitle(lpDao, true); String[] rowEntries = new String[5 + langs.size()]; rowEntries[0] = t1.getCanonicalTitle(); rowEntries[1] = String.valueOf(c1.getUnivId()); rowEntries[2] = t2.getCanonicalTitle(); rowEntries[3] = String.valueOf(c2.getUnivId()); rowEntries[4] = String.valueOf(KNNDistance); int counter = 0; for (SRResult result : results) { rowEntries[5 + counter] = String.valueOf(result.getScore()); counter ++; } output.writeNext(rowEntries); output.flush(); //if(CSVRowCounter % 1000 == 0 // LOG.info("Finished writing to CSV Row " + CSVRowCounter); //} }
/**
 * Returns the title of this page's local page in the best available English language.
 *
 * <p>The language is chosen by {@code getBestAvailableEnglishLang} on this page's language
 * set; the first local entity of that language is then looked up through the DAO.
 *
 * @param lpDao                                DAO used to load the local page
 * @param returnRandomLangIfEnglishNotAvailable forwarded unchanged to
 *                                             {@code getBestAvailableEnglishLang}
 * @return the title of the local page that was loaded
 * @throws WikiBrainException wrapping any {@code DaoException} raised during the lookup
 */
public Title getBestEnglishTitle(LocalPageDao lpDao, boolean returnRandomLangIfEnglishNotAvailable) throws WikiBrainException {
    try {
        Language chosen = getLanguageSet().getBestAvailableEnglishLang(returnRandomLangIfEnglishNotAvailable);
        int firstLocalId = getLocalEntities(chosen).iterator().next().getId();
        LocalPage localPage = lpDao.getById(chosen, firstLocalId);
        return localPage.getTitle();
    } catch (DaoException e) {
        throw new WikiBrainException(e);
    }
}
/**
 * Draws two concepts at random, computes their similarity in each configured language,
 * and hands the pair plus the collected results to {@code writeRow}.
 *
 * @throws DaoException       declared for the DAO-backed calls made here
 * @throws WikiBrainException declared for {@code writeRow}
 * @throws IOException        declared for {@code writeRow}
 */
private void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
    UniversalPage first = concepts.get(random.nextInt(concepts.size()));
    UniversalPage second = concepts.get(random.nextInt(concepts.size()));

    List<SRResult> scores = new ArrayList<SRResult>();
    for (Language lang : langs) {
        SRMetric metric = metrics.get(lang);
        int firstId = first.getLocalId(lang);
        int secondId = second.getLocalId(lang);
        scores.add(metric.similarity(firstId, secondId, false));
    }

    writeRow(first, second, scores);
}
public static void main(String args[]) throws ConfigurationException, DaoException { // Setup environment Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); UniversalPageDao conceptDao = env.getConfigurator().get(UniversalPageDao.class); // Get local and universal pages LocalPage page = pageDao.getByTitle(Language.EN, "Apple"); UniversalPage concept = conceptDao.getByLocalPage(page); // Translate to other languages. System.out.format("%s in other languages:\n", page.getTitle()); for (Language lang : concept.getLanguageSet()) { LocalPage page2 = pageDao.getById(lang, concept.getLocalId(lang)); System.out.format("%s: %s\n", lang.toString(), page2.getTitle().getCanonicalTitle()); } }
if(counter % 1000 == 0) LOG.info(String.format("Evaluating %d out of %d pairs", counter, total_size)); if(c1.equals(c2)) continue; try { for (Language lang : langs) { SRMetric sr = metrics.get(lang); results.add(sr.similarity(c1.getLocalId(lang), c2.getLocalId(lang), false)); LOG.warn(String.format("Error evaluating between %s and %s", c1.getBestEnglishTitle(lpDao, true), c2.getBestEnglishTitle(lpDao, true)));
public static void main() throws DaoException, ConfigurationException { // Prepare the environment; set the root to the current directory ("."). Env env = new EnvBuilder() .setBaseDir(".") .build(); // Get the configurator that creates components and a phraze analyzer from it Configurator configurator = env.getConfigurator(); PhraseAnalyzer pa = configurator.get(PhraseAnalyzer.class); UniversalPageDao dao = configurator.get(UniversalPageDao.class); // get the most common phrases in simple Language simple = Language.getByLangCode("simple"); // simple english LinkedHashMap<LocalId, Float> resolution = pa.resolve(simple, "apple", 5); // show the closest pages System.out.println("meanings of apple:"); for (LocalId p : resolution.keySet()) { System.out.println("\t" + p + ": " + resolution.get(p)); // translate them... UniversalPage concept = dao.getByLocalPage(p.asLocalPage()); //UniversalPage concept = dao.getByLocalPage(new Local, 1); for (LocalId id : concept.getLocalEntities()) { System.out.println("\t\tin language " + id.getLanguage() + " is " + id); } } } }
/**
 * Collects the local ids, in the given language, of all concepts that carry the given
 * property value.
 *
 * <p>Concepts that cannot be loaded ({@code getById} returns {@code null}) or that are not
 * present in {@code language} are skipped.
 *
 * @param propertyName property name forwarded to {@code conceptsWithValue}
 * @param value        property value forwarded to {@code conceptsWithValue}
 * @param language     language in which local ids are requested
 * @return a set of local ids, possibly empty
 * @throws DaoException declared for the DAO lookups
 */
@Override
public Set<LocalId> pagesWithValue(String propertyName, WikidataValue value, Language language) throws DaoException {
    Set<LocalId> matches = new HashSet<LocalId>();
    for (int conceptId : conceptsWithValue(propertyName, value)) {
        UniversalPage concept = upDao.getById(conceptId);
        if (concept == null || !concept.isInLanguage(language)) {
            continue;
        }
        int localId = concept.getLocalId(language);
        matches.add(new LocalId(language, localId));
    }
    return matches;
}
@Override
// Print one line for i: its string form, the canonical best-English title of its universal
// page, and the distance between resMap.get(i) and rootPoint, followed by the literal " km".
System.out.println(i.toString() + " " + wdDao.getUniversalPage(i).getBestEnglishTitle(lpDao, true).getCanonicalTitle() + " " + distanceMetrics.getDistance(resMap.get(i), rootPoint) + " km");
if(counter % 1000 == 0) LOG.info(String.format("Evaluating %d out of %d pairs", counter, total_size)); if(c1.equals(c2)) continue; List<SRResult> results = new ArrayList<SRResult>(); for (Language lang : langs) { SRMetric sr = metrics.get(lang); results.add(sr.similarity(c1.getLocalId(lang), c2.getLocalId(lang), false));
if (concept != null && concept.hasAllLanguages(new LanguageSet(langs))) { concepts.add(concept); Geometry g1 = geometries.get(conceptId); counter ++; if(counter % 1 == 0){ LOG.info(String.format("Processing the %d th polygon : %s out of %d", counter, upDao.getById(i.getKey()).getBestEnglishTitle(lpDao, true).getCanonicalTitle(), polygons.size()));
/**
 * Returns the id of the first local entity this page has in the given language.
 *
 * @param language the language to look up
 * @return the id of the first local entity for {@code language}, or {@code -1} when
 *         {@code isInLanguage(language)} is false
 */
public int getLocalId(Language language) {
    if (!isInLanguage(language)) {
        return -1;
    }
    return localEntities.get(language).iterator().next().getId();
}
private void writeRow(UniversalPage c1, UniversalPage c2, Integer KNNDistance, List<SRResult> results) throws WikiBrainException, IOException { Title t1 = c1.getBestEnglishTitle(lpDao, true); Title t2 = c2.getBestEnglishTitle(lpDao, true); String[] rowEntries = new String[5 + langs.size()]; rowEntries[0] = t1.getCanonicalTitle(); rowEntries[1] = String.valueOf(c1.getUnivId()); rowEntries[2] = t2.getCanonicalTitle(); rowEntries[3] = String.valueOf(c2.getUnivId()); rowEntries[4] = String.valueOf(KNNDistance); int counter = 0; for (SRResult result : results) { rowEntries[5 + counter] = String.valueOf(result.getScore()); counter ++; } output.writeNext(rowEntries); output.flush(); //if(CSVRowCounter % 1000 == 0 // LOG.info("Finished writing to CSV Row " + CSVRowCounter); //} }
public String formatExplanation(Explanation explanation) throws DaoException { String[] plaintextBuilder = explanation.getFormat().split("\\?", -1); if (explanation.getInformation().size()!=plaintextBuilder.length-1){ throw new IllegalStateException("Incorrect number of information objects in explanation. Expected "+(plaintextBuilder.length-1)+" but found "+explanation.getInformation().size()); } String plaintext = plaintextBuilder[0]; for (int i = 1; i<plaintextBuilder.length; i++){ Object object = explanation.getInformation().get(i-1); //Handle the different possible types of information. //Add additional handlers as appropriate if (object instanceof LocalPage){ plaintext+=((LocalPage) object).getTitle().getCanonicalTitle(); }else if(object instanceof UniversalPage){ Language defaultlang = ((UniversalPage) object).getLanguageSet().getDefaultLanguage(); LocalId nameId = (LocalId)((UniversalPage) object).getLocalEntities(defaultlang).toArray()[0]; LocalPage namePage = localPageDao.getById(nameId.getLanguage(), nameId.getId()); plaintext+=namePage.getTitle().getCanonicalTitle(); }else { plaintext+=object.toString(); } plaintext+=plaintextBuilder[i]; } return plaintext; }
/**
 * Draws two concepts at random, computes their similarity in each configured language,
 * and hands the pair plus the collected results to {@code writeRow}.
 *
 * @throws DaoException       declared for the DAO-backed calls made here
 * @throws WikiBrainException declared for {@code writeRow}
 * @throws IOException        declared for {@code writeRow}
 */
public void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
    UniversalPage first = concepts.get(random.nextInt(concepts.size()));
    UniversalPage second = concepts.get(random.nextInt(concepts.size()));

    List<SRResult> scores = new ArrayList<SRResult>();
    for (Language lang : langs) {
        SRMetric metric = metrics.get(lang);
        int firstId = first.getLocalId(lang);
        int secondId = second.getLocalId(lang);
        scores.add(metric.similarity(firstId, secondId, false));
    }

    writeRow(first, second, scores);
}
public static void main(String args[]) throws ConfigurationException, DaoException { // Setup environment Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); UniversalPageDao conceptDao = env.getConfigurator().get(UniversalPageDao.class); // Get local and universal pages LocalPage page = pageDao.getByTitle(Language.EN, "Apple"); UniversalPage concept = conceptDao.getByLocalPage(page); // Translate to other languages. System.out.format("%s in other languages:\n", page.getTitle()); for (Language lang : concept.getLanguageSet()) { LocalPage page2 = pageDao.getById(lang, concept.getLocalId(lang)); System.out.format("%s: %s\n", lang.toString(), page2.getTitle().getCanonicalTitle()); } }