/**
 * Resolves the item id for a local id by delegating to
 * {@code upDao.getUnivPageId}, passing the local id's language and its
 * numeric id.
 *
 * @param localId the language-specific id to look up
 * @return whatever {@code upDao.getUnivPageId} returns for that language/id pair
 * @throws DaoException declared by this method; presumably raised by the underlying dao lookup
 */
@Override
public Integer getItemId(LocalId localId) throws DaoException {
    return upDao.getUnivPageId(
            localId.getLanguage(),
            localId.getId());
}
@Override public Iterable<LocalLink> getLinks(Language language, int localId, boolean outlinks) throws DaoException { LocalId id = new LocalId(language, localId); if (!id.canPackInInt()) { return delegate.getLinks(language, localId, outlinks); SparseMatrixRow row = outlinks ? matrix.getRow(id.toInt()) : transpose.getRow(id.toInt()); if (row == null) { return links; LocalId lid = LocalId.fromInt(row.getColIndex(i)); int srcId = outlinks ? localId : lid.getId(); int destId = outlinks ? lid.getId() : localId; LocalLink ll = new LocalLink( lid.getLanguage(), null, srcId,
@Override public void save(LocalLink item) throws DaoException { if (delegate != null) delegate.save(item); // skip red links if (item.getDestId() < 0 || item.getSourceId() < 0) { return; } LocalId src = new LocalId(item.getLanguage(), item.getSourceId()); LocalId dest = new LocalId(item.getLanguage(), item.getDestId()); if (!src.canPackInInt() || !dest.canPackInInt()) { return; } try { BufferedWriter writer = getSortingWriter(); writer.write(src.toInt() + " " + dest.toInt() + "\n"); } catch (IOException e) { throw new DaoException(e); } }
continue; LocalId src = new LocalId(ll.getLanguage(), ll.getSourceId()); LocalId dest = new LocalId(ll.getLanguage(), ll.getDestId()); if (!outGraph.containsKey(src)) { outGraph.put(src, new HashSet<LocalId>()); Set<LocalId> expected = outGraph.get(src); Set<LocalId> actual = new HashSet<LocalId>(); for (LocalLink ll : matrixDao.getLinks(src.getLanguage(), src.getId(), true)) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; actual.add(new LocalId(ll.getLanguage(), ll.getDestId()));
/**
 * Returns the id of the first local entity stored for the given language.
 *
 * @param language the language to look up
 * @return the id of the first entry returned when iterating
 *         {@code localEntities.get(language)}, or {@code -1} when
 *         {@code isInLanguage(language)} is false
 */
public int getLocalId(Language language) {
    if (!isInLanguage(language)) {
        return -1;
    }
    return localEntities.get(language).iterator().next().getId();
}
/**
 * Reads every raw page view from one file and saves those whose title
 * resolves to a page id.
 *
 * {@code counters[0]} is incremented for every raw view read;
 * {@code counters[1]} is incremented for every view whose title resolves to a
 * non-negative id. A {@link DaoException} on a single view is counted via
 * {@code metaDao.incrementErrorsQuietly}, printed, and does not stop the loop.
 *
 * @param tstamp   timestamp attached to every saved page view
 * @param file     the file handed to the {@code PageViewReader}
 * @param langs    the language set handed to the {@code PageViewReader}
 * @param counters at least two counters: [0] views read, [1] views resolved
 */
private void loadOneFile(DateTime tstamp, File file, LanguageSet langs, AtomicInteger[] counters) {
    for (RawPageView raw : new PageViewReader(file, langs)) {
        try {
            counters[0].getAndIncrement();

            final int pageId = pageDao.getIdByTitle(raw.getTitle());
            if (pageId < 0) {
                // title did not resolve to a page; nothing to record
                continue;
            }
            counters[1].incrementAndGet();

            LocalId localId = new LocalId(raw.getLanguage(), pageId);
            PageView pageView = new PageView(localId, tstamp.toDate(), raw.getViews());
            save(pageView);
            metaDao.incrementRecords(PageView.class, pageView.getPageId().getLanguage());
        } catch (DaoException failure) {
            metaDao.incrementErrorsQuietly(PageView.class);
            failure.printStackTrace();
        }
    }
}
/**
 * Disambiguates each phrase by summing, per page id, the scores reported by
 * every configured phrase analyzer, and returns the candidates for each
 * phrase ordered by {@code WpCollectionUtils.sortMapKeys(pageSums, true)}.
 *
 * Each phrase is resolved in, and its resulting {@link LocalId}s are tagged
 * with, that phrase's own language. (Previously every result was tagged with
 * the language of the first phrase, which mislabelled ids whenever the
 * phrases were not all in the same language.)
 *
 * @param phrases the phrases to disambiguate; an empty list yields an empty result
 * @param context not used by this implementation
 * @return one map per input phrase, in input order, from candidate page to
 *         its summed score
 * @throws DaoException if a phrase analyzer fails
 */
@Override
public List<LinkedHashMap<LocalId, Float>> disambiguate(List<LocalString> phrases, Set<LocalString> context) throws DaoException {
    List<LinkedHashMap<LocalId, Float>> results = new ArrayList<LinkedHashMap<LocalId, Float>>();
    for (LocalString phrase : phrases) {
        Language lang = phrase.getLanguage();

        // Accumulate each candidate page's score across all analyzers.
        Map<Integer, Double> pageSums = new HashMap<Integer, Double>();
        for (PhraseAnalyzer pa : phraseAnalyzers) {
            LinkedHashMap<LocalId, Float> probs = pa.resolve(lang, phrase.getString(), 20);
            if (probs == null) {
                // Defensive: an analyzer with no answer contributes nothing.
                continue;
            }
            for (Map.Entry<LocalId, Float> entry : probs.entrySet()) {
                int id = entry.getKey().getId();
                Double soFar = pageSums.get(id);
                double updated = (soFar == null)
                        ? (double) entry.getValue()
                        : soFar + entry.getValue();
                pageSums.put(id, updated);
            }
        }

        LinkedHashMap<LocalId, Float> pageResult = new LinkedHashMap<LocalId, Float>();
        for (Integer key : WpCollectionUtils.sortMapKeys(pageSums, true)) {
            pageResult.put(new LocalId(lang, key), pageSums.get(key).floatValue());
        }
        results.add(pageResult);
    }
    return results;
}
LocalId src = LocalId.fromInt(row.getRowIndex()); LangRanks lr = ranks.get(src.getLanguage()); if (lr == null) { lr = new LangRanks(); ranks.put(src.getLanguage(), lr); lr.pageSums.put(src.getId(), 1.0); int ncols = row.getNumCols(); if (ncols == 0) continue; LocalId src = LocalId.fromInt(row.getRowIndex()); LangRanks lr = ranks.get(src.getLanguage()); double w = lr.pageSums.get(src.getId()) / ncols; for (int j = 0; j < ncols; j++) { LocalId dest = LocalId.fromInt(row.getColIndex(j)); if (dest.getLanguage() == src.getLanguage()) { lr.nextSums.adjustOrPutValue(dest.getId(), w, w);
public static void main() throws DaoException, ConfigurationException { // Prepare the environment; set the root to the current directory ("."). Env env = new EnvBuilder() .setBaseDir(".") .build(); // Get the configurator that creates components and a phraze analyzer from it Configurator configurator = env.getConfigurator(); PhraseAnalyzer pa = configurator.get(PhraseAnalyzer.class); UniversalPageDao dao = configurator.get(UniversalPageDao.class); // get the most common phrases in simple Language simple = Language.getByLangCode("simple"); // simple english LinkedHashMap<LocalId, Float> resolution = pa.resolve(simple, "apple", 5); // show the closest pages System.out.println("meanings of apple:"); for (LocalId p : resolution.keySet()) { System.out.println("\t" + p + ": " + resolution.get(p)); // translate them... UniversalPage concept = dao.getByLocalPage(p.asLocalPage()); //UniversalPage concept = dao.getByLocalPage(new Local, 1); for (LocalId id : concept.getLocalEntities()) { System.out.println("\t\tin language " + id.getLanguage() + " is " + id); } } } }
outCounter.put(curSource,new SummingHashMap<Integer>()); for(LocalId curDest : ills.get(curSource)){ if (!outCounter.get(curSource).containsKey(curDest.getLanguage().getId())){ outCounter.get(curSource).addValue(new Integer(curDest.getLanguage().getId()), 1.0); outFoundLinks.put(curSource, curDest); }else{ if(!outFoundLinks.get(curSource).equals(curDest)){ // prevent duplicates from counting as second links outCounter.get(curSource).addValue(new Integer(curDest.getLanguage().getId()), 1.0); counter.put(curDest, new SummingHashMap<Integer>()); counter.get(curDest).addValue(new Integer(curSource.getLanguage().getId()), 1.0); graph.addVertex(curSource); for (LocalId curDest : ills.get(curSource)){ if (outCounter.get(curSource).get(new Integer(curDest.getLanguage().getId())) <= maxVotesPerLang){ int totalVotes = counter.get(curDest).keySet().size(); if (totalVotes >= minVotes){ if (counter.get(curDest).get(new Integer(curSource.getLanguage().getId())) <= maxVotesPerLang){ graph.addEdge(edgeCounter++, curSource, curDest); sb.append(","); }catch(DaoException e){ LOG.error("Error while getting title of LocalId: " + clusterMemb.toString());
public ClusterResult(Integer univId, Collection<LocalId> vertexCollection){ Multimap<Language, LocalId> mmap = HashMultimap.create(); for(LocalId curVertex : vertexCollection){ mmap.put(curVertex.getLanguage(), curVertex); } this.univId = univId; this.vertices = mmap; } }
/**
 * Scores a candidate by its "instance of" statements.
 *
 * Walks the candidate's wikidata statements and returns {@code 1.0} as soon
 * as one is found whose property id is {@code INSTANCE_OF_PROPERTY}, whose
 * value is of type {@code ITEM}, and whose name (as returned by
 * {@code getPropertyName}, lower-cased) is in {@code instanceOfKeywords}.
 *
 * @param candidate the candidate page to score
 * @param row       not used by this implementation
 * @param geometry  not used by this implementation
 * @return {@code 1.0} on the first matching statement, otherwise {@code 0.0}
 * @throws DaoException if the statements cannot be retrieved
 */
@Override
public double score(LocalId candidate, Map<String, String> row, Geometry geometry) throws DaoException {
    for (WikidataStatement statement : wikidataDao.getStatements(candidate.asLocalPage())) {
        boolean isInstanceOf = statement.getProperty() != null
                && statement.getProperty().getId() == INSTANCE_OF_PROPERTY;
        if (!isInstanceOf) {
            continue;
        }
        if (statement.getValue().getType() != WikidataValue.Type.ITEM) {
            continue;
        }

        String label = getPropertyName(statement.getValue().getIntValue());
        if (label == null) {
            continue;
        }
        if (instanceOfKeywords.contains(label.toLowerCase())) {
            return 1.0;
        }
    }
    return 0.0;
}
if (tokens.length == 2){ cellCount++; LocalId src = LocalId.fromInt(Integer.valueOf(tokens[0])); LocalId dest = LocalId.fromInt(Integer.valueOf(tokens[1])); if (lastSrc != null && !src.equals(lastSrc)) { if (++rowCount % 100000 == 0) { LOG.info("writing adjacency matrix row " + rowCount lastSrc.toInt(), packedDest.toArray(), new short[packedDest.size()] packedDest.clear(); packedDest.add(dest.toInt()); lastSrc = src; } else { SparseMatrixRow row = new SparseMatrixRow( vconf, lastSrc.toInt(), packedDest.toArray(), new short[packedDest.size()]
continue; LocalId src = new LocalId(ll.getLanguage(), ll.getSourceId()); LocalId dest = new LocalId(ll.getLanguage(), ll.getDestId()); if (!outGraph.containsKey(src)) { outGraph.put(src, new HashSet<LocalId>()); Set<LocalId> expected = outGraph.get(src); Set<LocalId> actual = new HashSet<LocalId>(); for (LocalLink ll : matrixDao.getLinks(src.getLanguage(), src.getId(), true)) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; actual.add(new LocalId(ll.getLanguage(), ll.getDestId()));
@Override public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException { Language language = getLanguage(); List<LocalString> phrases = Arrays.asList( new LocalString(language, phrase1), new LocalString(language, phrase2)); // debugSimilarityDisambiguator(phrases); List<LocalId> resolutions = disambiguator.disambiguateTop(phrases, null); if (resolutions.get(0) == null || resolutions.get(1) == null) { return new SRResult(); } // LocalPage lp1 = localPageDao.getById(language, resolutions.get(0).getId()); // LocalPage lp2 = localPageDao.getById(language, resolutions.get(1).getId()); // System.out.println("resolved " + phrase1 + ", " + phrase2 + " to " + lp1 + ", " + lp2); return similarity(resolutions.get(0).getId(), resolutions.get(1).getId(), explanations); }
/**
 * Reads every raw page view from one file and saves those whose title
 * resolves to a page id.
 *
 * {@code counters[0]} is incremented for every raw view read;
 * {@code counters[1]} is incremented for every view whose title resolves to a
 * non-negative id. A {@link DaoException} on a single view is counted via
 * {@code metaDao.incrementErrorsQuietly}, printed, and does not stop the loop.
 *
 * @param tstamp   timestamp attached to every saved page view
 * @param file     the file handed to the {@code PageViewReader}
 * @param langs    the language set handed to the {@code PageViewReader}
 * @param counters at least two counters: [0] views read, [1] views resolved
 */
private void loadOneFile(DateTime tstamp, File file, LanguageSet langs, AtomicInteger[] counters) {
    for (RawPageView raw : new PageViewReader(file, langs)) {
        try {
            counters[0].getAndIncrement();

            final int pageId = pageDao.getIdByTitle(raw.getTitle());
            if (pageId < 0) {
                // title did not resolve to a page; nothing to record
                continue;
            }
            counters[1].incrementAndGet();

            LocalId localId = new LocalId(raw.getLanguage(), pageId);
            PageView pageView = new PageView(localId, tstamp.toDate(), raw.getViews());
            save(pageView);
            metaDao.incrementRecords(PageView.class, pageView.getPageId().getLanguage());
        } catch (DaoException failure) {
            metaDao.incrementErrorsQuietly(PageView.class);
            failure.printStackTrace();
        }
    }
}
int i = 0; for (LocalId id1 : candidates.keySet()) { SRResultList sr = metric.mostSimilar(id1.getId(), numCands * 2); if (sr != null && sr.numDocs() > 0) { for (int j = 0; j < numPerCand && j < sr.numDocs(); j++) { expanded.put(new LocalId(language, sr.getId(j)), (float)(sr.getScore(j) * candidates.get(id1)));
/**
 * Builds the packed-int form of every (language, id) combination selected by
 * a filter.
 *
 * The filter must specify exactly one of source ids or dest ids. Each id is
 * combined with each of the filter's language ids into a {@code LocalId} and
 * packed via {@code toInt()}.
 *
 * @param filter the filter supplying language ids and either source or dest ids
 * @return the packed ids, language-major in iteration order, or {@code null}
 *         as soon as any combination cannot be packed into an int
 * @throws IllegalArgumentException if the filter has both source and dest
 *         ids, or neither
 */
private List<Integer> getPackedIds(DaoFilter filter) {
    final Collection<Integer> sources = filter.getSourceIds();
    final Collection<Integer> dests = filter.getDestIds();

    // Exactly one of the two id sets must be present.
    if ((sources == null) == (dests == null)) {
        throw new IllegalArgumentException();
    }
    final Collection<Integer> ids = (sources != null) ? sources : dests;

    List<Integer> result = new ArrayList<Integer>();
    for (int languageId : filter.getLangIds()) {
        for (int pageId : ids) {
            LocalId candidate = new LocalId(Language.getById(languageId), pageId);
            if (candidate.canPackInInt()) {
                result.add(candidate.toInt());
            } else {
                return null;
            }
        }
    }
    return result;
}