/**
 * Stores a Wikidata entity.
 *
 * Each label, description and alias is handed to its loader together with the
 * entity's type code, the entity id and the language id. Each statement is then
 * passed to {@code save(WikidataStatement)}. Finally, an entity of type PROPERTY
 * is added to the {@code properties} map while holding that map's monitor.
 *
 * @param entity the entity to store
 * @throws DaoException declared by this method; presumably raised by the loaders
 *                      or by the statement save
 */
@Override
public void save(WikidataEntity entity) throws DaoException {
    final WikidataEntity.Type type = entity.getType();

    for (Map.Entry<Language, String> label : entity.getLabels().entrySet()) {
        Language lang = label.getKey();
        labelLoader.load(type.code, entity.getId(), lang.getId(), label.getValue());
    }

    for (Map.Entry<Language, String> description : entity.getDescriptions().entrySet()) {
        Language lang = description.getKey();
        descLoader.load(type.code, entity.getId(), lang.getId(), description.getValue());
    }

    for (Map.Entry<Language, List<String>> aliasesForLang : entity.getAliases().entrySet()) {
        Language lang = aliasesForLang.getKey();
        for (String alias : aliasesForLang.getValue()) {
            aliasLoader.load(type.code, entity.getId(), lang.getId(), alias);
        }
    }

    for (WikidataStatement statement : entity.getStatements()) {
        save(statement);
    }

    // Only properties are kept in the in-memory map.
    if (type != WikidataEntity.Type.PROPERTY) {
        return;
    }
    synchronized (properties) {
        properties.put(entity.getId(), entity);
    }
}
private void save(File file, String json) throws WpParseException, DaoException { if (!json.contains("{")) { return; } json = json.trim(); if (json.endsWith(",")) { json = json.substring(0, json.length()-1); } if (counter.incrementAndGet() % 100000 == 0) { LOG.info("processing wikidata entity " + counter.get()); } WikidataEntity entity = wdParser.parse(json); // check if others use prune's boolean? entity.prune(languages); if (keepEntity(entity)) { wikidataDao.save(entity); } }
/**
 * Prunes this Wikidata entity to the specified languages.
 *
 * The key sets of the alias, description and label maps are each passed to
 * {@code pruneSet} together with {@code langs}.
 *
 * @param langs the languages the entity is pruned to
 * @return true iff at least one alias, description or label entry is left
 *         after pruning
 */
public boolean prune(LanguageSet langs) {
    pruneSet(aliases.keySet(), langs);
    pruneSet(descriptions.keySet(), langs);
    pruneSet(labels.keySet(), langs);

    boolean nothingLeft = aliases.isEmpty() && descriptions.isEmpty() && labels.isEmpty();
    return !nothingLeft;
}
/**
 * Finds every known property that has the given name in the given language.
 *
 * A property matches when {@code name} is one of its aliases for the language or
 * is equal to its label for the language. The comparison is exact and case-sensitive.
 *
 * @param language the language whose aliases and label are inspected
 * @param name the property name to look for
 * @return the matching properties; empty when nothing matches
 * @throws DaoException declared by this method; presumably raised while loading the properties
 */
public Collection<WikidataEntity> getPropertyByName(Language language, String name) throws DaoException {
    List<WikidataEntity> matches = new ArrayList<WikidataEntity>();
    Map<Integer, WikidataEntity> props = getProperties();
    for (WikidataEntity prop : props.values()) {
        List<String> aliases = prop.getAliases().get(language);
        if (aliases != null && aliases.contains(name)) {
            matches.add(prop);
            continue;
        }
        // The label is a single String, so String.contains() would be a substring
        // test (e.g. "instance" would match "instance of"). Compare for equality so
        // labels follow the same exact-match rule as aliases.
        String label = prop.getLabels().get(language);
        if (label != null && label.equals(name)) {
            matches.add(prop);
        }
    }
    return matches;
}
// NOTE(review): this is a non-contiguous excerpt; the enclosing method, loops and
// closing braces are not visible here. What the visible statements do:
//  - create a WikidataEntity `record` from the id of mwDoc's entity id;
//  - aliases: skip a value list whose first language code fails validLanguage(),
//    otherwise store every text of the list under the language of the first value;
//  - descriptions and labels: store the text per language when validLanguage() accepts the code;
//  - statements: convert each JacksonStatement from getJsonClaims() with parseOneClaim().
WikidataEntity record = new WikidataEntity(mwDoc.getEntityId().getId()); if (!validLanguage(vlist.get(0).getLanguageCode())) continue; Language lang = Language.getByLangCodeLenient(vlist.get(0).getLanguageCode()); record.getAliases().put(lang, new ArrayList<String>()); for (MonolingualTextValue v : vlist) { record.getAliases().get(lang).add(v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getDescriptions().put(lang, v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getLabels().put(lang, v.getText()); for (List<JacksonStatement> statements : ((JacksonItemDocument)mwDoc).getJsonClaims().values()) { for (JacksonStatement s : statements) { record.getStatements().add(parseOneClaim(record, s));
/**
 * Returns the statements whose property has the given name and whose value
 * equals the given value.
 *
 * The name is resolved to property ids through {@code getPropertyByName}; when
 * no property is found an empty list is returned without running a query.
 *
 * @param propertyName the name of the property to match
 * @param value the value the statements must carry
 * @return the statements selected by the resulting filter, or an empty list
 * @throws DaoException declared by this method; presumably raised by the lookups
 */
@Override
public Iterable<WikidataStatement> getByValue(String propertyName, WikidataValue value) throws DaoException {
    Set<Integer> ids = new HashSet<Integer>();
    for (WikidataEntity property : getPropertyByName(propertyName)) {
        ids.add(property.getId());
    }

    if (!ids.isEmpty()) {
        WikidataFilter byPropertyAndValue = new WikidataFilter.Builder()
                .withPropertyIds(ids)
                .withValue(value)
                .build();
        return get(byPropertyAndValue);
    }

    return new ArrayList<WikidataStatement>();
}
/**
 * Collects the ids of all ITEM entities that have a statement with the given
 * property name and value.
 *
 * @param propertyName the name of the property to match
 * @param value the value the statements must carry
 * @return the ids of the matching items; statements about non-item entities are ignored
 * @throws DaoException declared by this method; presumably raised by {@code getByValue}
 */
@Override
public Set<Integer> conceptsWithValue(String propertyName, WikidataValue value) throws DaoException {
    Set<Integer> itemIds = new HashSet<Integer>();
    for (WikidataStatement statement : getByValue(propertyName, value)) {
        WikidataEntity subject = statement.getItem();
        if (subject.getType() != WikidataEntity.Type.ITEM) {
            continue;
        }
        itemIds.add(subject.getId());
    }
    return itemIds;
}
/**
 * Returns the first known property whose label in the given language equals
 * the given name, ignoring case.
 *
 * The comparison uses {@link String#equalsIgnoreCase(String)}, so it does not
 * depend on the JVM's default locale. A {@code null} name matches nothing.
 *
 * @param language the language whose label is compared
 * @param name the property label to look for
 * @return the matching property, or {@code null} when no label matches
 * @throws DaoException declared by this method; presumably raised while loading the properties
 */
@Override
public WikidataEntity getProperty(Language language, String name) throws DaoException {
    for (WikidataEntity entity : getProperties().values()) {
        String label = entity.getLabels().get(language);
        if (label != null && label.equalsIgnoreCase(name)) {
            return entity;
        }
    }
    return null;
}
/**
 * Converts one row of the WIKIDATA_STATEMENT table into a WikidataStatement.
 *
 * The row's entity type code and entity id form the subject entity, the
 * property is resolved through {@code getProperty}, the stored value string is
 * parsed as JSON and decoded by the parser according to the stored value type,
 * and the stored rank is used as an index into {@code WikidataStatement.Rank.values()}.
 *
 * @param record the database row, may be {@code null}
 * @return the statement, or {@code null} when {@code record} is {@code null}
 * @throws DaoException if the value cannot be decoded (wraps the WpParseException);
 *                      also declared for the property lookup
 */
protected WikidataStatement buildStatement(Record record) throws DaoException {
    if (record == null) {
        return null;
    }

    // Only the first character of the stored type string identifies the entity type.
    char typeCode = record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_TYPE).charAt(0);
    WikidataEntity subject = new WikidataEntity(
            WikidataEntity.Type.getByCode(typeCode),
            record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_ID));

    WikidataEntity property = getProperty(record.getValue(Tables.WIKIDATA_STATEMENT.PROP_ID));
    Short rankIndex = record.getValue(Tables.WIKIDATA_STATEMENT.RANK);
    JsonElement valueJson = new JsonParser().parse(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_STR));

    WikidataValue value;
    try {
        value = parser.jsonToValue(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_TYPE), valueJson);
    } catch (WpParseException e) {
        throw new DaoException(e);
    }

    return new WikidataStatement(
            record.getValue(Tables.WIKIDATA_STATEMENT.ID),
            subject,
            property,
            value,
            WikidataStatement.Rank.values()[rankIndex]);
}
// NOTE(review): this is a non-contiguous excerpt; the enclosing method, loops and
// closing braces are not visible here. What the visible statements do:
//  - create a WikidataEntity `record` from the id of mwDoc's entity id;
//  - aliases: skip a value list whose first language code fails validLanguage(),
//    otherwise store every text of the list under the language of the first value;
//  - descriptions and labels: store the text per language when validLanguage() accepts the code;
//  - statements: convert each JacksonStatement from getJsonClaims() with parseOneClaim().
WikidataEntity record = new WikidataEntity(mwDoc.getEntityId().getId()); if (!validLanguage(vlist.get(0).getLanguageCode())) continue; Language lang = Language.getByLangCodeLenient(vlist.get(0).getLanguageCode()); record.getAliases().put(lang, new ArrayList<String>()); for (MonolingualTextValue v : vlist) { record.getAliases().get(lang).add(v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getDescriptions().put(lang, v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getLabels().put(lang, v.getText()); for (List<JacksonStatement> statements : ((JacksonItemDocument)mwDoc).getJsonClaims().values()) { for (JacksonStatement s : statements) { record.getStatements().add(parseOneClaim(record, s));
/**
 * Returns the statements whose property has the given name and whose value
 * equals the given value.
 *
 * The name is resolved to property ids through {@code getPropertyByName}; when
 * no property is found an empty list is returned without running a query.
 *
 * @param propertyName the name of the property to match
 * @param value the value the statements must carry
 * @return the statements selected by the resulting filter, or an empty list
 * @throws DaoException declared by this method; presumably raised by the lookups
 */
@Override
public Iterable<WikidataStatement> getByValue(String propertyName, WikidataValue value) throws DaoException {
    Set<Integer> ids = new HashSet<Integer>();
    for (WikidataEntity property : getPropertyByName(propertyName)) {
        ids.add(property.getId());
    }

    if (!ids.isEmpty()) {
        WikidataFilter byPropertyAndValue = new WikidataFilter.Builder()
                .withPropertyIds(ids)
                .withValue(value)
                .build();
        return get(byPropertyAndValue);
    }

    return new ArrayList<WikidataStatement>();
}
/**
 * Collects the ids of all ITEM entities that have a statement with the given
 * property name and value.
 *
 * @param propertyName the name of the property to match
 * @param value the value the statements must carry
 * @return the ids of the matching items; statements about non-item entities are ignored
 * @throws DaoException declared by this method; presumably raised by {@code getByValue}
 */
@Override
public Set<Integer> conceptsWithValue(String propertyName, WikidataValue value) throws DaoException {
    Set<Integer> itemIds = new HashSet<Integer>();
    for (WikidataStatement statement : getByValue(propertyName, value)) {
        WikidataEntity subject = statement.getItem();
        if (subject.getType() != WikidataEntity.Type.ITEM) {
            continue;
        }
        itemIds.add(subject.getId());
    }
    return itemIds;
}
/**
 * Returns the first known property whose label in the given language equals
 * the given name, ignoring case.
 *
 * The comparison uses {@link String#equalsIgnoreCase(String)}, so it does not
 * depend on the JVM's default locale. A {@code null} name matches nothing.
 *
 * @param language the language whose label is compared
 * @param name the property label to look for
 * @return the matching property, or {@code null} when no label matches
 * @throws DaoException declared by this method; presumably raised while loading the properties
 */
@Override
public WikidataEntity getProperty(Language language, String name) throws DaoException {
    for (WikidataEntity entity : getProperties().values()) {
        String label = entity.getLabels().get(language);
        if (label != null && label.equalsIgnoreCase(name)) {
            return entity;
        }
    }
    return null;
}
/**
 * Finds every known property that has the given name in the given language.
 *
 * A property matches when {@code name} is one of its aliases for the language or
 * is equal to its label for the language. The comparison is exact and case-sensitive.
 *
 * @param language the language whose aliases and label are inspected
 * @param name the property name to look for
 * @return the matching properties; empty when nothing matches
 * @throws DaoException declared by this method; presumably raised while loading the properties
 */
public Collection<WikidataEntity> getPropertyByName(Language language, String name) throws DaoException {
    List<WikidataEntity> matches = new ArrayList<WikidataEntity>();
    Map<Integer, WikidataEntity> props = getProperties();
    for (WikidataEntity prop : props.values()) {
        List<String> aliases = prop.getAliases().get(language);
        if (aliases != null && aliases.contains(name)) {
            matches.add(prop);
            continue;
        }
        // The label is a single String, so String.contains() would be a substring
        // test (e.g. "instance" would match "instance of"). Compare for equality so
        // labels follow the same exact-match rule as aliases.
        String label = prop.getLabels().get(language);
        if (label != null && label.equals(name)) {
            matches.add(prop);
        }
    }
    return matches;
}
/**
 * Converts one row of the WIKIDATA_STATEMENT table into a WikidataStatement.
 *
 * The row's entity type code and entity id form the subject entity, the
 * property is resolved through {@code getProperty}, the stored value string is
 * parsed as JSON and decoded by the parser according to the stored value type,
 * and the stored rank is used as an index into {@code WikidataStatement.Rank.values()}.
 *
 * @param record the database row, may be {@code null}
 * @return the statement, or {@code null} when {@code record} is {@code null}
 * @throws DaoException if the value cannot be decoded (wraps the WpParseException);
 *                      also declared for the property lookup
 */
protected WikidataStatement buildStatement(Record record) throws DaoException {
    if (record == null) {
        return null;
    }

    // Only the first character of the stored type string identifies the entity type.
    char typeCode = record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_TYPE).charAt(0);
    WikidataEntity subject = new WikidataEntity(
            WikidataEntity.Type.getByCode(typeCode),
            record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_ID));

    WikidataEntity property = getProperty(record.getValue(Tables.WIKIDATA_STATEMENT.PROP_ID));
    Short rankIndex = record.getValue(Tables.WIKIDATA_STATEMENT.RANK);
    JsonElement valueJson = new JsonParser().parse(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_STR));

    WikidataValue value;
    try {
        value = parser.jsonToValue(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_TYPE), valueJson);
    } catch (WpParseException e) {
        throw new DaoException(e);
    }

    return new WikidataStatement(
            record.getValue(Tables.WIKIDATA_STATEMENT.ID),
            subject,
            property,
            value,
            WikidataStatement.Rank.values()[rankIndex]);
}
/**
 * Stores a Wikidata entity.
 *
 * Each label, description and alias is handed to its loader together with the
 * entity's type code, the entity id and the language id. Each statement is then
 * passed to {@code save(WikidataStatement)}. Finally, an entity of type PROPERTY
 * is added to the {@code properties} map while holding that map's monitor.
 *
 * @param entity the entity to store
 * @throws DaoException declared by this method; presumably raised by the loaders
 *                      or by the statement save
 */
@Override
public void save(WikidataEntity entity) throws DaoException {
    final WikidataEntity.Type type = entity.getType();

    for (Map.Entry<Language, String> label : entity.getLabels().entrySet()) {
        Language lang = label.getKey();
        labelLoader.load(type.code, entity.getId(), lang.getId(), label.getValue());
    }

    for (Map.Entry<Language, String> description : entity.getDescriptions().entrySet()) {
        Language lang = description.getKey();
        descLoader.load(type.code, entity.getId(), lang.getId(), description.getValue());
    }

    for (Map.Entry<Language, List<String>> aliasesForLang : entity.getAliases().entrySet()) {
        Language lang = aliasesForLang.getKey();
        for (String alias : aliasesForLang.getValue()) {
            aliasLoader.load(type.code, entity.getId(), lang.getId(), alias);
        }
    }

    for (WikidataStatement statement : entity.getStatements()) {
        save(statement);
    }

    // Only properties are kept in the in-memory map.
    if (type != WikidataEntity.Type.PROPERTY) {
        return;
    }
    synchronized (properties) {
        properties.put(entity.getId(), entity);
    }
}