/**
 * Creates a dump parser bound to the given file and language set.
 *
 * A fresh {@link WikidataParser} is created for this instance.
 *
 * @param file      the file this parser is associated with; stored as-is
 * @param languages the language set to keep; stored as-is and not inspected here
 */
public WikidataDumpParser(File file, LanguageSet languages) {
    this.wdParser = new WikidataParser();
    this.languages = languages;
    this.file = file;
}
/**
 * Visitor callback for quantity values.
 *
 * The visited {@code value} itself is not read; the result is assembled from
 * the enclosing scope's {@code type} and {@code jsonValue}.
 *
 * @param value the quantity value being visited (unused)
 * @return a {@link WikidataValue} carrying {@code type}, the primitive form of
 *         {@code jsonValue}, and {@code jsonValue} itself
 */
@Override
public WikidataValue visit(QuantityValue value) {
    final Object primitive = gsonToPrimitive(jsonValue);
    return new WikidataValue(type, primitive, jsonValue);
}
private void fillBuff() { if (buff != null) { return; } // try to queue up the next article while (buff == null && iterImpl.hasNext()) { String line = iterImpl.next(); if (line.trim().equals("[") || line.trim().equals("]")) { continue; } try { if (line.endsWith(",")) { line = line.substring(0, line.length()-1); } if (!line.trim().isEmpty()) { buff = wdParser.parse(line); } } catch (Exception e) { LOG.warn("parsing of " + file + " failed for line '" + line + "':", e); } } }
if (!validLanguage(vlist.get(0).getLanguageCode())) continue; Language lang = Language.getByLangCodeLenient(vlist.get(0).getLanguageCode()); record.getAliases().put(lang, new ArrayList<String>()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getDescriptions().put(lang, v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getLabels().put(lang, v.getText()); for (List<JacksonStatement> statements : ((JacksonItemDocument)mwDoc).getJsonClaims().values()) { for (JacksonStatement s : statements) { record.getStatements().add(parseOneClaim(record, s));
JacksonValueSnak snak = (JacksonValueSnak)js.getMainsnak(); valTypeStr = ((JacksonValue)snak.getValue()).getType(); value = snakToValue(valTypeStr, snak.getValue()); } else { value = jsonToValue(valTypeStr, jsonVal);
/**
 * Converts a value-type name and its JSON payload into a {@link WikidataValue}.
 *
 * The types {@code "somevalue"} and {@code "novalue"} yield a value with a
 * {@code null} primitive and {@link JsonNull#INSTANCE} as JSON; {@code element}
 * is not consulted for them. The types {@code "item"} and {@code "property"}
 * are handled as {@code "wikibase-entityid"}. For every other case the payload
 * is wrapped in a {@code { "type" : ..., "value" : ... }} document, read through
 * {@code mapper} as a {@code JacksonValue}, and passed to {@code snakToValue}.
 *
 * @param type    the value type name
 * @param element the JSON payload for the value
 * @return the converted value
 * @throws WpParseException if reading the wrapped JSON raises an {@link IOException}
 */
public WikidataValue jsonToValue(String type, JsonElement element) throws WpParseException {
    // The two payload-free kinds short-circuit.
    if (type.equals("somevalue")) {
        return new WikidataValue(WikidataValue.Type.SOMEVALUE, null, JsonNull.INSTANCE);
    }
    if (type.equals("novalue")) {
        return new WikidataValue(WikidataValue.Type.NOVALUE, null, JsonNull.INSTANCE);
    }

    final boolean entityReference = type.equals("item") || type.equals("property");
    final String resolvedType = entityReference ? "wikibase-entityid" : type;

    final StringBuilder envelope = new StringBuilder();
    envelope.append("{ \"type\" : \"");
    envelope.append(resolvedType);
    envelope.append("\", \"value\" : ");
    envelope.append(element.toString());
    envelope.append(" }");
    final String fullJson = envelope.toString();

    try {
        Value parsed = mapper.readValue(fullJson, JacksonValue.class);
        return snakToValue(resolvedType, parsed);
    } catch (IOException e) {
        throw new WpParseException(e);
    }
}
/**
 * Builds a {@link WikidataStatement} from one {@code WIKIDATA_STATEMENT} record.
 *
 * The entity is built from the first character of the ENTITY_TYPE column and
 * the ENTITY_ID column. The property is resolved via {@code getProperty} from
 * PROP_ID. The value is parsed from VAL_STR and converted by
 * {@code parser.jsonToValue} using VAL_TYPE. The rank is looked up by treating
 * the RANK column as an index into {@code WikidataStatement.Rank.values()}.
 *
 * @param record the record to convert; may be {@code null}
 * @return the statement, or {@code null} when {@code record} is {@code null}
 * @throws DaoException if the stored value cannot be converted
 *         (wraps the {@link WpParseException})
 */
protected WikidataStatement buildStatement(Record record) throws DaoException {
    if (record == null) {
        return null;
    }

    char entityTypeCode = record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_TYPE).charAt(0);
    WikidataEntity subject = new WikidataEntity(
            WikidataEntity.Type.getByCode(entityTypeCode),
            record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_ID));
    WikidataEntity property = getProperty(record.getValue(Tables.WIKIDATA_STATEMENT.PROP_ID));
    Short rankOrdinal = record.getValue(Tables.WIKIDATA_STATEMENT.RANK);
    JsonElement rawValue = new JsonParser().parse(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_STR));

    WikidataValue value;
    try {
        String valueType = record.getValue(Tables.WIKIDATA_STATEMENT.VAL_TYPE);
        value = parser.jsonToValue(valueType, rawValue);
    } catch (WpParseException e) {
        throw new DaoException(e);
    }

    return new WikidataStatement(
            record.getValue(Tables.WIKIDATA_STATEMENT.ID),
            subject,
            property,
            value,
            WikidataStatement.Rank.values()[rankOrdinal]);
}
if (!validLanguage(vlist.get(0).getLanguageCode())) continue; Language lang = Language.getByLangCodeLenient(vlist.get(0).getLanguageCode()); record.getAliases().put(lang, new ArrayList<String>()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getDescriptions().put(lang, v.getText()); if (validLanguage(v.getLanguageCode())) { Language lang = Language.getByLangCodeLenient(v.getLanguageCode()); record.getLabels().put(lang, v.getText()); for (List<JacksonStatement> statements : ((JacksonItemDocument)mwDoc).getJsonClaims().values()) { for (JacksonStatement s : statements) { record.getStatements().add(parseOneClaim(record, s));
JacksonValueSnak snak = (JacksonValueSnak)js.getMainsnak(); valTypeStr = ((JacksonValue)snak.getValue()).getType(); value = snakToValue(valTypeStr, snak.getValue()); } else { value = jsonToValue(valTypeStr, jsonVal);
/**
 * Converts a value-type name and its JSON payload into a {@link WikidataValue}.
 *
 * The types {@code "somevalue"} and {@code "novalue"} yield a value with a
 * {@code null} primitive and {@link JsonNull#INSTANCE} as JSON; {@code element}
 * is not consulted for them. The types {@code "item"} and {@code "property"}
 * are handled as {@code "wikibase-entityid"}. For every other case the payload
 * is wrapped in a {@code { "type" : ..., "value" : ... }} document, read through
 * {@code mapper} as a {@code JacksonValue}, and passed to {@code snakToValue}.
 *
 * @param type    the value type name
 * @param element the JSON payload for the value
 * @return the converted value
 * @throws WpParseException if reading the wrapped JSON raises an {@link IOException}
 */
public WikidataValue jsonToValue(String type, JsonElement element) throws WpParseException {
    // The two payload-free kinds short-circuit.
    if (type.equals("somevalue")) {
        return new WikidataValue(WikidataValue.Type.SOMEVALUE, null, JsonNull.INSTANCE);
    }
    if (type.equals("novalue")) {
        return new WikidataValue(WikidataValue.Type.NOVALUE, null, JsonNull.INSTANCE);
    }

    final boolean entityReference = type.equals("item") || type.equals("property");
    final String resolvedType = entityReference ? "wikibase-entityid" : type;

    final StringBuilder envelope = new StringBuilder();
    envelope.append("{ \"type\" : \"");
    envelope.append(resolvedType);
    envelope.append("\", \"value\" : ");
    envelope.append(element.toString());
    envelope.append(" }");
    final String fullJson = envelope.toString();

    try {
        Value parsed = mapper.readValue(fullJson, JacksonValue.class);
        return snakToValue(resolvedType, parsed);
    } catch (IOException e) {
        throw new WpParseException(e);
    }
}
/**
 * Builds a {@link WikidataStatement} from one {@code WIKIDATA_STATEMENT} record.
 *
 * The entity is built from the first character of the ENTITY_TYPE column and
 * the ENTITY_ID column. The property is resolved via {@code getProperty} from
 * PROP_ID. The value is parsed from VAL_STR and converted by
 * {@code parser.jsonToValue} using VAL_TYPE. The rank is looked up by treating
 * the RANK column as an index into {@code WikidataStatement.Rank.values()}.
 *
 * @param record the record to convert; may be {@code null}
 * @return the statement, or {@code null} when {@code record} is {@code null}
 * @throws DaoException if the stored value cannot be converted
 *         (wraps the {@link WpParseException})
 */
protected WikidataStatement buildStatement(Record record) throws DaoException {
    if (record == null) {
        return null;
    }

    char entityTypeCode = record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_TYPE).charAt(0);
    WikidataEntity subject = new WikidataEntity(
            WikidataEntity.Type.getByCode(entityTypeCode),
            record.getValue(Tables.WIKIDATA_STATEMENT.ENTITY_ID));
    WikidataEntity property = getProperty(record.getValue(Tables.WIKIDATA_STATEMENT.PROP_ID));
    Short rankOrdinal = record.getValue(Tables.WIKIDATA_STATEMENT.RANK);
    JsonElement rawValue = new JsonParser().parse(record.getValue(Tables.WIKIDATA_STATEMENT.VAL_STR));

    WikidataValue value;
    try {
        String valueType = record.getValue(Tables.WIKIDATA_STATEMENT.VAL_TYPE);
        value = parser.jsonToValue(valueType, rawValue);
    } catch (WpParseException e) {
        throw new DaoException(e);
    }

    return new WikidataStatement(
            record.getValue(Tables.WIKIDATA_STATEMENT.ID),
            subject,
            property,
            value,
            WikidataStatement.Rank.values()[rankOrdinal]);
}
/**
 * Visitor callback for globe-coordinate values.
 *
 * The visited {@code value} itself is not read; the result is assembled from
 * the enclosing scope's {@code type} and {@code jsonValue}.
 *
 * @param value the globe-coordinates value being visited (unused)
 * @return a {@link WikidataValue} carrying {@code type}, the primitive form of
 *         {@code jsonValue}, and {@code jsonValue} itself
 */
@Override
public WikidataValue visit(GlobeCoordinatesValue value) {
    final Object primitive = gsonToPrimitive(jsonValue);
    return new WikidataValue(type, primitive, jsonValue);
}
private void fillBuff() { if (buff != null) { return; } // try to queue up the next article while (buff == null && iterImpl.hasNext()) { String line = iterImpl.next(); if (line.trim().equals("[") || line.trim().equals("]")) { continue; } try { if (line.endsWith(",")) { line = line.substring(0, line.length()-1); } if (!line.trim().isEmpty()) { buff = wdParser.parse(line); } } catch (Exception e) { LOG.warn("parsing of " + file + " failed for line '" + line + "':", e); } } }
/**
 * Creates a dump parser bound to the given file and language set.
 *
 * A fresh {@link WikidataParser} is created for this instance.
 *
 * @param file      the file this parser is associated with; stored as-is
 * @param languages the language set to keep; stored as-is and not inspected here
 */
public WikidataDumpParser(File file, LanguageSet languages) {
    this.wdParser = new WikidataParser();
    this.languages = languages;
    this.file = file;
}
/**
 * Visitor callback for globe-coordinate values.
 *
 * The visited {@code value} itself is not read; the result is assembled from
 * the enclosing scope's {@code type} and {@code jsonValue}.
 *
 * @param value the globe-coordinates value being visited (unused)
 * @return a {@link WikidataValue} carrying {@code type}, the primitive form of
 *         {@code jsonValue}, and {@code jsonValue} itself
 */
@Override
public WikidataValue visit(GlobeCoordinatesValue value) {
    final Object primitive = gsonToPrimitive(jsonValue);
    return new WikidataValue(type, primitive, jsonValue);
}
private void save(File file, String json) throws WpParseException, DaoException { if (!json.contains("{")) { return; } json = json.trim(); if (json.endsWith(",")) { json = json.substring(0, json.length()-1); } if (counter.incrementAndGet() % 100000 == 0) { LOG.info("processing wikidata entity " + counter.get()); } WikidataEntity entity = wdParser.parse(json); // check if others use prune's boolean? entity.prune(languages); if (keepEntity(entity)) { wikidataDao.save(entity); } }
/**
 * Visitor callback for quantity values.
 *
 * The visited {@code value} itself is not read; the result is assembled from
 * the enclosing scope's {@code type} and {@code jsonValue}.
 *
 * @param value the quantity value being visited (unused)
 * @return a {@link WikidataValue} carrying {@code type}, the primitive form of
 *         {@code jsonValue}, and {@code jsonValue} itself
 */
@Override
public WikidataValue visit(QuantityValue value) {
    final Object primitive = gsonToPrimitive(jsonValue);
    return new WikidataValue(type, primitive, jsonValue);
}
private void save(File file, String json) throws WpParseException, DaoException { if (!json.contains("{")) { return; } json = json.trim(); if (json.endsWith(",")) { json = json.substring(0, json.length()-1); } if (counter.incrementAndGet() % 100000 == 0) { LOG.info("processing wikidata entity " + counter.get()); } WikidataEntity entity = wdParser.parse(json); // check if others use prune's boolean? entity.prune(languages); if (keepEntity(entity)) { wikidataDao.save(entity); } }
List<Object> list = new ArrayList<Object>(); for (int i = 0; i < array.size(); i++) { list.add(gsonToPrimitive(array.get(i))); Map<String, Object> map = new HashMap<String, Object>(); for (Map.Entry<String, JsonElement> entry : element.getAsJsonObject().entrySet()) { map.put(entry.getKey(), gsonToPrimitive(entry.getValue()));
List<Object> list = new ArrayList<Object>(); for (int i = 0; i < array.size(); i++) { list.add(gsonToPrimitive(array.get(i))); Map<String, Object> map = new HashMap<String, Object>(); for (Map.Entry<String, JsonElement> entry : element.getAsJsonObject().entrySet()) { map.put(entry.getKey(), gsonToPrimitive(entry.getValue()));