protected void setEnWikiTitle(Article article, ParsedPage page) { if (article.isLang(Language.EN)) { return; } try { if (page.getLanguages() == null) { article.setEnWikiTitle(""); return; } } catch (final NullPointerException e) { // FIXME title is always null! logger.warn("no languages for page {} ", article.getTitle()); return; } for (final de.tudarmstadt.ukp.wikipedia.parser.Link l : page.getLanguages()) { if (l.getText().startsWith("en:")) { article.setEnWikiTitle(l.getTarget().substring(3)); break; } } }
private void parseIlls(RawPage xml, ParsedPage pp) { if (pp.getLanguagesElement() != null){ for (Link ill : pp.getLanguages()){ try{ Matcher m = illPattern.matcher(ill.getTarget());
List<Link> languageLinks = ppage.getLanguages(); for(Link link : languageLinks)
/** * Prints the targets of the internal links found in the page <i>Germany</i>. * @param args * @throws WikiApiException */ public static void main(String[] args) throws WikiApiException { // load a sample document (the contents are equal to "DarmstadtWikipediaArticle.txt") String documentText = TestFile.getFileText(); // get a ParsedPage object MediaWikiParserFactory pf = new MediaWikiParserFactory(); MediaWikiParser parser = pf.createParser(); ParsedPage pp = parser.parse(documentText); // only the links to other Wikipedia language editions for (Link language : pp.getLanguages()) { System.out.println(language.getTarget()); } //get the internal links of each section for (Section section : pp.getSections()){ System.out.println("Section: " + section.getTitle()); for (Link link : section.getLinks(Link.type.INTERNAL)) { System.out.println(" " + link.getTarget()); } } } }