/**
 * Reads the lemma attribute ({@code ATTR_LEMMA}) of an XML element and
 * converts it to a list of strings via {@code getWordString}.
 *
 * @param node the element whose lemma attribute is read
 * @return the result of {@code getWordString} applied to the attribute
 *         value, or {@code null} when the attribute value is {@code null}
 *         or the empty string
 */
private List<String> getLemma(Element node) {
  final String rawLemma = node.getAttribute(ATTR_LEMMA);
  // A null or empty value is treated as "no lemma available".
  final boolean hasLemma = rawLemma != null && !rawLemma.isEmpty();
  return hasLemma ? getWordString(rawLemma) : null;
}
String posStr = getPOS(eRoot); posStr = treeNormalizer.normalizeNonterminal(posStr); List<String> lemmas = getLemma(eRoot); String morph = getMorph(eRoot); List<String> leafToks = getWordString(eRoot.getTextContent().trim()); for(Node childNode = eRoot.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) { if(childNode.getNodeType() != Node.ELEMENT_NODE) continue; Tree t = getTreeFromXML(childNode); if(t == null) { System.err.printf("%s: Discarding empty tree (root: %s)%n", this.getClass().getName(),childNode.getNodeName()); t = postProcessMWE(t);
/**
 * Returns the next tree that can be built from the sentence list.
 *
 * Advances {@code sentIdx} through {@code sentences}, skipping every
 * sentence for which {@code getTreeFromXML} yields {@code null}. The first
 * tree obtained is passed through {@code treeNormalizer.normalizeWholeTree};
 * if the root label of the normalized tree is a {@code CoreLabel}, the
 * sentence element's {@code ATTR_NUMBER} attribute is stored on that label
 * as its document ID.
 *
 * @return the normalized tree, or {@code null} when {@code sentences} is
 *         {@code null} or no remaining sentence produces a tree
 */
public Tree readTree() {
  while (sentences != null && sentIdx < sentences.getLength()) {
    final Node sentenceNode = sentences.item(sentIdx++);
    final Tree parsed = getTreeFromXML(sentenceNode);
    if (parsed == null) {
      // Nothing usable in this sentence; move on to the next one.
      continue;
    }

    final Tree normalized = treeNormalizer.normalizeWholeTree(parsed, treeFactory);
    if (normalized.label() instanceof CoreLabel) {
      // Tag the root label with the sentence number from the XML.
      final Element sentenceElement = (Element) sentenceNode;
      final String sentenceNumber = sentenceElement.getAttribute(ATTR_NUMBER);
      final HasIndex rootLabel = (HasIndex) normalized.label();
      rootLabel.setDocID(sentenceNumber);
    }
    return normalized;
  }
  return null;
}
/**
 * Creates a tree reader over the given input.
 *
 * When {@code readPennFormat} is set, the input is read with a
 * {@code PennTreeReader} configured with a {@code LabeledScoredTreeFactory},
 * a {@code FrenchTreeNormalizer} and a {@code PennTreebankTokenizer} over
 * the same input; otherwise a {@code FrenchTreeReader} is returned.
 *
 * @param in the character stream to read trees from
 * @return a reader matching the configured input format
 */
public TreeReader newTreeReader(Reader in) {
  if (!readPennFormat) {
    return new FrenchTreeReader(in);
  }

  // Build the collaborators in the same order the reader receives them.
  final LabeledScoredTreeFactory factory = new LabeledScoredTreeFactory();
  final FrenchTreeNormalizer normalizer = new FrenchTreeNormalizer();
  final PennTreebankTokenizer tokenizer = new PennTreebankTokenizer(in);
  return new PennTreeReader(in, factory, normalizer, tokenizer);
}
}