/**
 * Tests whether {@code descendant} is {@code node} itself or is reachable from {@code node} by
 * following child links. Nodes are compared by reference identity.
 *
 * @param node
 *          the root of the subtree to search
 * @param descendant
 *          the node to look for
 * @return true if {@code descendant} was found in the subtree rooted at {@code node}
 */
private boolean contains(TreebankNode node, TreebankNode descendant) {
  if (node == descendant) {
    return true;
  }
  int childCount = node.getChildren().size();
  for (int idx = 0; idx < childCount; idx++) {
    // stop at the first subtree that contains the target
    if (this.contains(node.getChildren(idx), descendant)) {
      return true;
    }
  }
  return false;
}
/**
 * Walks down from {@code parentNode}, repeatedly replacing the current node with the result of
 * {@code findHead2}, until a node with a null or empty children array is reached.
 *
 * @param parentNode
 *          the node to start from
 * @return the first node reached that has no children (possibly {@code parentNode} itself)
 */
TreebankNode findHead(TreebankNode parentNode) {
  TreebankNode current = parentNode;
  while (true) {
    if (current.getChildren() == null || current.getChildren().size() == 0) {
      return current;
    }
    current = findHead2(current);
  }
}
/**
 * Selects one child of {@code parentNode} as its head. The node types of the parent and of each
 * child are handed to {@code findHead3}, which returns the index of the chosen child.
 *
 * @param parentNode
 *          the node whose head child is wanted
 * @return the child at the index returned by {@code findHead3}
 */
TreebankNode findHead2(TreebankNode parentNode) {
  List<TreebankNode> children = Lists.newArrayList(JCasUtil.select(
      parentNode.getChildren(),
      TreebankNode.class));

  // node-type labels of the children, in child order
  List<String> labels = new ArrayList<String>(children.size());
  for (int i = 0; i < children.size(); i++) {
    labels.add(children.get(i).getNodeType());
  }

  return children.get(findHead3(parentNode.getNodeType(), labels));
}
/**
 * Builds a single feature describing how the parent of {@code node} expands: the parent's node
 * type, followed by {@code "->"}, followed by the node types of all of the parent's children
 * joined with {@code "-"} (for example {@code "VP->VBD-NP"}).
 *
 * @param jCas
 *          not used by this method
 * @param node
 *          the node whose parent's expansion is described
 * @return an empty list when {@code node} has no parent; otherwise a singleton list holding the
 *         feature, whose value is a {@code String}
 */
public List<Feature> extract(JCas jCas, TreebankNode node) throws UnsupportedOperationException {
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return new ArrayList<Feature>();
  }

  StringBuilder buffer = new StringBuilder();
  buffer.append(parent.getNodeType()).append("->");
  boolean first = true;
  for (TreebankNode child : JCasUtil.select(parent.getChildren(), TreebankNode.class)) {
    if (!first) {
      buffer.append("-");
    }
    buffer.append(child.getNodeType());
    first = false;
  }

  // Store the finished String, not the mutable builder: a builder has identity-based
  // equals/hashCode and would make otherwise equal feature values compare as different.
  return Collections.singletonList(new Feature(this.featureName, buffer.toString()));
}
}
/**
 * Renders a TreebankNode as a parenthesized, Penn-Treebank-style string. A leaf is written as
 * {@code (TYPE text)}; any other node is written as {@code (TYPE child1 child2 ...)} with each
 * child rendered recursively.
 *
 * @param node
 *          The TreebankNode to be formatted.
 * @return A parenthesized Penn-Treebank-style string.
 */
public static String toTreebankString(TreebankNode node) {
  StringBuilder out = new StringBuilder();
  out.append('(');
  out.append(node.getNodeType());
  if (!node.getLeaf()) {
    for (TreebankNode kid : JCasUtil.select(node.getChildren(), TreebankNode.class)) {
      out.append(' ');
      out.append(toTreebankString(kid));
    }
    return out.append(')').toString();
  }
  // leaf: the node type is followed by the covered text
  out.append(' ');
  out.append(node.getCoveredText());
  return out.append(')').toString();
}
/**
 * Appends every {@code TerminalTreebankNode} found beneath {@code node} to {@code terminals},
 * visiting children in array order and descending into each non-terminal child.
 *
 * @param node
 *          the node whose children are examined
 * @param terminals
 *          the list that receives the terminal nodes
 */
private static void _initTerminalNodes(
    org.cleartk.syntax.constituent.type.TreebankNode node,
    List<TerminalTreebankNode> terminals) {
  FSArray childArray = node.getChildren();
  int childCount = childArray.size();
  for (int index = 0; index < childCount; index++) {
    org.cleartk.syntax.constituent.type.TreebankNode current =
        (org.cleartk.syntax.constituent.type.TreebankNode) childArray.get(index);
    if (!(current instanceof TerminalTreebankNode)) {
      // non-terminal: keep descending
      _initTerminalNodes(current, terminals);
      continue;
    }
    terminals.add((TerminalTreebankNode) current);
  }
}
/**
 * Collects the terminal nodes below {@code node} into {@code terminals}. Children are visited in
 * array order; a child that is a {@code TerminalTreebankNode} is added directly, any other child
 * is searched recursively.
 *
 * @param node
 *          the node whose children are examined
 * @param terminals
 *          the list that receives the terminal nodes
 */
private static void _initTerminalNodes(
    org.cleartk.syntax.constituent.type.TreebankNode node,
    List<TerminalTreebankNode> terminals) {
  FSArray kids = node.getChildren();
  int position = 0;
  while (position < kids.size()) {
    org.cleartk.syntax.constituent.type.TreebankNode kid =
        (org.cleartk.syntax.constituent.type.TreebankNode) kids.get(position);
    boolean isTerminal = kid instanceof TerminalTreebankNode;
    if (isTerminal) {
      terminals.add((TerminalTreebankNode) kid);
    } else {
      _initTerminalNodes(kid, terminals);
    }
    position++;
  }
}
/**
 * Extracts features from the head node of {@code constituent} (as located by {@code findHead}).
 * When {@code includePPHead} is set and the constituent's node type is {@code "PP"}, the
 * features of the head of its first child of type {@code "NP"} are appended as well.
 *
 * @param jCas
 *          passed through to {@code extractNode}
 * @param constituent
 *          the constituent whose head features are wanted
 * @return a new, mutable list: the features of the constituent's head, followed by the features
 *         of the head of the first NP child when the PP case applies
 * @throws CleartkExtractorException
 *           declared by this method; presumably propagated from {@code extractNode}
 */
public List<Feature> extract(JCas jCas, TreebankNode constituent)
    throws CleartkExtractorException {
  TreebankNode headNode = findHead(constituent);
  // copy so that appending below never touches the list returned by extractNode
  List<Feature> features = new ArrayList<Feature>(extractNode(jCas, headNode, false));

  // literal-first equals: a node without a type simply does not match
  if (includePPHead && "PP".equals(constituent.getNodeType())) {
    for (int i = 0; i < constituent.getChildren().size(); i++) {
      TreebankNode child = constituent.getChildren(i);
      if ("NP".equals(child.getNodeType())) {
        features.addAll(extractNode(jCas, findHead(child), true));
        // only the first NP child contributes
        break;
      }
    }
  }
  return features;
}
/**
 * Renders the subtree rooted at {@code node} as text, one node per line. Each line is the prefix
 * returned by {@code getTabs(tabs)}, the node type, a colon, and then the node value if it is
 * non-null or the covered text otherwise. Children follow, rendered with {@code tabs + 1}.
 *
 * @param node
 *          the root of the subtree to render
 * @param tabs
 *          the value passed to {@code getTabs} for this node's line
 * @return the rendered text for the whole subtree
 */
private static String print(TreebankNode node, int tabs) {
  StringBuilder out = new StringBuilder();
  out.append(getTabs(tabs)).append(node.getNodeType());

  String value = node.getNodeValue();
  String shown = value != null ? value : node.getCoveredText();
  out.append(":").append(shown).append("\n");

  if (node.getChildren().size() <= 0) {
    return out.toString();
  }
  Collection<TreebankNode> kids = JCasUtil.select(node.getChildren(), TreebankNode.class);
  int childTabs = tabs + 1;
  for (TreebankNode kid : kids) {
    out.append(print(kid, childTabs));
  }
  return out.toString();
}
/**
 * Extracts features from the sibling located {@code offset} positions away from {@code node} in
 * its parent's children, and renames each extracted feature by combining {@code name} with the
 * feature's own name via {@code Feature.createName}.
 *
 * @param jCas
 *          passed through to the sub-extractor
 * @param node
 *          the node whose sibling is examined
 * @return the renamed features produced by the sub-extractor for the sibling, or an empty list
 *         when {@code node} has no parent, is not found among its parent's children, or the
 *         sibling position falls outside the children
 * @throws CleartkExtractorException
 *           declared by this method; presumably propagated from the sub-extractor
 */
public List<Feature> extract(JCas jCas, TreebankNode node) throws CleartkExtractorException {
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return Collections.emptyList();
  }

  List<TreebankNode> children = Lists.newArrayList(JCasUtil.select(
      parent.getChildren(),
      TreebankNode.class));
  int index = children.indexOf(node);
  if (index < 0) {
    // Without this guard, -1 + offset could land on a valid index and a child that is not
    // actually positioned relative to the node would be reported as its sibling.
    return Collections.emptyList();
  }

  int siblingIndex = index + offset;
  if (siblingIndex < 0 || siblingIndex >= children.size()) {
    return Collections.emptyList();
  }

  TreebankNode sibling = children.get(siblingIndex);
  List<Feature> features = subExtractor.extract(jCas, sibling);
  // the features returned by the sub-extractor are renamed in place
  for (Feature feature : features) {
    feature.setName(Feature.createName(name, feature.getName()));
  }
  return features;
}
/**
 * Collects head leaves below {@code node} into {@code heads}. A leaf node is added directly.
 * Independently of that, the node's type is looked up in
 * {@code VerbClauseTemporalAnnotator.headMap}; the resulting head types are tried in order, and
 * for the first head type that has at least one matching child whose covered text is not in
 * {@code VerbClauseTemporalAnnotator.stopWords}, all such children are searched recursively and
 * the remaining head types are ignored.
 *
 * @param node
 *          the node to search
 * @param heads
 *          the list that receives the collected leaves
 */
private void collectHeads(TreebankNode node, List<TreebankNode> heads) {
  if (node.getLeaf()) {
    heads.add(node);
  }

  String[] candidateTypes = VerbClauseTemporalAnnotator.headMap.get(node.getNodeType());
  if (candidateTypes == null) {
    return;
  }

  for (String wantedType : candidateTypes) {
    boolean matched = false;
    for (int idx = 0; idx < node.getChildren().size(); idx++) {
      TreebankNode kid = node.getChildren(idx);
      if (!kid.getNodeType().equals(wantedType)) {
        continue;
      }
      if (VerbClauseTemporalAnnotator.stopWords.contains(kid.getCoveredText())) {
        // children whose text is a stop word do not count as a match
        continue;
      }
      this.collectHeads(kid, heads);
      matched = true;
    }
    if (matched) {
      // the first head type that yields a usable child wins
      return;
    }
  }
}
private void collectVerbClausePairs(TreebankNode node, List<TreebankNodeLink> links) { if (this.isVerbPhrase(node)) { List<TreebankNode> sources = new ArrayList<TreebankNode>(); List<TreebankNode> targets = new ArrayList<TreebankNode>(); this.collectHeads(node, sources); // look for clauses in descendants for (int i = 0; i < node.getChildren().size(); i++) { TreebankNode child = node.getChildren(i); if (this.isClause(child)) { // pair the verb phrase heads with the clause heads targets.clear(); this.collectHeads(child, targets); for (TreebankNode source : sources) { for (TreebankNode target : targets) { // skip pairs where the head of the VP is inside the // clause if (!this.contains(child, source)) { links.add(new TreebankNodeLink(source, target)); } } } } } } // look for verb phrases in descendants for (int i = 0; i < node.getChildren().size(); i++) { TreebankNode child = node.getChildren(i); this.collectVerbClausePairs(child, links); } }
/**
 * Finds the leaf matching {@code focusAnnotation}, moves up two levels to its grandparent, and
 * takes the grandparent's first child. If that child is a leaf, its covered text is returned as
 * a single feature named {@code featureName}.
 *
 * @param view
 *          the JCas in which the matching leaf is looked up
 * @param focusAnnotation
 *          the annotation for which a matching leaf is selected
 * @return a list with one feature, or an empty list if any step of the walk yields null or the
 *         selected child is not a leaf
 */
@Override
public List<Feature> extract(JCas view, T focusAnnotation) {
  List<Feature> result = new ArrayList<Feature>();

  TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(view, focusAnnotation);
  TreebankNode parent = (leaf == null) ? null : leaf.getParent();
  TreebankNode grandparent = (parent == null) ? null : parent.getParent();
  TreebankNode firstChild = (grandparent == null) ? null : grandparent.getChildren(0);

  if (firstChild == null || !firstChild.getLeaf()) {
    return result;
  }
  result.add(new Feature(this.featureName, firstChild.getCoveredText()));
  return result;
}
}
/**
 * Produces the TreebankNode for this parse element. When the type is {@code "S1"} no new node is
 * created and the first child is returned as is. Otherwise a new node is created over the extent
 * computed by {@code AnnotationUtil.getAnnotationsExtent} for the children, given this element's
 * type and children, set as the parent of each child, and added to the indexes.
 *
 * @param jCas
 *          the JCas in which the node and its children array are created
 * @return the first child for type {@code "S1"}, otherwise the newly created node
 */
public TreebankNode makeTreebankNode(JCas jCas) {
  if (this.type.equals("S1")) {
    return this.children.get(0);
  }

  int[] extent = AnnotationUtil.getAnnotationsExtent(this.children);
  TreebankNode created = new TreebankNode(jCas, extent[0], extent[1]);
  created.setNodeType(this.type);

  // allocate the children array on the node, then copy the children into it
  created.setChildren(new FSArray(jCas, this.children.size()));
  FSCollectionFactory.fillArrayFS(created.getChildren(), this.children);

  for (TreebankNode kid : this.children) {
    kid.setParent(created);
  }
  created.addToIndexes();
  return created;
}
}
/**
 * Extracts up to two features for the leaf matching {@code token}: the node type of the leaf's
 * parent ({@code "ParentNodeType"}) and, when the parent's first child is a different node and
 * is itself a leaf, the covered text of that first child ({@code "FirstSiblingText"}).
 *
 * @param view
 *          the JCas in which the matching leaf is looked up
 * @param token
 *          the token for which a matching leaf is selected
 * @return the extracted features; empty when no matching leaf exists or the leaf has no parent
 */
@Override
public List<Feature> extract(JCas view, Token token) throws CleartkExtractorException {
  TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(view, token);
  List<Feature> result = new ArrayList<Feature>();
  if (leaf == null) {
    return result;
  }

  TreebankNode parent = leaf.getParent();
  if (parent == null) {
    return result;
  }
  result.add(new Feature("ParentNodeType", parent.getNodeType()));

  TreebankNode firstSibling = parent.getChildren(0);
  boolean isOtherLeaf = firstSibling != leaf && firstSibling.getLeaf();
  if (isOtherLeaf) {
    result.add(new Feature("FirstSiblingText", firstSibling.getCoveredText()));
  }
  return result;
}
}
/**
 * Converts this parse element into a TreebankNode. An element of type {@code "S1"} is skipped:
 * its first child is returned directly. Any other element becomes a new node spanning the
 * extent reported by {@code AnnotationUtil.getAnnotationsExtent} for its children; the node
 * receives this element's type and children, becomes the parent of each child, and is added to
 * the indexes.
 *
 * @param jCas
 *          the JCas in which the node and its children array are created
 * @return the first child for type {@code "S1"}, otherwise the newly created node
 */
public TreebankNode makeTreebankNode(JCas jCas) {
  boolean isWrapper = this.type.equals("S1");
  if (!isWrapper) {
    int[] bounds = AnnotationUtil.getAnnotationsExtent(this.children);
    TreebankNode result = new TreebankNode(jCas, bounds[0], bounds[1]);
    result.setNodeType(this.type);

    int childCount = this.children.size();
    result.setChildren(new FSArray(jCas, childCount));
    FSCollectionFactory.fillArrayFS(result.getChildren(), this.children);

    // link every child back to the node that now contains it
    for (TreebankNode member : this.children) {
      member.setParent(result);
    }
    result.addToIndexes();
    return result;
  }
  return this.children.get(0);
}
}
private void addTreebankNodeToIndexes( TreebankNode node, JCas jCas, Tree tree, List<CoreLabel> tokenAnns) { // figure out begin and end character offsets CoreMap label = (CoreMap) tree.label(); CoreMap beginToken = tokenAnns.get(label.get(BeginIndexAnnotation.class)); CoreMap endToken = tokenAnns.get(label.get(EndIndexAnnotation.class) - 1); int nodeBegin = beginToken.get(CharacterOffsetBeginAnnotation.class); int nodeEnd = endToken.get(CharacterOffsetEndAnnotation.class); // set span, node type, children (mutual recursion), and add it to the JCas node.setBegin(nodeBegin); node.setEnd(nodeEnd); node.setNodeType(tree.value()); node.setChildren(this.addTreebankNodeChildrenToIndexes(node, jCas, tokenAnns, tree)); node.setLeaf(node.getChildren().size() == 0); node.addToIndexes(); }
FSCollectionFactory.fillArrayFS(uimaNode.getChildren(), uimaChildren); uimaNode.addToIndexes(); for (TreebankNode child : uimaChildren) {