/**
 * Tests whether a node's type label begins with {@code "VP"}.
 *
 * @param node
 *          the treebank node to inspect
 * @return {@code true} if the node type starts with {@code "VP"}
 */
private boolean isVerbPhrase(TreebankNode node) {
  final String nodeType = node.getNodeType();
  return nodeType.startsWith("VP");
}
/**
 * Tests whether a node's type label begins with {@code "S"}. Note that this is a plain prefix
 * check, so every label starting with that letter matches.
 *
 * @param node
 *          the treebank node to inspect
 * @return {@code true} if the node type starts with {@code "S"}
 */
private boolean isClause(TreebankNode node) {
  final String nodeType = node.getNodeType();
  return nodeType.startsWith("S");
}
/**
 * Concatenates the node types of the given nodes, in list order, separated by {@code join}.
 *
 * @param nodes
 *          the nodes whose types are concatenated
 * @param join
 *          the separator placed between consecutive types
 * @return the joined type string; empty when {@code nodes} is empty
 */
private String toTagString(List<TreebankNode> nodes, String join) {
  StringBuilder tags = new StringBuilder();
  for (int i = 0; i < nodes.size(); i++) {
    // the separator is only written once something has already been emitted
    if (tags.length() != 0) {
      tags.append(join);
    }
    tags.append(nodes.get(i).getNodeType());
  }
  return tags.toString();
}
}
private static String pathString(List<TreebankNode> nodes) { // strip the first node from the list nodes = nodes.subList(Math.min(1, nodes.size()), nodes.size()); // join the types with underscores List<String> types = new ArrayList<String>(); for (TreebankNode node : nodes) { types.add(node.getNodeType()); } return Joiner.on('_').join(types); }
/**
 * Selects the head child of a node by handing the parent's type and the types of its children
 * to {@code findHead3} and returning the child at the index it reports.
 *
 * @param parentNode
 *          the node whose head child is wanted
 * @return the child at the head index computed by {@code findHead3}
 */
TreebankNode findHead2(TreebankNode parentNode) {
  List<TreebankNode> children = Lists.newArrayList(
      JCasUtil.select(parentNode.getChildren(), TreebankNode.class));
  String parentType = parentNode.getNodeType();

  List<String> labels = new ArrayList<String>(children.size());
  for (int i = 0; i < children.size(); i++) {
    labels.add(children.get(i).getNodeType());
  }

  return children.get(findHead3(parentType, labels));
}
/**
 * Walks from the given node up through its parents and returns the first node whose type
 * equals {@code type}. The starting node itself is checked first.
 *
 * @param node
 *          the node to start from; may be {@code null}
 * @param type
 *          the node type to look for
 * @return the first matching node on the way up, or {@code null} if none matches
 */
public static TreebankNode getAncestorWithType(TreebankNode node, String type) {
  TreebankNode current = node;
  while (current != null) {
    if (current.getNodeType().equals(type)) {
      return current;
    }
    current = current.getParent();
  }
  return null;
}
/**
 * Builds a single feature describing the production of the node's parent, formatted as
 * {@code PARENT->CHILD1-CHILD2-...} from the parent's type and the types of its children.
 *
 * @param jCas
 *          the CAS view (not read by this method)
 * @param node
 *          the node whose parent's production is described
 * @return an empty list when the node has no parent, otherwise a one-element list holding the
 *         production feature
 */
public List<Feature> extract(JCas jCas, TreebankNode node) throws UnsupportedOperationException {
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return new ArrayList<Feature>();
  }

  StringBuilder production = new StringBuilder();
  production.append(parent.getNodeType()).append("->");
  boolean first = true;
  for (TreebankNode child : JCasUtil.select(parent.getChildren(), TreebankNode.class)) {
    if (!first) {
      production.append("-");
    }
    production.append(child.getNodeType());
    first = false;
  }

  // Store the immutable String, not the mutable builder: a builder has no value-based
  // equals/hashCode, so two features with the same production would never compare equal.
  return Collections.singletonList(new Feature(this.featureName, production.toString()));
}
}
/**
 * Renders a TreebankNode as a parenthesized, Penn-Treebank-style string. A leaf becomes
 * {@code (TYPE text)}; any other node becomes {@code (TYPE child child ...)} with each child
 * rendered recursively.
 *
 * @param node
 *          The TreebankNode to be formatted.
 * @return A parenthesized Penn-Treebank-style string.
 */
public static String toTreebankString(TreebankNode node) {
  StringBuilder text = new StringBuilder("(");
  text.append(node.getNodeType());
  if (!node.getLeaf()) {
    for (TreebankNode child : JCasUtil.select(node.getChildren(), TreebankNode.class)) {
      text.append(' ');
      text.append(toTreebankString(child));
    }
  } else {
    text.append(' ');
    text.append(node.getCoveredText());
  }
  return text.append(')').toString();
}
/**
 * Formats a tree path as {@code SRC>...>ANCESTOR<...<TGT}, using node types and leaving out
 * every leaf node on either side of the common ancestor.
 *
 * @param path
 *          the path between two nodes
 * @return the formatted path, or {@code null} when the path has no common ancestor
 */
protected static String noLeavesPath(TreebankNodePath path) {
  if (path.getCommonAncestor() == null) {
    return null;
  }

  // types on the way up from the source, leaves excluded
  List<String> upward = new ArrayList<String>();
  for (TreebankNode node : path.getSourceToAncestorPath()) {
    if (node.getLeaf()) {
      continue;
    }
    upward.add(node.getNodeType());
  }

  // types on the way up from the target, leaves excluded
  List<String> downward = new ArrayList<String>();
  for (TreebankNode node : path.getTargetToAncestorPath()) {
    if (node.getLeaf()) {
      continue;
    }
    downward.add(node.getNodeType());
  }

  StringBuilder result = new StringBuilder();
  for (int i = 0; i < upward.size(); i++) {
    result.append(upward.get(i)).append('>');
  }
  result.append(path.getCommonAncestor().getNodeType());
  // the target side is written from the ancestor downwards, i.e. in reverse collection order
  for (int i = downward.size() - 1; i >= 0; i--) {
    result.append('<').append(downward.get(i));
  }
  return result.toString();
}
}
/**
 * Extracts features from the head of a constituent. When {@code includePPHead} is set and the
 * constituent's type is {@code "PP"}, the features of the head of its first {@code "NP"} child
 * are appended as well.
 *
 * @param jCas
 *          the CAS view passed on to {@code extractNode}
 * @param constituent
 *          the constituent whose head features are extracted
 * @return a new list with the head features, plus the NP-head features where applicable
 */
public List<Feature> extract(JCas jCas, TreebankNode constituent) throws CleartkExtractorException {
  TreebankNode headNode = findHead(constituent);
  List<Feature> features = new ArrayList<Feature>(extractNode(jCas, headNode, false));

  if (!includePPHead || !constituent.getNodeType().equals("PP")) {
    return features;
  }

  int childCount = constituent.getChildren().size();
  for (int i = 0; i < childCount; i++) {
    TreebankNode child = constituent.getChildren(i);
    if (!child.getNodeType().equals("NP")) {
      continue;
    }
    // only the first NP child contributes
    features.addAll(extractNode(jCas, findHead(child), true));
    break;
  }
  return features;
}
/**
 * Recursively gathers head leaves below a node. A leaf node is added to {@code heads}
 * directly. The candidate head types for the node's type are then looked up in
 * {@code VerbClauseTemporalAnnotator.headMap} and tried in order: for each candidate, every
 * child of that type whose covered text is not in {@code VerbClauseTemporalAnnotator.stopWords}
 * is descended into. The search stops after the first candidate type that produced at least
 * one such child.
 *
 * @param node
 *          the node to search below
 * @param heads
 *          the output list that collected leaves are appended to
 */
private void collectHeads(TreebankNode node, List<TreebankNode> heads) {
  if (node.getLeaf()) {
    heads.add(node);
  }

  String[] candidateTypes = VerbClauseTemporalAnnotator.headMap.get(node.getNodeType());
  if (candidateTypes == null) {
    return;
  }

  for (String candidateType : candidateTypes) {
    boolean matched = false;
    int childCount = node.getChildren().size();
    for (int i = 0; i < childCount; i++) {
      TreebankNode child = node.getChildren(i);
      if (!child.getNodeType().equals(candidateType)) {
        continue;
      }
      if (VerbClauseTemporalAnnotator.stopWords.contains(child.getCoveredText())) {
        continue;
      }
      this.collectHeads(child, heads);
      matched = true;
    }
    if (matched) {
      // lower-priority head types are ignored once one type has matched
      return;
    }
  }
}
/**
 * Renders a node and its subtree as indented text, one node per line in the form
 * {@code <tabs>TYPE:VALUE}. The value is the node value when it is non-null and the node's
 * covered text otherwise. Children are printed one indentation level deeper.
 *
 * @param node
 *          the root of the subtree to print
 * @param tabs
 *          the indentation level passed to {@code getTabs}
 * @return the multi-line text for the subtree
 */
private static String print(TreebankNode node, int tabs) {
  StringBuilder out = new StringBuilder();
  out.append(getTabs(tabs)).append(node.getNodeType());

  if (node.getNodeValue() == null) {
    out.append(":").append(node.getCoveredText()).append("\n");
  } else {
    out.append(":").append(node.getNodeValue()).append("\n");
  }

  if (node.getChildren().size() > 0) {
    for (TreebankNode child : JCasUtil.select(node.getChildren(), TreebankNode.class)) {
      out.append(print(child, tabs + 1));
    }
  }
  return out.toString();
}
/**
 * Extracts parse-tree features for a token. When a matching leaf with a parent is found, the
 * parent's node type is emitted as {@code "ParentNodeType"}; if the parent's first child is a
 * leaf other than the token's own leaf, its covered text is emitted as
 * {@code "FirstSiblingText"}.
 *
 * @param view
 *          the CAS view in which the matching leaf is looked up
 * @param token
 *          the token to extract features for
 * @return the extracted features; empty when no matching leaf or no parent exists
 */
@Override
public List<Feature> extract(JCas view, Token token) throws CleartkExtractorException {
  List<Feature> features = new ArrayList<Feature>();

  TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(view, token);
  if (leaf == null) {
    return features;
  }
  TreebankNode parent = leaf.getParent();
  if (parent == null) {
    return features;
  }

  features.add(new Feature("ParentNodeType", parent.getNodeType()));

  TreebankNode firstSibling = parent.getChildren(0);
  boolean isOtherLeaf = firstSibling != leaf && firstSibling.getLeaf();
  if (isOtherLeaf) {
    features.add(new Feature("FirstSiblingText", firstSibling.getCoveredText()));
  }
  return features;
}
}
/**
 * Extracts tree-path features between two annotations. When both have a matching leaf, three
 * features are emitted: {@code "CommonAncestor"} (the ancestor's node type, or {@code null}
 * when there is no common ancestor), {@code "SourceToAncestor"} and
 * {@code "TargetToAncestor"} (each path rendered by {@code pathString}).
 *
 * @param jCas
 *          the CAS view in which matching leaves are looked up
 * @param source
 *          the source annotation
 * @param target
 *          the target annotation
 * @return the path features, or an empty list if either annotation has no matching leaf
 */
public List<Feature> extract(JCas jCas, T source, U target) {
  List<Feature> features = new ArrayList<Feature>();

  TreebankNode sourceNode = TreebankNodeUtil.selectMatchingLeaf(jCas, source);
  TreebankNode targetNode = TreebankNodeUtil.selectMatchingLeaf(jCas, target);
  if (sourceNode == null || targetNode == null) {
    return features;
  }

  TreebankNodePath path = TreebankNodeUtil.getPath(sourceNode, targetNode);
  TreebankNode ancestor = path.getCommonAncestor();
  String ancestorType = null;
  if (ancestor != null) {
    ancestorType = ancestor.getNodeType();
  }

  features.add(new Feature("CommonAncestor", ancestorType));
  features.add(new Feature("SourceToAncestor", pathString(path.getSourceToAncestorPath())));
  features.add(new Feature("TargetToAncestor", pathString(path.getTargetToAncestorPath())));
  return features;
}
if (leaf.getBegin() != leaf.getEnd()) { Token token = new Token(jCas, leaf.getBegin(), leaf.getEnd()); token.setPos(leaf.getNodeType()); token.addToIndexes();
if (leaf.getBegin() != leaf.getEnd()) { Token token = new Token(jCas, leaf.getBegin(), leaf.getEnd()); token.setPos(leaf.getNodeType()); token.addToIndexes();