/**
 * Renders a TreebankNode, and recursively everything below it, in Penn-Treebank bracket
 * notation.
 *
 * <p>
 * A leaf is written as {@code (TYPE text)}; any other node is written as its type followed by
 * the bracketed form of each child, all separated by single spaces.
 *
 * @param node
 *          The TreebankNode to render.
 * @return The parenthesized Penn-Treebank-style string for the node.
 */
public static String toTreebankString(TreebankNode node) {
  StringBuilder out = new StringBuilder("(");
  out.append(node.getNodeType());
  if (!node.getLeaf()) {
    // Internal node: each child contributes its own bracketed rendering.
    for (TreebankNode kid : JCasUtil.select(node.getChildren(), TreebankNode.class)) {
      out.append(' ');
      out.append(toTreebankString(kid));
    }
  } else {
    // Leaf: the node type is followed by the text the leaf covers.
    out.append(' ');
    out.append(node.getCoveredText());
  }
  return out.append(')').toString();
}
/**
 * Builds a string describing a tree path using only the node types of non-leaf nodes.
 *
 * <p>
 * Each non-leaf type on the source-to-ancestor path is followed by {@code '>'}, then comes the
 * type of the common ancestor, then each non-leaf type on the target-to-ancestor path in reverse
 * order, each preceded by {@code '<'}.
 *
 * @param path
 *          The path between two nodes.
 * @return The path string, or null when the path has no common ancestor.
 */
protected static String noLeavesPath(TreebankNodePath path) {
  if (path.getCommonAncestor() == null) {
    return null;
  }

  StringBuilder result = new StringBuilder();

  // Source side: emitted in path order, each type followed by '>'.
  for (TreebankNode step : path.getSourceToAncestorPath()) {
    if (step.getLeaf()) {
      continue;
    }
    result.append(step.getNodeType()).append('>');
  }

  // Target side: gathered in path order and written out backwards further down.
  List<String> targetSide = new ArrayList<String>();
  for (TreebankNode step : path.getTargetToAncestorPath()) {
    if (step.getLeaf()) {
      continue;
    }
    targetSide.add(step.getNodeType());
  }

  result.append(path.getCommonAncestor().getNodeType());

  for (int i = targetSide.size() - 1; i >= 0; i--) {
    result.append('<').append(targetSide.get(i));
  }
  return result.toString();
}
}
/**
 * Finds the one TreebankNode leaf whose begin and end offsets equal those of the given
 * annotation.
 *
 * @param jCas
 *          The JCas containing the TreebankNodes.
 * @param annotation
 *          The Annotation whose span a TreebankNode leaf must match exactly.
 * @return The matching TreebankNode leaf, or null if there is none.
 * @throws IllegalArgumentException
 *           if more than one leaf has exactly the annotation's span.
 */
public static TreebankNode selectMatchingLeaf(JCas jCas, Annotation annotation) {
  TreebankNode match = null;
  for (TreebankNode candidate : JCasUtil.selectCovered(jCas, TreebankNode.class, annotation)) {
    // Skip anything that is not a leaf with exactly the annotation's offsets.
    if (!candidate.getLeaf()) {
      continue;
    }
    if (candidate.getBegin() != annotation.getBegin()) {
      continue;
    }
    if (candidate.getEnd() != annotation.getEnd()) {
      continue;
    }

    // A second exact match is ambiguous, so fail rather than pick one arbitrarily.
    if (match != null) {
      throw new IllegalArgumentException(String.format(
          "expected one leaf matching annotation %s, found %s",
          annotation,
          Arrays.asList(match, candidate)));
    }
    match = candidate;
  }
  return match;
}
/**
 * Recursively gathers head leaves below a node into the given list.
 *
 * <p>
 * A leaf node is added to {@code heads} directly. The node's type is then looked up in
 * {@code VerbClauseTemporalAnnotator.headMap}; the resulting head types are tried in order, and
 * for the first head type that has at least one child of that type whose covered text is not in
 * {@code VerbClauseTemporalAnnotator.stopWords}, every such child is descended into and the
 * remaining head types are ignored.
 *
 * @param node
 *          The node whose heads should be collected.
 * @param heads
 *          The list that receives the head leaves, in the order they are found.
 */
private void collectHeads(TreebankNode node, List<TreebankNode> heads) {
  if (node.getLeaf()) {
    heads.add(node);
  }

  String[] candidateTypes = VerbClauseTemporalAnnotator.headMap.get(node.getNodeType());
  if (candidateTypes == null) {
    return;
  }

  for (String wantedType : candidateTypes) {
    boolean descended = false;
    for (int idx = 0; idx < node.getChildren().size(); idx++) {
      TreebankNode kid = node.getChildren(idx);
      if (!kid.getNodeType().equals(wantedType)) {
        continue;
      }
      String kidText = kid.getCoveredText();
      if (VerbClauseTemporalAnnotator.stopWords.contains(kidText)) {
        continue;
      }
      this.collectHeads(kid, heads);
      descended = true;
    }
    // The first head type that yields a usable child wins; later types are not tried.
    if (descended) {
      return;
    }
  }
}
/**
 * Extracts the covered text of the first child of the focus annotation's grandparent node.
 *
 * <p>
 * The leaf matching the focus annotation is located, then its parent, then that parent's parent,
 * and finally the first child of that grandparent. A single feature named by
 * {@code featureName} is produced only when every step exists and the resulting node is a leaf.
 *
 * @param view
 *          The JCas containing the TreebankNodes.
 * @param focusAnnotation
 *          The annotation whose matching leaf is the starting point.
 * @return A list holding the one feature, or an empty list when any step is missing or the final
 *         node is not a leaf.
 */
@Override
public List<Feature> extract(JCas view, T focusAnnotation) {
  List<Feature> features = new ArrayList<Feature>();

  // Leaf -> parent -> grandparent -> grandparent's first child; null propagates through.
  TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(view, focusAnnotation);
  TreebankNode parent = (leaf == null) ? null : leaf.getParent();
  TreebankNode grandparent = (parent == null) ? null : parent.getParent();
  TreebankNode firstChild = (grandparent == null) ? null : grandparent.getChildren(0);

  if (firstChild != null && firstChild.getLeaf()) {
    features.add(new Feature(this.featureName, firstChild.getCoveredText()));
  }
  return features;
}
}
/**
 * Extracts syntactic-context features for a token from its matching treebank leaf.
 *
 * <p>
 * When the token has a matching leaf and that leaf has a parent, a {@code "ParentNodeType"}
 * feature carries the parent's node type. If the parent's first child is a leaf other than the
 * token's own leaf, a {@code "FirstSiblingText"} feature carries that child's covered text.
 *
 * @param view
 *          The JCas containing the TreebankNodes.
 * @param token
 *          The token to extract features for.
 * @return The extracted features; empty when the token has no matching leaf or the leaf has no
 *         parent.
 * @throws CleartkExtractorException
 *           declared by the extractor contract; nothing in this method body throws it directly.
 */
@Override
public List<Feature> extract(JCas view, Token token) throws CleartkExtractorException {
  List<Feature> features = new ArrayList<Feature>();

  TreebankNode node = TreebankNodeUtil.selectMatchingLeaf(view, token);
  if (node == null) {
    return features;
  }
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return features;
  }

  features.add(new Feature("ParentNodeType", parent.getNodeType()));

  TreebankNode firstSibling = parent.getChildren(0);
  // Compare with equals() rather than ==: the two references are obtained through different
  // accessors, and reference identity of JCas cover objects is not something to rely on.
  // The null guard protects against an unset first array slot.
  if (firstSibling != null && !firstSibling.equals(node) && firstSibling.getLeaf()) {
    features.add(new Feature("FirstSiblingText", firstSibling.getCoveredText()));
  }
  return features;
}
}