/**
 * Looks up the treebank leaf matching the focus annotation, climbs two levels to its
 * grandparent, and inspects that grandparent's first child (index 0). When that child is a
 * leaf, its covered text is emitted as a single feature named {@code this.featureName}.
 *
 * @param view
 *          The JCas view that is searched for the matching leaf.
 * @param focusAnnotation
 *          The annotation whose matching leaf is the starting point.
 * @return A list holding one feature, or an empty list if any step of the walk yields null or
 *         the first child is not a leaf.
 */
@Override
public List<Feature> extract(JCas view, T focusAnnotation) {
  TreebankNode current = TreebankNodeUtil.selectMatchingLeaf(view, focusAnnotation);

  // climb from the leaf to its parent and then to its grandparent, stopping on null
  for (int level = 0; level < 2 && current != null; level++) {
    current = current.getParent();
  }

  // step down to the grandparent's first child, if there is a grandparent
  TreebankNode firstChild = (current == null) ? null : current.getChildren(0);

  List<Feature> result = new ArrayList<Feature>();
  if (firstChild != null && firstChild.getLeaf()) {
    result.add(new Feature(this.featureName, firstChild.getCoveredText()));
  }
  return result;
}
}
/**
 * Create a branch TreebankNode in a JCas. The offsets of this node are determined by its
 * children: it begins where the first child begins and ends where the last child ends.
 *
 * @param jCas
 *          The JCas which the annotation should be added to.
 * @param nodeType
 *          The phrase type tag of the node.
 * @param children
 *          The TreebankNode children of the node. At least one child is required because the
 *          span of the new node is derived from them.
 * @return The TreebankNode which was added to the JCas.
 * @throws IllegalArgumentException
 *           if no children are supplied.
 */
public static TreebankNode newNode(JCas jCas, String nodeType, TreebankNode... children) {
  // without children there is no span to derive; fail with a clear message instead of an
  // ArrayIndexOutOfBoundsException / NullPointerException
  if (children == null || children.length == 0) {
    throw new IllegalArgumentException(
        "a branch TreebankNode requires at least one child to determine its offsets");
  }

  int begin = children[0].getBegin();
  int end = children[children.length - 1].getEnd();
  TreebankNode node = new TreebankNode(jCas, begin, end);
  node.setNodeType(nodeType);
  node.addToIndexes();

  FSArray fsArray = new FSArray(jCas, children.length);
  fsArray.copyFromArray(children, 0, 0, children.length);
  node.setChildren(fsArray);

  // link every child back to the newly created branch node
  for (TreebankNode child : children) {
    child.setParent(node);
  }
  return node;
}
/**
 * Builds a single feature describing the expansion of the node's parent, in the form
 * {@code parentType->childType-childType-...}, where the child types are those of all of the
 * parent's children in order.
 *
 * @param jCas
 *          The JCas containing the node (not otherwise used here).
 * @param node
 *          The node whose parent's expansion is described.
 * @return A one-element list with the feature, or an empty list if the node has no parent.
 */
public List<Feature> extract(JCas jCas, TreebankNode node) throws UnsupportedOperationException {
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return new ArrayList<Feature>();
  }

  StringBuilder buffer = new StringBuilder();
  buffer.append(parent.getNodeType()).append("->");
  boolean first = true;
  for (TreebankNode child : JCasUtil.select(parent.getChildren(), TreebankNode.class)) {
    if (!first) {
      buffer.append("-");
    }
    buffer.append(child.getNodeType());
    first = false;
  }

  // the feature value must be the String content, not the mutable builder object itself
  return Collections.singletonList(new Feature(this.featureName, buffer.toString()));
}
}
/**
 * Walks upward from the given node (inclusive) following parent links and returns the first
 * node whose node type equals the requested type.
 *
 * @param node
 *          The node at which the search starts; may be null.
 * @param type
 *          The node type to look for.
 * @return The first node on the way up with the given type, or null if the chain of parents
 *         ends without a match.
 */
public static TreebankNode getAncestorWithType(TreebankNode node, String type) {
  for (TreebankNode current = node; current != null; current = current.getParent()) {
    if (current.getNodeType().equals(type)) {
      return current;
    }
  }
  return null;
}
/**
 * Create a leaf TreebankNode in a JCas. The node gets an empty children array, is flagged as a
 * leaf, and is added to the indexes.
 *
 * @param jCas
 *          The JCas which the annotation should be added to.
 * @param begin
 *          The begin offset of the node.
 * @param end
 *          The end offset of the node.
 * @param nodeType
 *          The part of speech tag of the node.
 * @return The TreebankNode which was added to the JCas.
 */
public static TreebankNode newNode(JCas jCas, int begin, int end, String nodeType) {
  TreebankNode leaf = new TreebankNode(jCas, begin, end);
  FSArray noChildren = new FSArray(jCas, 0);

  leaf.setNodeType(nodeType);
  leaf.setChildren(noChildren);
  leaf.setLeaf(true);

  leaf.addToIndexes();
  return leaf;
}
/**
 * Picks the head child of a node: collects the node types of all children, asks
 * {@code findHead3} for the index of the head given the parent type and those child types, and
 * returns the child at that index.
 *
 * @param parentNode
 *          The node whose head child is wanted.
 * @return The child node at the index chosen by {@code findHead3}.
 */
TreebankNode findHead2(TreebankNode parentNode) {
  List<TreebankNode> children = Lists.newArrayList(JCasUtil.select(
      parentNode.getChildren(),
      TreebankNode.class));

  List<String> labels = new ArrayList<String>(children.size());
  for (int i = 0; i < children.size(); i++) {
    labels.add(children.get(i).getNodeType());
  }

  return children.get(findHead3(parentNode.getNodeType(), labels));
}
/**
 * Turns this parse element into a TreebankNode. An element of type "S1" is not materialized;
 * its first child is returned instead. Any other element becomes a new node whose offsets come
 * from {@code AnnotationUtil.getAnnotationsExtent} over the children, with the children
 * attached and re-parented to it, and which is added to the indexes.
 *
 * @param jCas
 *          The JCas in which the new node is created.
 * @return The first child for an "S1" element, otherwise the newly created node.
 */
public TreebankNode makeTreebankNode(JCas jCas) {
  // "S1" wrapper: hand back its first child rather than creating a node for it
  if (this.type.equals("S1")) {
    return this.children.get(0);
  }

  int[] extent = AnnotationUtil.getAnnotationsExtent(this.children);
  TreebankNode created = new TreebankNode(jCas, extent[0], extent[1]);
  created.setNodeType(this.type);

  created.setChildren(new FSArray(jCas, this.children.size()));
  FSCollectionFactory.fillArrayFS(created.getChildren(), this.children);
  for (TreebankNode kid : this.children) {
    kid.setParent(created);
  }

  created.addToIndexes();
  return created;
}
}
org.cleartk.syntax.constituent.type.TreebankNode parentNode, boolean addToIndexes) { uimaNode.setNodeType(pojoNode.getType()); StringArray nodeTags = (StringArray) (FSCollectionFactory.fillArrayFS(new StringArray( jCas, pojoNode.getTags().length), pojoNode.getTags())); uimaNode.setNodeTags(nodeTags); uimaNode.setNodeValue(pojoNode.getValue()); uimaNode.setLeaf(pojoNode.isLeaf()); uimaNode.setParent(parentNode); childNode = new TerminalTreebankNode(jCas, child.getTextBegin(), child.getTextEnd()); } else { childNode = new org.cleartk.syntax.constituent.type.TreebankNode( jCas, child.getTextBegin(), childNode.addToIndexes(); 0, uimaChildren.size()); uimaNode.setChildren(uimaChildrenFSArray); return uimaNode;
private void addTreebankNodeToIndexes( TreebankNode node, JCas jCas, Tree tree, List<CoreLabel> tokenAnns) { // figure out begin and end character offsets CoreMap label = (CoreMap) tree.label(); CoreMap beginToken = tokenAnns.get(label.get(BeginIndexAnnotation.class)); CoreMap endToken = tokenAnns.get(label.get(EndIndexAnnotation.class) - 1); int nodeBegin = beginToken.get(CharacterOffsetBeginAnnotation.class); int nodeEnd = endToken.get(CharacterOffsetEndAnnotation.class); // set span, node type, children (mutual recursion), and add it to the JCas node.setBegin(nodeBegin); node.setEnd(nodeEnd); node.setNodeType(tree.value()); node.setChildren(this.addTreebankNodeChildrenToIndexes(node, jCas, tokenAnns, tree)); node.setLeaf(node.getChildren().size() == 0); node.addToIndexes(); }
/**
 * Renders a node and, recursively, its children as indented text. Each node occupies one line
 * of the form {@code <tabs><nodeType>:<text>}, where the text is the node value if it is
 * non-null and the covered text otherwise. Children are printed one indentation level deeper.
 *
 * @param node
 *          The node to render.
 * @param tabs
 *          The indentation level passed to {@code getTabs} for this node.
 * @return The multi-line rendering of the subtree rooted at the node.
 */
private static String print(TreebankNode node, int tabs) {
  StringBuilder out = new StringBuilder();

  out.append(getTabs(tabs)).append(node.getNodeType());

  // prefer the explicit node value, falling back to the covered text
  String shown = (node.getNodeValue() != null) ? node.getNodeValue() : node.getCoveredText();
  out.append(":").append(shown).append("\n");

  if (node.getChildren().size() > 0) {
    int childTabs = tabs + 1;
    for (TreebankNode child : JCasUtil.select(node.getChildren(), TreebankNode.class)) {
      out.append(print(child, childTabs));
    }
  }
  return out.toString();
}
if (leaf.getBegin() != leaf.getEnd()) { Token token = new Token(jCas, leaf.getBegin(), leaf.getEnd()); token.setPos(leaf.getNodeType()); token.addToIndexes();
/**
 * Returns the Event to use for a node. If the anchor is already an Event it is returned as is.
 * Otherwise, when {@code this.createEvents} is set, a new Event spanning the node is created
 * with id {@code "e" + this.eventID}, the counter is incremented, and the event is added to
 * the indexes. In all other cases null is returned.
 *
 * @param jCas
 *          The JCas in which a new Event is created if needed.
 * @param anchor
 *          An existing anchor, possibly null.
 * @param node
 *          The node supplying the offsets for a newly created Event.
 * @return The existing or newly created Event, or null if neither applies.
 */
private Event getOrCreateEvent(JCas jCas, Anchor anchor, TreebankNode node) {
  // instanceof is false for null, so this also covers a missing anchor
  if (anchor instanceof Event) {
    return (Event) anchor;
  }
  if (!this.createEvents) {
    return null;
  }

  Event created = new Event(jCas, node.getBegin(), node.getEnd());
  created.setId("e" + this.eventID);
  this.eventID++;
  created.addToIndexes();
  return created;
}
/**
 * Tests whether {@code descendant} is the given node itself or is reachable from it by
 * following children links. Nodes are compared by reference.
 *
 * @param node
 *          The root of the subtree to search.
 * @param descendant
 *          The node to look for.
 * @return true if the subtree rooted at {@code node} contains {@code descendant}.
 */
private boolean contains(TreebankNode node, TreebankNode descendant) {
  if (node == descendant) {
    return true;
  }

  // depth-first search through the children, stopping at the first hit
  int childCount = node.getChildren().size();
  for (int index = 0; index < childCount; index++) {
    if (this.contains(node.getChildren(index), descendant)) {
      return true;
    }
  }
  return false;
}
/**
 * Finds the one TreebankNode leaf whose begin and end offsets equal those of the given
 * annotation.
 *
 * @param jCas
 *          The JCas containing the TreebankNodes.
 * @param annotation
 *          The Annotation whose span should match a TreebankNode leaf.
 * @return The matching leaf, or null if there is none.
 * @throws IllegalArgumentException
 *           if more than one leaf matches the annotation's span.
 */
public static TreebankNode selectMatchingLeaf(JCas jCas, Annotation annotation) {
  TreebankNode match = null;
  for (TreebankNode candidate : JCasUtil.selectCovered(jCas, TreebankNode.class, annotation)) {
    // only leaves with exactly the annotation's span are of interest
    if (!candidate.getLeaf()) {
      continue;
    }
    if (candidate.getBegin() != annotation.getBegin()) {
      continue;
    }
    if (candidate.getEnd() != annotation.getEnd()) {
      continue;
    }

    // a second hit means the match is ambiguous
    if (match != null) {
      throw new IllegalArgumentException(String.format(
          "expected one leaf matching annotation %s, found %s",
          annotation,
          Arrays.asList(match, candidate)));
    }
    match = candidate;
  }
  return match;
}
/**
 * Extracts features from a sibling of the given node. The sibling is the child of the node's
 * parent located {@code offset} positions away from the node; the features produced for it by
 * {@code subExtractor} are renamed with {@code Feature.createName(name, originalName)}.
 *
 * @param jCas
 *          The JCas handed on to the sub-extractor.
 * @param node
 *          The node whose sibling is examined.
 * @return The renamed features of the sibling, or an empty list if the node has no parent, is
 *         not found among its parent's children, or the sibling position is out of range.
 * @throws CleartkExtractorException
 *           as declared; NOTE(review): presumably propagated from the sub-extractor - confirm.
 */
public List<Feature> extract(JCas jCas, TreebankNode node) throws CleartkExtractorException {
  TreebankNode parent = node.getParent();
  if (parent == null) {
    return Collections.emptyList();
  }

  List<TreebankNode> children = Lists.newArrayList(JCasUtil.select(
      parent.getChildren(),
      TreebankNode.class));

  int index = children.indexOf(node);
  // indexOf yields -1 when the node is not listed by its parent; without this guard a positive
  // offset would turn -1 into a valid index and silently pick an unrelated child
  if (index < 0) {
    return Collections.emptyList();
  }

  int siblingIndex = index + offset;
  if (siblingIndex < 0 || siblingIndex >= children.size()) {
    return Collections.emptyList();
  }

  TreebankNode sibling = children.get(siblingIndex);
  List<Feature> features = subExtractor.extract(jCas, sibling);
  // prefix each feature name with this extractor's name
  for (Feature feature : features) {
    feature.setName(Feature.createName(name, feature.getName()));
  }
  return features;
}
/**
 * Null-safe parent lookup.
 *
 * @param node
 *          The node whose parent is wanted; may be null.
 * @return The node's parent, or null if the node itself is null.
 */
public static TreebankNode getParent(TreebankNode node) {
  return (node == null) ? null : node.getParent();
}
private FSArray addTreebankNodeChildrenToIndexes( TreebankNode parent, JCas jCas, List<CoreLabel> tokenAnns, Tree tree) { Tree[] childTrees = tree.children(); // collect all children (except leaves, which are just the words - POS tags are pre-terminals in // a Stanford tree) List<TreebankNode> childNodes = new ArrayList<TreebankNode>(); for (Tree child : childTrees) { if (!child.isLeaf()) { // set node attributes and add children (mutual recursion) TreebankNode node = new TreebankNode(jCas); node.setParent(parent); this.addTreebankNodeToIndexes(node, jCas, child, tokenAnns); childNodes.add(node); } } // convert the child list into an FSArray FSArray childNodeArray = new FSArray(jCas, childNodes.size()); for (int i = 0; i < childNodes.size(); ++i) { childNodeArray.set(i, childNodes.get(i)); } return childNodeArray; }
/**
 * Checks whether a node's type marks it as a verb phrase.
 *
 * @param node
 *          The node to inspect.
 * @return true if the node type starts with "VP".
 */
private boolean isVerbPhrase(TreebankNode node) {
  String nodeType = node.getNodeType();
  return nodeType.startsWith("VP");
}
/**
 * Formats a treebank path as a string of node types, ignoring leaf nodes. The non-leaf types on
 * the source-to-ancestor path are written in order, each followed by {@code '>'}; then the type
 * of the common ancestor; then the non-leaf types on the target-to-ancestor path in reverse
 * order, each preceded by {@code '<'}.
 *
 * @param path
 *          The path to format.
 * @return The formatted path, or null if the path has no common ancestor.
 */
protected static String noLeavesPath(TreebankNodePath path) {
  TreebankNode ancestor = path.getCommonAncestor();
  if (ancestor == null) {
    return null;
  }

  StringBuilder result = new StringBuilder();

  // upward leg: source side, in the order the path lists it
  for (TreebankNode sourceNode : path.getSourceToAncestorPath()) {
    if (sourceNode.getLeaf()) {
      continue;
    }
    result.append(sourceNode.getNodeType()).append('>');
  }

  result.append(ancestor.getNodeType());

  // downward leg: target side is listed target-to-ancestor, so emit it back to front
  List<String> targetTypes = new ArrayList<String>();
  for (TreebankNode targetNode : path.getTargetToAncestorPath()) {
    if (targetNode.getLeaf()) {
      continue;
    }
    targetTypes.add(targetNode.getNodeType());
  }
  for (int i = targetTypes.size() - 1; i >= 0; i--) {
    result.append('<').append(targetTypes.get(i));
  }

  return result.toString();
}
}
/**
 * Builds the TopTreebankNode for the parse. The node's offsets come from
 * {@code AnnotationUtil.getAnnotationsExtent} over the terminals, its type is "TOP", its
 * children are those of the element on top of the parse stack (re-parented to the new node),
 * and its terminals are this object's terminals. The node is added to the indexes and the top
 * element is then popped off the parse stack.
 *
 * @return The newly created and indexed top node.
 */
public TopTreebankNode makeParse() {
  int[] extent = AnnotationUtil.getAnnotationsExtent(this.terminals);
  TopTreebankNode top = new TopTreebankNode(jCas, extent[0], extent[1]);
  top.setNodeType("TOP");

  // attach the children of the element currently on top of the stack
  List<TreebankNode> topChildren = parseStack.peek().children;
  top.setChildren(new FSArray(jCas, topChildren.size()));
  FSCollectionFactory.fillArrayFS(top.getChildren(), topChildren);
  for (TreebankNode kid : topChildren) {
    kid.setParent(top);
  }

  int terminalCount = this.terminals.size();
  top.setTerminals(new FSArray(jCas, terminalCount));
  FSCollectionFactory.fillArrayFS(top.getTerminals(), this.terminals);

  top.addToIndexes();
  parseStack.pop();
  return top;
}