/**
 * Creates a punctuation feature for the specified punctuation node at the
 * specified index, based on the punctuation's type.
 *
 * @param punct The punctuation node which is in context.
 * @param i The index of the punctuation relative to the parse.
 * @return A feature of the form {@code <i>=<type>}.
 */
protected String punctbo(Parse punct, int i) {
  return i + "=" + punct.getType();
}
/**
 * Tells whether any node in the given collection has the given type.
 *
 * @param puncts The punctuation nodes to search; may be {@code null}.
 * @param punct The type to look for.
 * @return {@code true} if some node's type equals {@code punct};
 *     {@code false} otherwise, including when {@code puncts} is {@code null}.
 */
private boolean containsPunct(Collection<Parse> puncts, String punct) {
  if (puncts == null) {
    return false;
  }
  for (Parse candidate : puncts) {
    String candidateType = candidate.getType();
    if (candidateType.equals(punct)) {
      return true;
    }
  }
  return false;
}
/**
 * Indicates whether this parse node is a pos-tag, i.e. it has exactly one
 * child and that child's type is {@link AbstractBottomUpParser#TOK_NODE}.
 *
 * @return true if this node is a pos-tag, false otherwise.
 */
public boolean isPosTag() {
  if (parts.size() != 1) {
    return false;
  }
  return parts.get(0).getType().equals(AbstractBottomUpParser.TOK_NODE);
}
/**
 * Collects every descendant of this node whose type is
 * {@link AbstractBottomUpParser#TOK_NODE}.
 *
 * <p>The tree is walked depth-first: the children of a non-token node are
 * pushed to the front of the work list, so tokens are collected in the order
 * the children appear in {@code parts}.
 *
 * @return the token nodes found below this node.
 */
public Parse[] getTokenNodes() {
  List<Parse> found = new LinkedList<>();
  List<Parse> pending = new LinkedList<>(this.parts);
  while (!pending.isEmpty()) {
    Parse current = pending.remove(0);
    boolean isToken = current.getType().equals(AbstractBottomUpParser.TOK_NODE);
    if (isToken) {
      found.add(current);
    } else {
      // Expand in place so the node's children are visited before its siblings.
      pending.addAll(0, current.parts);
    }
  }
  return found.toArray(new Parse[0]);
}
protected String consbo(Parse p, int i) { //cons back-off StringBuilder feat = new StringBuilder(20); feat.append(i).append("*="); if (p != null) { if (useLabel && i < 0) { feat.append(p.getLabel()).append("|"); } feat.append(p.getType()); } else { feat.append(EOS); } return feat.toString(); }
/**
 * Produces features to determine whether the specified child node is part of
 * a complete constituent of the specified type and adds those features to the
 * specified list.
 *
 * <p>Two features are appended: {@code c<i>=<childType>|<headText>|<type>}
 * and its back-off {@code c<i>*=<childType>|<type>}.
 *
 * @param child The parse node to consider.
 * @param i A string indicating the position of the child node.
 * @param type The type of constituent being built.
 * @param features List to add features to.
 */
protected void checkcons(Parse child, String i, String type, List<String> features) {
  String childType = child.getType();
  String headText = child.getHead().getCoveredText();
  features.add("c" + i + "=" + childType + "|" + headText + "|" + type);
  features.add("c" + i + "*=" + childType + "|" + type);
}
/**
 * Produces features describing a pair of nodes with respect to the
 * constituent type being built and adds them to the specified list.
 *
 * <p>Four features are appended, from most to least specific:
 * <ul>
 *   <li>{@code cil=}: type, both node types and both head texts</li>
 *   <li>{@code ci*l=}: head text of {@code p1} dropped</li>
 *   <li>{@code cil*=}: head text of {@code p2} dropped</li>
 *   <li>{@code ci*l*=}: both head texts dropped</li>
 * </ul>
 *
 * @param p1 The first parse node.
 * @param p2 The second parse node.
 * @param type The type of constituent being built.
 * @param features List to add features to.
 */
protected void checkcons(Parse p1, Parse p2, String type, List<String> features) {
  String firstType = p1.getType();
  String firstHead = p1.getHead().getCoveredText();
  String secondType = p2.getType();
  String secondHead = p2.getHead().getCoveredText();

  features.add("cil=" + type + "," + firstType + "|" + firstHead + "," + secondType + "|" + secondHead);
  features.add("ci*l=" + type + "," + firstType + "," + secondType + "|" + secondHead);
  features.add("cil*=" + type + "," + firstType + "|" + firstHead + "," + secondType);
  features.add("ci*l*=" + type + "," + firstType + "," + secondType);
}
/**
 * Builds the cons feature for a node: the index followed by {@code =}, the
 * node's type and the covered text of its head.
 *
 * @param p The parse node, or {@code null} when there is no node at this position.
 * @param i The index of the node.
 * @return {@code <i>=<type>|<headText>}, prefixed with {@code <label>|} before
 *     the type when {@code useLabel} is set and {@code i} is negative;
 *     {@code <i>=} followed by {@code EOS} when {@code p} is {@code null}.
 */
protected String cons(Parse p, int i) {
  StringBuilder feat = new StringBuilder(20);
  feat.append(i).append("=");
  if (p == null) {
    feat.append(EOS);
    return feat.toString();
  }
  if (useLabel && i < 0) {
    feat.append(p.getLabel()).append("|");
  }
  feat.append(p.getType());
  feat.append("|");
  feat.append(p.getHead().getCoveredText());
  return feat.toString();
}
/**
 * Prunes the specified sentence parse of vacuous productions, i.e. nodes whose
 * only child has the same type as the node itself. Such a node is replaced in
 * its parent by that child and then detached (its parent and parts are cleared).
 *
 * <p>NOTE(review): a vacuous node without a parent (e.g. the node passed in)
 * is dereferenced through {@code getParent()} and would fail with a
 * {@link NullPointerException}; presumably callers never pass such a tree.
 *
 * @param parse The root of the parse to prune; modified in place.
 */
public static void pruneParse(Parse parse) {
  List<Parse> queue = new LinkedList<>();
  queue.add(parse);
  while (!queue.isEmpty()) {
    Parse current = queue.remove(0);
    Parse[] kids = current.getChildren();
    boolean vacuous = kids.length == 1 && current.getType().equals(kids[0].getType());
    if (vacuous) {
      Parse onlyChild = kids[0];
      Parse owner = current.getParent();
      int slot = owner.parts.indexOf(current);
      // Splice the child into the slot its redundant parent occupied.
      onlyChild.setParent(owner);
      owner.parts.set(slot, onlyChild);
      current.parent = null;
      current.parts = null;
    }
    // The children captured before any splice are still visited.
    queue.addAll(Arrays.asList(kids));
  }
}
/**
 * Returns the immediate parent of the specified node together with every
 * further ancestor reached while the ancestor shares the syntactic type of
 * the node below it. The walk stops at the first ancestor whose type differs;
 * that ancestor is still included.
 *
 * @param node The node whose parents are to be returned.
 * @return a map from each collected parent to the value of
 *     {@code indexOf(child, parent)} for the node directly below it on the path.
 */
private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
  Map<Parse, Integer> parents = new HashMap<>();
  Parse child = node;
  boolean sameType;
  do {
    Parse ancestor = child.getParent();
    parents.put(ancestor, indexOf(child, ancestor));
    sameType = ancestor.getType().equals(child.getType());
    child = ancestor;
  } while (sameType);
  return parents;
}
/**
 * Returns the right frontier of the specified parse tree with nodes ordered
 * from deepest to shallowest.
 *
 * <p>When the root's type is {@link AbstractBottomUpParser#TOP_NODE} or
 * {@link AbstractBottomUpParser#INC_NODE}, the walk starts from the first
 * element of {@code collapsePunctuation(root.getChildren(), punctSet)};
 * otherwise it starts from the root itself. From there the last child is
 * followed repeatedly until a pos-tag node is reached; the pos-tag node is
 * not included.
 *
 * @param root The root of the parse tree.
 * @param punctSet The punctuation set handed to {@code collapsePunctuation}.
 * @return The right frontier of the specified parse tree.
 */
public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
  LinkedList<Parse> frontier = new LinkedList<>();
  boolean wrapperRoot = AbstractBottomUpParser.TOP_NODE.equals(root.getType())
      || AbstractBottomUpParser.INC_NODE.equals(root.getType());
  Parse current = wrapperRoot
      ? collapsePunctuation(root.getChildren(), punctSet)[0]
      : root;
  while (!current.isPosTag()) {
    // Prepending keeps the deepest node at the head of the list.
    frontier.addFirst(current);
    Parse[] kids = current.getChildren();
    current = kids[kids.length - 1];
  }
  return new ArrayList<>(frontier);
}
/**
 * Adds one tagging event per token found in the given chunks.
 *
 * <p>A chunk that is itself a pos-tag contributes its own covered text and
 * type; any other chunk contributes the covered text and type of each of its
 * direct children. For every collected token an {@link Event} is created whose
 * outcome is the token's type and whose context comes from
 * {@code tagContextGenerator.getContext(index, tokens, tags, null)}.
 *
 * @param tagEvents The list the generated events are appended to.
 * @param chunks The chunk nodes to extract tokens and tags from.
 */
private void addTagEvents(List<Event> tagEvents, Parse[] chunks) {
  List<String> toks = new ArrayList<>();
  List<String> preds = new ArrayList<>();
  for (Parse chunk : chunks) {
    if (chunk.isPosTag()) {
      toks.add(chunk.getCoveredText());
      preds.add(chunk.getType());
    } else {
      for (Parse tok : chunk.getChildren()) {
        toks.add(tok.getCoveredText());
        preds.add(tok.getType());
      }
    }
  }

  // Convert once: building both arrays inside the loop allocated two fresh
  // copies per token. NOTE(review): assumes getContext does not modify the
  // arrays it is given - confirm against the context generator in use.
  String[] tokArray = toks.toArray(new String[toks.size()]);
  String[] predArray = preds.toArray(new String[preds.size()]);
  for (int ti = 0; ti < tokArray.length; ti++) {
    tagEvents.add(new Event(predArray[ti],
        tagContextGenerator.getContext(ti, tokArray, predArray, null)));
  }
}
/**
 * Reads the next parse from {@code samples} and converts it to a
 * {@link POSSample} built from the covered text and type of each tag node.
 *
 * @return the converted sample, or {@code null} when {@code samples.read()}
 *     returns {@code null}.
 * @throws IOException declared for the underlying read.
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  List<String> words = new ArrayList<>();
  List<String> tagNames = new ArrayList<>();
  for (Parse tagNode : parse.getTagNodes()) {
    words.add(tagNode.getCoveredText());
    tagNames.add(tagNode.getType());
  }
  return new POSSample(words, tagNames);
}
} // closes the enclosing definition whose header is outside this view
/**
 * Adjoins the specified node to the child at {@code parseIndex}, replacing
 * that child with a new node of the same type that spans from the start of
 * the child to the end of {@code node}.
 *
 * <p>The new node's parts are, in order: the original child, every element of
 * {@code node.prevPunctSet} (when that set is not {@code null}), and
 * {@code node}. Its head is obtained from {@code rules.getHead}.
 *
 * @param node The node to adjoin.
 * @param rules The head rules used to pick the head of the new node.
 * @param parseIndex The index in {@code parts} of the child being adjoined to.
 * @return the newly created adjoined node, now stored at {@code parseIndex}.
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
  Parse lastChild = parts.get(parseIndex);
  Span adjoinedSpan = new Span(lastChild.getSpan().getStart(), node.getSpan().getEnd());
  String adjoinedType = lastChild.getType();
  Parse adjNode = new Parse(this.text, adjoinedSpan, adjoinedType, 1,
      rules.getHead(new Parse[]{lastChild, node}, adjoinedType));

  adjNode.parts.add(lastChild);
  if (node.prevPunctSet != null) {
    adjNode.parts.addAll(node.prevPunctSet);
  }
  adjNode.parts.add(node);

  parts.set(parseIndex, adjNode);
  return adjNode;
}
/**
 * Prints the subtree below the specified node to standard output, one line
 * per descendant. Each line starts with the bracketed path of child indices
 * leading to the node, followed by its type, its hash code, its parent's hash
 * code and type, and its covered text.
 *
 * @param p The node whose children are printed.
 * @param levels The child indices on the path from the start node to {@code p}.
 */
private void codeTree(Parse p,int[] levels) {
  Parse[] kids = p.getChildren();
  int depth = levels.length;

  // Path for the children: the incoming path plus one trailing slot.
  int[] childPath = new int[depth + 1];
  System.arraycopy(levels, 0, childPath, 0, depth);

  StringBuilder prefixBuilder = new StringBuilder();
  prefixBuilder.append("[");
  for (int level : levels) {
    prefixBuilder.append(level).append(".");
  }
  String prefix = prefixBuilder.toString();

  for (int ki = 0; ki < kids.length; ki++) {
    Parse kid = kids[ki];
    childPath[depth] = ki;
    System.out.println(prefix + ki + "] " + kid.getType() + " " + kid.hashCode() + " -> "
        + kid.getParent().hashCode() + " " + kid.getParent().getType() + " "
        + kid.getCoveredText());
    codeTree(kid, childPath);
  }
}
/**
 * Reads the next parse from {@code samples} and converts it to a
 * {@link POSSample} whose tokens are the covered texts of the tag nodes and
 * whose tags are their types.
 *
 * @return the converted sample, or {@code null} when {@code samples.read()}
 *     returns {@code null}.
 * @throws IOException declared for the underlying read.
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  Parse[] tagNodes = parse.getTagNodes();
  String[] words = new String[tagNodes.length];
  String[] tagNames = new String[tagNodes.length];
  int slot = 0;
  for (Parse tagNode : tagNodes) {
    words[slot] = tagNode.getCoveredText();
    tagNames[slot] = tagNode.getType();
    slot++;
  }
  return new POSSample(words, tagNames);
}
} // closes the enclosing definition whose header is outside this view
/**
 * Converts the parse from the tagger back.
 *
 * <p>A new parse over {@code mSentence} is created with the same type,
 * probability and head index as the tagger's node, and with the start and end
 * of its span looked up in {@code mIndexMap}. Children are converted
 * recursively and inserted into the new parse.
 *
 * @param parseFromTagger The parse node produced by the tagger.
 * @return the final parse
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  Span taggerSpan = parseFromTagger.getSpan();
  int start = taggerSpan.getStart();
  int end = taggerSpan.getEnd();

  Span mappedSpan = new Span(mIndexMap.get(start), mIndexMap.get(end));
  Parse transformedParse = new Parse(mSentence, mappedSpan, parseFromTagger.getType(),
      parseFromTagger.getProb(), parseFromTagger.getHeadIndex());

  for (Parse child : parseFromTagger.getChildren()) {
    transformedParse.insert(transformParseFromTagger(child));
  }
  return transformedParse;
}
} // closes the enclosing definition whose header is outside this view
/**
 * Inserts an {@code NP} node after each possessive marker where one applies.
 *
 * <p>For every tag node of type {@code POS} whose following tag node is a
 * child of the {@code POS} node's grandparent, a new {@code NP} parse is
 * inserted into {@code parse}. Its span starts at the following tag node and
 * is extended over each subsequent tag node for as long as that node has the
 * same parent as the tag node before it. The following tag node is used as
 * the new node's head.
 *
 * @param parse The parse to modify in place.
 */
public static void fixPossesives(Parse parse) {
  Parse[] tags = parse.getTagNodes();
  int tagCount = tags.length;
  for (int ti = 0; ti < tagCount; ti++) {
    Parse tag = tags[ti];
    if (!tag.getType().equals("POS")) {
      continue;
    }
    int nextIndex = ti + 1;
    if (nextIndex >= tagCount) {
      continue;
    }
    Parse next = tags[nextIndex];
    if (next.getParent() != tag.getParent().getParent()) {
      continue;
    }

    int start = next.getSpan().getStart();
    int end = next.getSpan().getEnd();
    // Extend the span while consecutive tag nodes stay under the same parent.
    int npi = ti + 2;
    while (npi < tagCount && tags[npi].getParent() == tags[npi - 1].getParent()) {
      end = tags[npi].getSpan().getEnd();
      npi++;
    }

    Parse npPos = new Parse(parse.getText(), new Span(start, end), "NP", 1, next);
    parse.insert(npPos);
  }
}
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) { Parse[] parseChildren = parse.getChildren(); AnnotationFS[] parseChildAnnotations = new AnnotationFS[parseChildren.length]; // do this for all children for (int i = 0; i < parseChildren.length; i++) { parseChildAnnotations[i] = createAnnotation(cas, offset, parseChildren[i]); } AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset + parse.getSpan().getStart(), offset + parse.getSpan().getEnd()); parseAnnotation.setStringValue(mTypeFeature, parse.getType()); if (probabilityFeature != null) { parseAnnotation.setDoubleValue(probabilityFeature, parse.getProb()); } ArrayFS childrenArray = cas.createArrayFS(parseChildAnnotations.length); childrenArray.copyFromArray(parseChildAnnotations, 0, 0, parseChildAnnotations.length); parseAnnotation.setFeatureValue(childrenFeature, childrenArray); cas.getIndexRepository().addFS(parseAnnotation); return parseAnnotation; }