/** * Represents this parse in a human readable way. */ @Override public String toString() { // TODO: Use the commented code in next bigger release, // change probably breaks backward compatibility in some // applications //StringBuffer buffer = new StringBuffer(); //show(buffer); //return buffer.toString(); return getCoveredText(); }
/**
 * Creates the punctuation feature for the specified punctuation at the
 * specified index, based on the punctuation mark itself.
 *
 * @param punct The punctuation which is in context.
 * @param i The index of the punctuation relative to the parse.
 * @return A feature of the form {@code i=text}, where {@code text} is the
 *     covered text of the punctuation node.
 */
protected String punct(Parse punct, int i) {
  final String mark = punct.getCoveredText();
  return i + "=" + mark;
}
/**
 * Builds the constituent feature for the node at the specified position.
 * <p>
 * For a non-null node the feature is {@code i=type|headText}. When
 * {@code useLabel} is set and {@code i} is negative, the node's label is
 * placed first, giving {@code i=label|type|headText}. For a {@code null}
 * node the feature is {@code i=} followed by {@code EOS}.
 *
 * @param p The parse node to describe; may be {@code null}.
 * @param i The position of the node, used as the feature prefix.
 * @return The feature string for the node at that position.
 */
protected String cons(Parse p, int i) {
  final StringBuilder feature = new StringBuilder(20);
  feature.append(i).append("=");

  // No node at this position: emit the end-of-sequence marker and stop.
  if (p == null) {
    return feature.append(EOS).toString();
  }

  final boolean includeLabel = useLabel && i < 0;
  if (includeLabel) {
    feature.append(p.getLabel()).append("|");
  }
  feature.append(p.getType());
  feature.append("|");
  feature.append(p.getHead().getCoveredText());
  return feature.toString();
}
/**
 * Produces features to determine whether the specified child node is part of
 * a complete constituent of the specified type, and appends them to the
 * specified list.
 * <p>
 * Two features are added, in this order: a lexicalized one,
 * {@code c<i>=<childType>|<headText>|<type>}, and a backed-off one,
 * {@code c<i>*=<childType>|<type>}.
 *
 * @param child The parse node to consider.
 * @param i A string indicating the position of the child node.
 * @param type The type of constituent being built.
 * @param features List to add features to.
 */
protected void checkcons(Parse child, String i, String type, List<String> features) {
  final String childType = child.getType();
  final String headText = child.getHead().getCoveredText();
  final String prefix = "c" + i;

  features.add(prefix + "=" + childType + "|" + headText + "|" + type);
  features.add(prefix + "*=" + childType + "|" + type);
}
/**
 * Produces features describing a pair of nodes with respect to a constituent
 * of the specified type, and appends them to the specified list.
 * <p>
 * Four features are added, in this order. A {@code *} after {@code i} drops
 * the head text of {@code p1}; a {@code *} after {@code l} drops the head
 * text of {@code p2}:
 * <ul>
 *   <li>{@code cil=<type>,<p1Type>|<p1Head>,<p2Type>|<p2Head>}</li>
 *   <li>{@code ci*l=<type>,<p1Type>,<p2Type>|<p2Head>}</li>
 *   <li>{@code cil*=<type>,<p1Type>|<p1Head>,<p2Type>}</li>
 *   <li>{@code ci*l*=<type>,<p1Type>,<p2Type>}</li>
 * </ul>
 *
 * @param p1 The first parse node to consider.
 * @param p2 The second parse node to consider.
 * @param type The type of constituent being built.
 * @param features List to add features to.
 */
protected void checkcons(Parse p1, Parse p2, String type, List<String> features) {
  final String firstType = p1.getType();
  final String firstHead = p1.getHead().getCoveredText();
  final String secondType = p2.getType();
  final String secondHead = p2.getHead().getCoveredText();

  // Lexicalized form of each node: its type joined with its head's text.
  final String firstLexical = firstType + "|" + firstHead;
  final String secondLexical = secondType + "|" + secondHead;

  features.add("cil=" + type + "," + firstLexical + "," + secondLexical);
  features.add("ci*l=" + type + "," + firstType + "," + secondLexical);
  features.add("cil*=" + type + "," + firstLexical + "," + secondType);
  features.add("ci*l*=" + type + "," + firstType + "," + secondType);
}
/**
 * Creates one tagging event per token found in the specified chunks and
 * appends the events to the specified list.
 * <p>
 * A chunk that is itself a POS tag node contributes its own covered text and
 * type; any other chunk contributes the covered text and type of each of its
 * children. The outcome of each event is the token's type, and its context
 * comes from {@code tagContextGenerator}.
 *
 * @param tagEvents List to add the tagging events to.
 * @param chunks The chunk-level parse nodes that supply the tokens.
 */
private void addTagEvents(List<Event> tagEvents, Parse[] chunks) {
  List<String> toks = new ArrayList<>();
  List<String> preds = new ArrayList<>();
  for (Parse chunk : chunks) {
    if (chunk.isPosTag()) {
      toks.add(chunk.getCoveredText());
      preds.add(chunk.getType());
    } else {
      for (Parse tok : chunk.getChildren()) {
        toks.add(tok.getCoveredText());
        preds.add(tok.getType());
      }
    }
  }

  // The token and tag sequences are the same for every position, so they are
  // converted once here rather than once per token (which made the loop
  // quadratic in sentence length).
  // NOTE(review): assumes getContext treats both arrays as read-only - confirm.
  String[] tokens = toks.toArray(new String[0]);
  String[] tags = preds.toArray(new String[0]);
  for (int ti = 0; ti < tokens.length; ti++) {
    tagEvents.add(new Event(tags[ti], tagContextGenerator.getContext(ti, tokens, tags, null)));
  }
}
/**
 * Reads the next parse from the underlying stream and converts it into a
 * POS sample built from the covered text and type of each tag node.
 *
 * @return The sample for the next parse, or {@code null} when the underlying
 *     stream returns {@code null}.
 * @throws IOException If reading from the underlying stream fails.
 */
public POSSample read() throws IOException {
  final Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  final List<String> sentence = new ArrayList<>();
  final List<String> tags = new ArrayList<>();
  for (Parse tagNode : parse.getTagNodes()) {
    final String token = tagNode.getCoveredText();
    final String tag = tagNode.getType();
    sentence.add(token);
    tags.add(tag);
  }
  return new POSSample(sentence, tags);
}
}
Parse c = chunks[ci]; if (c.isPosTag()) { toks.add(c.getCoveredText()); tags.add(c.getType()); preds.add(Parser.OTHER); for (int ti = 0, tl = kids.length; ti < tl; ti++) { Parse tok = kids[ti]; toks.add(tok.getCoveredText()); tags.add(tok.getType()); if (start) {
Parse punct = pi.next(); if (node != null) { feat.append(node.getHead().getCoveredText()).append("|").append(type) .append("|").append(node.getType()).append("|").append(punct.getType()); feat.append(node.getHead().getCoveredText()).append("|").append(type) .append("|").append(node.getType());
Parse c = chunks[ci]; if (c.isPosTag()) { toks.add(c.getCoveredText()); tags.add(c.getType()); preds.add(Parser.OTHER); for (int ti = 0, tl = kids.length; ti < tl; ti++) { Parse tok = kids[ti]; toks.add(tok.getCoveredText()); tags.add(tok.getType()); if (start) {
/**
 * Recursively prints the subtree below the specified node to standard out,
 * one line per descendant.
 * <p>
 * Each line starts with the bracketed, dot-separated path of child indices
 * leading to the node, followed by the node's type, its hash code, the hash
 * code and type of its parent, and its covered text.
 *
 * @param p The node whose children are printed.
 * @param levels The child indices on the path from the starting node to
 *     {@code p}.
 */
private void codeTree(Parse p, int[] levels) {
  final Parse[] kids = p.getChildren();
  final int depth = levels.length;

  // Path for the children: the parent's path plus one trailing slot that is
  // overwritten with each child's index in turn.
  final int[] childLevels = new int[depth + 1];
  System.arraycopy(levels, 0, childLevels, 0, depth);

  final StringBuilder pathBuff = new StringBuilder("[");
  for (int level : levels) {
    pathBuff.append(level).append(".");
  }
  final String pathPrefix = pathBuff.toString();

  for (int ki = 0; ki < kids.length; ki++) {
    final Parse kid = kids[ki];
    childLevels[depth] = ki;
    final String line = pathPrefix + ki + "] " + kid.getType() + " " + kid.hashCode()
        + " -> " + kid.getParent().hashCode() + " " + kid.getParent().getType()
        + " " + kid.getCoveredText();
    System.out.println(line);
    codeTree(kid, childLevels);
  }
}
/**
 * Reads the next parse from the underlying stream and converts it into a
 * POS sample built from the covered text and type of each tag node.
 *
 * @return The sample for the next parse, or {@code null} when the underlying
 *     stream returns {@code null}.
 * @throws IOException If reading from the underlying stream fails.
 */
public POSSample read() throws IOException {
  final Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  final Parse[] tagNodes = parse.getTagNodes();
  final int count = tagNodes.length;
  final String[] tokens = new String[count];
  final String[] tags = new String[count];
  for (int idx = 0; idx < count; idx++) {
    final Parse node = tagNodes[idx];
    tokens[idx] = node.getCoveredText();
    tags[idx] = node.getType();
  }
  return new POSSample(tokens, tags);
}
}
/** * Advances the parse by assigning it POS tags and returns multiple tag sequences. * @param p The parse to be tagged. * @return Parses with different POS-tag sequence assignments. */ protected Parse[] advanceTags(final Parse p) { Parse[] children = p.getChildren(); String[] words = new String[children.length]; double[] probs = new double[words.length]; for (int i = 0,il = children.length; i < il; i++) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); ts[i].getProbs(probs); newParses[i] = (Parse) p.clone(); //copies top level if (createDerivationString) newParses[i].getDerivation().append(i).append("."); for (int j = 0; j < words.length; j++) { Parse word = children[j]; //System.err.println("inserting tag "+tags[j]); double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); } } return newParses; }
unigram[0] = p_2.getHead().getCoveredText(); u_2 = dict.contains(new StringList(unigram)); unigram[0] = p2.getHead().getCoveredText(); u2 = dict.contains(new StringList(unigram)); unigram[0] = p0.getHead().getCoveredText(); u0 = dict.contains(new StringList(unigram)); bigram[0] = p_2.getHead().getCoveredText(); bigram[1] = p_1.getHead().getCoveredText(); b_2_1 = dict.contains(new StringList(bigram)); trigram[0] = p_2.getHead().getCoveredText(); trigram[1] = p_1.getHead().getCoveredText(); trigram[2] = p0.getHead().getCoveredText(); t_2_10 = dict.contains(new StringList(trigram)); trigram[0] = p_1.getHead().getCoveredText(); trigram[1] = p0.getHead().getCoveredText(); trigram[2] = p1.getHead().getCoveredText(); t_101 = dict.contains(new StringList(trigram)); unigram[0] = p_1.getHead().getCoveredText(); u_1 = dict.contains(new StringList(unigram)); bigram[0] = p_1.getHead().getCoveredText(); bigram[1] = p0.getHead().getCoveredText(); b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
words[wi] = pwords[wi].getCoveredText(); String[] cwords = new String[chunks.length]; for (int wi = 0; wi < cwords.length; wi++) { cwords[wi] = chunks[wi].getHead().getCoveredText(); String[] window = new String[5]; int wi = 0; if (ci - 2 >= 0) window[wi++] = chunks[ci - 2].getHead().getCoveredText(); if (ci - 1 >= 0) window[wi++] = chunks[ci - 1].getHead().getCoveredText(); window[wi++] = chunks[ci].getHead().getCoveredText(); if (ci + 1 < chunks.length) window[wi++] = chunks[ci + 1].getHead().getCoveredText(); if (ci + 2 < chunks.length) window[wi++] = chunks[ci + 2].getHead().getCoveredText(); if (wi < 5) { String[] subWindow = new String[wi];
words[i] = sp.getHead().getCoveredText(); ptags[i] = sp.getType();
/**
 * Creates the punctuation feature for the specified punctuation at the
 * specified index, based on the punctuation mark itself.
 *
 * @param punct The punctuation which is in context.
 * @param i The index of the punctuation relative to the parse.
 * @return A feature of the form {@code i=text}, where {@code text} is the
 *     covered text of the punctuation node.
 */
protected String punct(Parse punct, int i) {
  final String mark = punct.getCoveredText();
  return i + "=" + mark;
}
/**
 * Creates the punctuation feature for the specified punctuation at the
 * specified index, based on the punctuation mark itself.
 *
 * @param punct The punctuation which is in context.
 * @param i The index of the punctuation relative to the parse.
 * @return A feature of the form {@code i=text}, where {@code text} is the
 *     covered text of the punctuation node.
 */
protected String punct(Parse punct, int i) {
  final String mark = punct.getCoveredText();
  return i + "=" + mark;
}
/**
 * Verifies that the covered texts of the tag nodes of the first parse read
 * from the first sample match the expected token array.
 *
 * @throws IOException If reading the sample fails.
 */
@Test
public void testTokensAreCorrect() throws IOException {
  try (ObjectStream<Parse> samples =
      new ConstitParseSampleStream(ObjectStreamUtils.createObjectStream(getSample1()))) {
    final Parse parse = samples.read();
    final Parse[] tagNodes = parse.getTagNodes();

    final String[] actualTokens = new String[tagNodes.length];
    int next = 0;
    for (Parse tagNode : tagNodes) {
      actualTokens[next++] = tagNode.getCoveredText();
    }

    Assert.assertArrayEquals(sample1Tokens, actualTokens);
  }
}
}
/**
 * Checks whether any tag node of the specified parse covers exactly the
 * specified token text.
 *
 * @param token The token text to look for.
 * @param p The parse whose tag nodes are searched.
 * @return {@code true} if the covered text of at least one tag node equals
 *     {@code token}, {@code false} otherwise.
 */
private static boolean containsToken(String token, Parse p) {
  boolean found = false;
  for (Parse tagNode : p.getTagNodes()) {
    final String covered = tagNode.getCoveredText();
    if (covered.equals(token)) {
      found = true;
      break;
    }
  }
  return found;
}