/**
 * Returns a new {@code TokenSequence} constructed from {@code tokens}.
 *
 * <p>NOTE(review): the trailing brace on this line closes an enclosing type whose header is not
 * visible in this excerpt - presumably the builder class; confirm against the full file.
 *
 * @return a freshly constructed {@code TokenSequence}
 */
public TokenSequence build() { return new TokenSequence(tokens); } }
/**
 * Returns the same text as {@code asString()}.
 *
 * @return the result of {@code asString()}
 */
@Override
public String toString() {
  final String text = asString();
  return text;
}
/**
 * Returns the first entry of {@code words}.
 *
 * @return {@code words[0]}, or the empty string when {@code isEmpty()} reports {@code true}
 */
public String first() {
  return isEmpty() ? "" : words[0];
}
/**
 * Builds a {@code TokenSequence} from the tokens produced by {@code separate(spaceSeparatedWords)},
 * leaving out every token that equals {@code SENTENCE_START} or {@code SENTENCE_END} when compared
 * case-insensitively.
 *
 * @param spaceSeparatedWords text handed to {@code separate} for tokenization
 * @return a sequence holding the remaining tokens in their original order
 */
public static TokenSequence fromStartEndTaggedSequence(String spaceSeparatedWords) {
  List<String> kept = new ArrayList<>();
  for (Iterator<String> tokens = separate(spaceSeparatedWords).iterator(); tokens.hasNext(); ) {
    String token = tokens.next();
    // Keep only tokens that are neither the start marker nor the end marker.
    if (!token.equalsIgnoreCase(SENTENCE_START) && !token.equalsIgnoreCase(SENTENCE_END)) {
      kept.add(token);
    }
  }
  return new TokenSequence(kept);
}
/**
 * Returns the word n-grams of this sequence as space-joined strings.
 *
 * <p>When {@code size()} is below 2, an empty list is returned. When {@code size()} is below
 * {@code gramSize}, the result is a single-element list holding {@code asString()}. Otherwise
 * every run of {@code gramSize} consecutive entries of {@code words} is joined with a single
 * space, ordered by starting position.
 *
 * @param gramSize number of consecutive words in each gram
 * @return the list of grams as described above
 */
public List<String> getGrams(int gramSize) {
  final int tokenCount = size();
  if (tokenCount < 2) {
    return Collections.emptyList();
  }
  if (tokenCount < gramSize) {
    return Lists.newArrayList(asString());
  }

  // Number of starting positions that leave room for a full gram.
  final int gramCount = words.length - gramSize + 1;
  List<String> grams = new ArrayList<>(size());
  for (int start = 0; start < gramCount; start++) {
    StringBuilder gram = new StringBuilder();
    for (int offset = 0; offset < gramSize; offset++) {
      // Separator goes in front of every word except the first one of the gram.
      if (offset > 0) {
        gram.append(" ");
      }
      gram.append(words[start + offset]);
    }
    grams.add(gram.toString());
  }
  return grams;
}
/**
 * Reports the size of the source sequence as given by {@code sourceSequence.size()}.
 *
 * @param sourceSequence sequence to measure
 * @return the value of {@code sourceSequence.size()}
 */
@Override
public int sourceSize(TokenSequence sourceSequence) {
  final int tokenCount = sourceSequence.size();
  return tokenCount;
}
/**
 * Creates a sequence from text by passing it through {@code separate} and delegating to the
 * constructor that accepts the resulting list.
 *
 * @param spaceSeparatedWords text handed to {@code separate} for tokenization
 */
public TokenSequence(String spaceSeparatedWords) {
  this(
      Lists.newArrayList(
          separate(spaceSeparatedWords)));
}
/**
 * Returns the final entry of {@code words}.
 *
 * @return the element at index {@code words.length - 1}, or the empty string when
 *     {@code isEmpty()} reports {@code true}
 */
public String last() {
  if (!isEmpty()) {
    final int lastIndex = words.length - 1;
    return words[lastIndex];
  }
  return "";
}
/**
 * Reports the size of the source sequence as the character length of its string form.
 *
 * @param sourceSequence sequence to measure
 * @return the length of {@code sourceSequence.asString()}
 */
@Override
public int sourceSize(TokenSequence sourceSequence) {
  final String text = sourceSequence.asString();
  final int characterCount = text.length();
  return characterCount;
}