/**
 * Advances past a run of whitespace characters.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first character at or after {@code pos} that is not
 *     whitespace according to {@code StringUtil.isWhitespace}; if no such character
 *     exists the scan stops at {@code s.length()}, and a {@code pos} already at or
 *     past the end is returned unchanged
 */
private int getFirstNonWS(String s, int pos) {
  final int limit = s.length();
  int cursor = pos;
  for (; cursor < limit; cursor++) {
    if (!StringUtil.isWhitespace(s.charAt(cursor))) {
      break;
    }
  }
  return cursor;
}
/**
 * Advances past a run of non-whitespace characters.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first character at or after {@code pos} that is
 *     whitespace according to {@code StringUtil.isWhitespace}; if no such character
 *     exists the scan stops at {@code s.length()}, and a {@code pos} already at or
 *     past the end is returned unchanged
 */
private int getFirstWS(String s, int pos) {
  final int limit = s.length();
  int cursor = pos;
  for (; cursor < limit; cursor++) {
    if (StringUtil.isWhitespace(s.charAt(cursor))) {
      break;
    }
  }
  return cursor;
}
/**
 * Locates the start of the nearest whitespace run that lies before a given index.
 *
 * <p>The search walks backwards from {@code seek - 1} until it hits a whitespace
 * character at an index greater than zero, then keeps walking backwards while the
 * preceding character is also whitespace, so the result is the first character of
 * that run.
 *
 * @param sb the text being examined
 * @param seek the index to search backwards from (exclusive)
 * @return the index of the first whitespace character of the run found, or
 *     {@code 0} when no whitespace is found at an index greater than zero
 */
private static int previousSpaceIndex(CharSequence sb, int seek) {
  int idx = seek - 1;

  // Step back over non-whitespace until a whitespace character (or index 0) is reached.
  while (idx > 0 && !StringUtil.isWhitespace(sb.charAt(idx))) {
    idx--;
  }

  if (idx <= 0 || !StringUtil.isWhitespace(sb.charAt(idx))) {
    return 0;
  }

  // Rewind to the first character of the whitespace run.
  while (idx > 0 && StringUtil.isWhitespace(sb.charAt(idx - 1))) {
    idx--;
  }
  return idx;
}
/**
 * Locates the end of the nearest whitespace run that lies after a given index.
 *
 * <p>The search walks forwards from {@code seek + 1} while the index is below
 * {@code lastIndex}. On the first whitespace character it keeps advancing while the
 * following character (bounded by {@code sb.length()}) is also whitespace, so the
 * result is the last character of that run.
 *
 * @param sb the text being examined
 * @param seek the index to search forwards from (exclusive)
 * @param lastIndex the upper bound of the search; returned when no whitespace is found
 * @return the index of the last whitespace character of the run found, or
 *     {@code lastIndex} when no whitespace occurs before it
 */
private static int nextSpaceIndex(CharSequence sb, int seek, int lastIndex) {
  for (int idx = seek + 1; idx < lastIndex; idx++) {
    if (!StringUtil.isWhitespace(sb.charAt(idx))) {
      continue;
    }
    // Found whitespace: extend to the final character of this whitespace run.
    int runEnd = idx;
    while (runEnd + 1 < sb.length() && StringUtil.isWhitespace(sb.charAt(runEnd + 1))) {
      runEnd++;
    }
    return runEnd;
  }
  return lastIndex;
}
}
/**
 * Extracts the name portion of a tag.
 *
 * <p>The character at index 0 is never inspected (presumably the opening
 * {@code '<'} - confirm against callers). A {@code '/'} at index 1 is skipped, and
 * the name runs up to, but not including, the first {@code '>'} or whitespace
 * character found at index 1 or later.
 *
 * @param tagChars the characters of the tag
 * @return the tag name, possibly empty
 * @throws InvalidFormatException if no {@code '>'} or whitespace character
 *     terminates the name
 */
private static String extractTagName(CharSequence tagChars) throws InvalidFormatException {
  final int length = tagChars.length();
  final boolean slashAtOne = length > 1 && tagChars.charAt(1) == '/';
  final int nameStart = slashAtOne ? 2 : 1;

  int idx = 1;
  while (idx < length) {
    final char ch = tagChars.charAt(idx);
    if (ch == '>' || StringUtil.isWhitespace(ch)) {
      return tagChars.subSequence(nameStart, idx).toString();
    }
    idx++;
  }

  throw new InvalidFormatException("Failed to extract tag name!");
}
if (StringUtil.isWhitespace(tagChars.charAt(i)) && !extractValue) { extractKey = true; else if (extractKey && ('=' == tagChars.charAt(i) || StringUtil.isWhitespace(tagChars.charAt(i)))) { extractKey = false;
preds.add(key + "_num"); else if (StringUtil.isWhitespace(c)) { preds.add(key + "_ws");
public Span[] tokenizePos(String d) { int tokStart = -1; List<Span> tokens = new ArrayList<>(); boolean inTok = false; //gather up potential tokens int end = d.length(); for (int i = 0; i < end; i++) { if (StringUtil.isWhitespace(d.charAt(i))) { if (inTok) { tokens.add(new Span(tokStart, i)); inTok = false; tokStart = -1; } } else { if (!inTok) { tokStart = i; inTok = true; } } } if (inTok) { tokens.add(new Span(tokStart, end)); } return tokens.toArray(new Span[tokens.size()]); } }
for (int ci = 0; ci < sl; ci++) { char c = s.charAt(ci); if (StringUtil.isWhitespace(c)) { charType = CharacterEnum.WHITESPACE;
if (position > 0 && StringUtil.isWhitespace(sb.charAt(position - 1))) collectFeats.add("sp"); if (position < lastIndex && StringUtil.isWhitespace(sb.charAt(position + 1))) collectFeats.add("sn"); collectFeats.add("eos=" + escapeChar(sb.charAt(position)));
/**
 * Checks that U+00A0 (no-break space), U+2007 (figure space) and U+202F (narrow
 * no-break space) are reported as whitespace by {@code StringUtil.isWhitespace},
 * first when passed as {@code int} values and then when passed as {@code char} values.
 */
@Test
public void testNoBreakSpace() {
  final int[] noBreakSpaces = {0x00A0, 0x2007, 0x202F};

  for (int codePoint : noBreakSpaces) {
    Assert.assertTrue(StringUtil.isWhitespace(codePoint));
  }
  for (int codePoint : noBreakSpaces) {
    Assert.assertTrue(StringUtil.isWhitespace((char) codePoint));
  }
}
if (!StringUtil.isWhitespace(text.charAt(searchIndex))) { text.insert(searchIndex, TokenSample.DEFAULT_SEPARATOR_CHARS);
int end = s.length(); while (start < s.length() && StringUtil.isWhitespace(s.charAt(start))) start++; while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1))) end--;
/**
 * Returns a span covering the same text with leading and trailing whitespace removed.
 *
 * @param text the character sequence this span's offsets refer to
 *
 * @return this same object if there is nothing to trim; otherwise a new span with
 *     the narrowed offsets and this span's type. If trimming makes the start pass
 *     the end, the result is an empty span positioned at the original start.
 */
public Span trim(CharSequence text) {
  // Move the start forward over leading whitespace.
  int trimmedStart = getStart();
  while (trimmedStart < getEnd() && StringUtil.isWhitespace(text.charAt(trimmedStart))) {
    trimmedStart++;
  }

  // Move the end backward over trailing whitespace.
  int trimmedEnd = getEnd();
  while (trimmedEnd > getStart() && StringUtil.isWhitespace(text.charAt(trimmedEnd - 1))) {
    trimmedEnd--;
  }

  final boolean unchanged = trimmedStart == getStart() && trimmedEnd == getEnd();
  if (unchanged) {
    return this;
  }
  if (trimmedStart > trimmedEnd) {
    // The two scans crossed each other: collapse to an empty span.
    return new Span(getStart(), getStart(), getType());
  }
  return new Span(trimmedStart, trimmedEnd, getType());
}
/**
 * Finds where the current run of non-whitespace characters ends.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first whitespace character (per
 *     {@code StringUtil.isWhitespace}) at or after {@code pos}; the scan stops at
 *     {@code s.length()} if none is found, and a {@code pos} already at or past the
 *     end is returned unchanged
 */
private int getFirstWS(String s, int pos) {
  int index = pos;
  while (index < s.length()) {
    if (StringUtil.isWhitespace(s.charAt(index))) {
      return index;
    }
    index++;
  }
  return index;
}
/**
 * Scans forward to the next whitespace character.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first character at or after {@code pos} for which
 *     {@code StringUtil.isWhitespace} is true; the scan stops at {@code s.length()}
 *     if there is none, and a {@code pos} already at or past the end is returned
 *     unchanged
 */
private int getFirstWS(String s, int pos) {
  final int end = s.length();
  int at = pos;
  for (; at < end; at++) {
    final char current = s.charAt(at);
    if (StringUtil.isWhitespace(current)) {
      break;
    }
  }
  return at;
}
/**
 * Skips the non-whitespace characters starting at a position.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first whitespace character (per
 *     {@code StringUtil.isWhitespace}) at or after {@code pos}; the scan stops at
 *     {@code s.length()} if none is found, and a {@code pos} already at or past the
 *     end is returned unchanged
 */
private int getFirstWS(String s, int pos) {
  int scan = pos;
  while (true) {
    if (scan >= s.length()) {
      return scan;
    }
    if (StringUtil.isWhitespace(s.charAt(scan))) {
      return scan;
    }
    scan++;
  }
}
/**
 * Returns the position of the next whitespace character.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first character at or after {@code pos} that
 *     {@code StringUtil.isWhitespace} reports as whitespace; the scan stops at
 *     {@code s.length()} if there is none, and a {@code pos} already at or past the
 *     end is returned unchanged
 */
private int getFirstWS(String s, int pos) {
  final int length = s.length();
  int offset = pos;
  boolean tokenChar = offset < length && !StringUtil.isWhitespace(s.charAt(offset));
  while (tokenChar) {
    offset++;
    tokenChar = offset < length && !StringUtil.isWhitespace(s.charAt(offset));
  }
  return offset;
}
/**
 * Advances past whitespace without crossing a line feed.
 *
 * @param s the text to scan
 * @param pos the index at which scanning starts
 * @return the index of the first character at or after {@code pos} that is either
 *     not whitespace (per {@code StringUtil.isWhitespace}) or is {@code '\n'}; the
 *     scan stops at {@code s.length()} if no such character exists, and a
 *     {@code pos} already at or past the end is returned unchanged
 */
private int getFirstNonWS(String s, int pos) {
  int cursor = pos;
  while (cursor < s.length()) {
    final char ch = s.charAt(cursor);
    // Stop on any non-whitespace character, and also on a line feed.
    if (!StringUtil.isWhitespace(ch) || ch == '\n') {
      break;
    }
    cursor++;
  }
  return cursor;
}
/**
 * Reads the name out of a tag's character sequence.
 *
 * <p>Index 0 is never examined (presumably it holds the opening {@code '<'} -
 * confirm against callers). The name starts at index 2 when index 1 holds a
 * {@code '/'}, otherwise at index 1, and ends just before the first {@code '>'} or
 * whitespace character found at index 1 or later.
 *
 * @param tagChars the characters of the tag
 * @return the tag name, possibly empty
 * @throws InvalidFormatException if the sequence contains no {@code '>'} or
 *     whitespace character from index 1 onwards
 */
private static String extractTagName(CharSequence tagChars) throws InvalidFormatException {
  int nameStart = 1;
  if (tagChars.length() > 1 && '/' == tagChars.charAt(1)) {
    nameStart = 2;
  }

  for (int pos = 1; pos < tagChars.length(); pos++) {
    final char current = tagChars.charAt(pos);
    final boolean terminator = current == '>' || StringUtil.isWhitespace(current);
    if (!terminator) {
      continue;
    }
    return tagChars.subSequence(nameStart, pos).toString();
  }

  throw new InvalidFormatException("Failed to extract tag name!");
}