/**
 * Checks whether the text from the current state's start offset onward contains a match for
 * {@code emailRegex}. On a match, the cursor is moved to the last matched character, the
 * state is popped at the position just past it, and {@code true} is returned.
 *
 * <p>NOTE(review): {@code find()} accepts a match that begins anywhere in the remaining
 * text, not only at the start offset, unless {@code emailRegex} is anchored -- verify
 * against the pattern's definition.
 *
 * @return {@code true} if {@code emailRegex} matched and the cursor was advanced;
 *     {@code false} if there was no match, in which case the cursor is left unchanged.
 */
protected boolean isEmail() {
    int start = this.getCurrent().start;

    // Build the tail directly from the buffer rather than copying the whole buffer into a
    // String and then taking a substring of it.
    String tail = new String(text, start, text.length - start);

    Matcher matcher = emailRegex.matcher(tail);
    if (!matcher.find()) {
        return false;
    }

    // matcher.end() is exclusive and relative to the tail, so start + end - 1 is the
    // absolute index of the last matched character.
    current = start + (matcher.end() - 1);
    this.pop(this.current + 1);
    return true;
}
// Offset at which the current state begins.
int start = this.getCurrent().start;
// Copy of the text from that offset through the end of the buffer.
String tmp = new String (text).substring(start);
// Matcher for emailRegex2 over that tail; the match itself is performed outside this fragment.
Matcher matcher = emailRegex2.matcher(tmp);
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },
// Reference to whatever getCurrent() returns at this point.
State cs = this.getCurrent();
// New UrlValidation instance. NOTE(review): presumably used to check URL syntax; its use lies outside this fragment -- confirm.
UrlValidation syntaxvalid = new UrlValidation();
// Declared without an initial value; it is assigned outside this fragment.
int where;
// Reference to whatever getCurrent() returns at this point.
State cs = this.getCurrent();
// New UrlValidation instance. NOTE(review): presumably used to check URL syntax; its use lies outside this fragment -- confirm.
UrlValidation syntaxvalid = new UrlValidation();
// Declared without an initial value; it is assigned outside this fragment.
int where;
// Reference to whatever getCurrent() returns at this point.
State cs = this.getCurrent();
// New UrlValidation instance. NOTE(review): presumably used to check URL syntax; its use lies outside this fragment -- confirm.
UrlValidation syntaxvalid = new UrlValidation();
// Declared without an initial value; it is assigned outside this fragment.
int where;
if (getCurrent().isNumeric()) { int advance = 1; while (true) { if (getCurrent().isNumeric()) { if (current < (text.length - 1) && Character.isDigit(text[current + 1])) { State s = getCurrent(); if (!Character.isUpperCase(text[s.start])) if (getCurrent().isNumeric()) { if (getCurrent().isAbbr()) return; // previous was upper case, acronym and word else if (getCurrent().isSpecialMeaning()){ return;
if (getCurrent().isDate()) { int advance = 1; while (true) { if (getCurrent().isNumeric()) { if (current < (text.length - 1) && Character.isDigit(text[current + 1])) { State s = getCurrent(); if (!Character.isUpperCase(text[s.start])) if (getCurrent().isNumeric()) { if (getCurrent().isAbbr()) return; // previous was upper case, acronym and word
if (getCurrent().isDate()) { int advance = 1; while (true) { if (getCurrent().isNumeric()) { if (current < (text.length - 1) && Character.isDigit(text[current + 1])) { State s = getCurrent(); if (!Character.isUpperCase(text[s.start])) if (getCurrent().isNumeric()) { if (getCurrent().isAbbr()) return; // previous was upper case, acronym and word