/**
 * Maps a character onto the token category it belongs to.
 *
 * @param c the character to categorize.
 * @return the ordinal of the matching {@code TokenType} constant.
 */
private int classify(char c) {
    // Underscores, letters and digits all count as word material.
    boolean wordCharacter = c == '_' || Character.isAlphabetic(c) || Character.isDigit(c);
    if (wordCharacter) {
        return TokenType.TEXT.ordinal();
    }
    if (Character.isWhitespace(c)) {
        return TokenType.WHITESPACE.ordinal();
    }

    // Everything else is punctuation unless it cannot be displayed: ISO
    // control characters, the undefined key character, characters without a
    // Unicode block and members of the Specials block.
    Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
    boolean unprintable = Character.isISOControl(c)
            || c == KeyEvent.CHAR_UNDEFINED
            || block == null
            || block == Character.UnicodeBlock.SPECIALS;
    if (unprintable) {
        return TokenType.UNPRINTABLE.ordinal();
    }
    return TokenType.PUNCTUATION.ordinal();
}
/**
 * Maps a character onto the token category it belongs to.
 *
 * @param c the character to categorize.
 * @return the ordinal of the matching {@code TokenType} constant.
 */
private int classify(char c) {
    // Underscores, letters and digits all count as word material.
    boolean wordCharacter = c == '_' || Character.isAlphabetic(c) || Character.isDigit(c);
    if (wordCharacter) {
        return TokenType.TEXT.ordinal();
    }
    if (Character.isWhitespace(c)) {
        return TokenType.WHITESPACE.ordinal();
    }

    // Everything else is punctuation unless it cannot be displayed: ISO
    // control characters, the undefined key character, characters without a
    // Unicode block and members of the Specials block.
    Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
    boolean unprintable = Character.isISOControl(c)
            || c == KeyEvent.CHAR_UNDEFINED
            || block == null
            || block == Character.UnicodeBlock.SPECIALS;
    if (unprintable) {
        return TokenType.UNPRINTABLE.ordinal();
    }
    return TokenType.PUNCTUATION.ordinal();
}
/**
 * Maps a character onto the token category it belongs to.
 *
 * @param c the character to categorize.
 * @return the ordinal of the matching {@code TokenType} constant.
 */
private int classify(char c) {
    // Underscores, letters and digits all count as word material.
    boolean wordCharacter = c == '_' || Character.isAlphabetic(c) || Character.isDigit(c);
    if (wordCharacter) {
        return TokenType.TEXT.ordinal();
    }
    if (Character.isWhitespace(c)) {
        return TokenType.WHITESPACE.ordinal();
    }

    // Everything else is punctuation unless it cannot be displayed: ISO
    // control characters, the undefined key character, characters without a
    // Unicode block and members of the Specials block.
    Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
    boolean unprintable = Character.isISOControl(c)
            || c == KeyEvent.CHAR_UNDEFINED
            || block == null
            || block == Character.UnicodeBlock.SPECIALS;
    if (unprintable) {
        return TokenType.UNPRINTABLE.ordinal();
    }
    return TokenType.PUNCTUATION.ordinal();
}
/**
 * Peeks at the word starting at the current position. This is a lookahead:
 * the current position is not moved.
 *
 * @return the run of text characters beginning at the current position,
 *         or an empty string when the character there is not text.
 */
String getNextWord() {
    final int wordType = TokenType.TEXT.ordinal();
    int end = current;
    // Advance until the first non-text character or the end of the buffer.
    while (end < this.text.length && classify(this.text[end]) == wordType) {
        end++;
    }
    return textstring.substring(current, end);
}
/**
 * Peeks at the word starting at the current position. This is a lookahead:
 * the current position is not moved.
 *
 * @return the run of text characters beginning at the current position,
 *         or an empty string when the character there is not text.
 */
String getNextWord() {
    final int wordType = TokenType.TEXT.ordinal();
    int end = current;
    // Advance until the first non-text character or the end of the buffer.
    while (end < this.text.length && classify(this.text[end]) == wordType) {
        end++;
    }
    return textstring.substring(current, end);
}
/**
 * Peeks at the word starting at the current position. This is a lookahead:
 * the current position is not moved.
 *
 * @return the run of text characters beginning at the current position,
 *         or an empty string when the character there is not text.
 */
String getNextWord() {
    final int wordType = TokenType.TEXT.ordinal();
    int end = current;
    // Advance until the first non-text character or the end of the buffer.
    while (end < this.text.length && classify(this.text[end]) == wordType) {
        end++;
    }
    return textstring.substring(current, end);
}
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },
@Override public void process(char token) { String cword = getCurrent().getWord(); // let's see if this is a contraction. if (cword.equals("'")) { String word = getNextWord(); if (Contractions.contains(word)) { // just change the state type to text, this will end up being a // word. getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } } else if (cword.equals(".") && Character.isDigit(token)) { // This is a decimal number (probably), just keep the current state and // make it a word token getCurrent().stateindex = TokenType.TEXT.ordinal(); state = getCurrent().stateindex; return; } pop(current); push(new State(TokenizerState.IN_WORD), current); } },