parsii.tokenizer.Tokenizer Java code examples

/**
 * Determines if the given Char is a symbol character.
 * <p>
 * Applies the inherited rules first and additionally rejects {@code '#'}, so a hash is never
 * treated as a symbol character by this tokenizer.
 *
 * @param ch the character to check
 * @return {@code true} if the superclass accepts the character and it is not {@code '#'},
 *         {@code false} otherwise
 */
@Override
protected boolean isSymbolCharacter(Char ch) {
  // Ask the superclass first so its checks run exactly as before.
  if (!super.isSymbolCharacter(ch)) {
    return false;
  }
  return !ch.is('#');
}

/**
 * Creates a parser which reads from the given input.
 * <p>
 * The input is wrapped in a new {@link Tokenizer}, and that tokenizer is handed the
 * {@code errors} collection of this parser as its problem collector.
 *
 * @param input the reader to tokenize
 * @param scope the scope stored in this parser
 */
protected Parser(Reader input, Scope scope) {
  this.scope = scope;
  this.tokenizer = new Tokenizer(input);
  // Route problems detected while tokenizing into the parser's own error list.
  this.tokenizer.setProblemCollector(errors);
}

/**
 * Consumes the current token if it is a {@code KEYWORD} with the given content.
 * <p>
 * If the current token does not match, nothing is consumed and an error is reported via
 * {@code addError} instead.
 *
 * @param keyword the expected content of the current token
 */
public void consumeExpectedKeyword(String keyword) {
  if (!current().matches(Token.TokenType.KEYWORD, keyword)) {
    // Mismatch: report it and leave the current token in place.
    addError(current(), "Unexpected token: '%s'. Expected: '%s'", current().getSource(), keyword);
    return;
  }
  consume();
}

  /**
   * Requires the current token to match the given type and trigger.
   * <p>
   * On a match the token is consumed. Otherwise an error describing the unexpected token is
   * added to the error list and no token is consumed.
   *
   * @param type    the type of the expected token
   * @param trigger the trigger of the expected token
   */
  protected void expect(Token.TokenType type, String trigger) {
    if (tokenizer.current().matches(type, trigger)) {
      tokenizer.consume();
      return;
    }
    // Mismatch: record the problem at the offending token, input stays where it is.
    errors.add(ParseError.error(
        tokenizer.current(),
        String.format("Unexpected token '%s'. Expected: '%s'", tokenizer.current().getSource(), trigger)));
  }
}

// Excerpt of the token dispatch logic; closing braces and some lines are missing from this listing.
// Checks are made in this order: line comment, block comment, number, identifier, bracket,
// special id, symbol. Comments are skipped and fetch() is called again to obtain the next token.
if (isAtStartOfLineComment(true)) {
  skipToEndOfLine();
  return fetch();
if (isAtStartOfBlockComment(true)) {
  skipBlockComment();
  return fetch();
if (isAtStartOfNumber()) {
  return fetchNumber();
if (isAtStartOfIdentifier()) {
  return fetchId();
  // NOTE(review): the condition guarding the following return is not visible in this excerpt.
  return fetchString();
// A bracket is returned as a SYMBOL token made of that single consumed character.
if (isAtBracket(false)) {
  return Token.createAndFill(Token.TokenType.SYMBOL, input.consume());
if (isAtStartOfSpecialId()) {
  return fetchSpecialId();
if (isSymbolCharacter(input.current())) {
  return fetchSymbol();
  // NOTE(review): the next line is the tail of a call whose beginning is not visible in this excerpt.
                          input.current().getStringValue())));
// Fallback: consume the current character and try again with the next one.
input.consume();
return fetch();

/**
 * Determines if the given Char is a symbol character.
 * <p>
 * End of input, digits, letters, whitespace and ISO control characters are never symbol
 * characters. Anything else is a symbol character unless the tokenizer is at a bracket, at the
 * start of a block comment, line comment, number or identifier, or the character is a
 * registered string delimiter.
 * <p>
 * NOTE(review): the bracket / comment / number / identifier helpers take no character argument,
 * so they presumably inspect the current input position rather than {@code ch} - confirm that
 * callers always pass the current character.
 *
 * @param ch the character to check
 * @return {@code true} if the given character is a valid symbol character, {@code false} otherwise
 */
@SuppressWarnings("squid:S1067")
protected boolean isSymbolCharacter(Char ch) {
  // Characters belonging to another basic class are rejected right away.
  if (ch.isEndOfInput() || ch.isDigit() || ch.isLetter() || ch.isWhitepace()) {
    return false;
  }
  char value = ch.getValue();
  if (Character.isISOControl(value)) {
    return false;
  }
  // The remaining checks are evaluated one by one, stopping at the first that applies.
  if (isAtBracket(true)) {
    return false;
  }
  if (isAtStartOfBlockComment(false)) {
    return false;
  }
  if (isAtStartOfLineComment(false)) {
    return false;
  }
  if (isAtStartOfNumber()) {
    return false;
  }
  if (isAtStartOfIdentifier()) {
    return false;
  }
  return !stringDelimiters.containsKey(value);
}

/**
 * Reads and returns a special id.
 * <p>
 * The current character becomes the trigger of the token. All following characters accepted by
 * {@code isIdentifierChar} are appended to its content, and the result is passed through
 * {@code handleKeywords} before being returned.
 *
 * @return the parsed special id as Token
 */
protected Token fetchSpecialId() {
  Token specialId = Token.create(Token.TokenType.SPECIAL_ID, input.current());
  // The first character is the trigger of the special id.
  specialId.addToTrigger(input.consume());
  // Everything that still counts as an identifier character forms the content.
  while (isIdentifierChar(input.current())) {
    specialId.addToContent(input.consume());
  }
  return handleKeywords(specialId);
}

// Excerpt: parses a numeric literal with an optional leading '+' and an optional scale suffix
// (n, u, m, k/K, M, G) given as a following ID token. The listing is truncated; closing braces
// and the end of the final statement are missing.
if (tokenizer.current().isSymbol("+") && tokenizer.next().isNumber()) {
  // Drop the leading '+' so the number itself becomes the current token.
  tokenizer.consume();
if (tokenizer.current().isNumber()) {
  double value = Double.parseDouble(tokenizer.consume().getContents());
  if (tokenizer.current().is(Token.TokenType.ID)) {
    // Interned so that the reference comparisons (==) against string literals below are valid.
    String quantifier = tokenizer.current().getContents().intern();
    if ("n" == quantifier) {
      // nano: divide by 10^9
      value /= 1000000000d;
      tokenizer.consume();
    } else if ("u" == quantifier) {
      // micro: divide by 10^6
      value /= 1000000d;
      tokenizer.consume();
    } else if ("m" == quantifier) {
      // milli: divide by 10^3
      value /= 1000d;
      tokenizer.consume();
    } else if ("K" == quantifier || "k" == quantifier) {
      // kilo (either case): multiply by 10^3
      value *= 1000d;
      tokenizer.consume();
    } else if ("M" == quantifier) {
      // mega: multiply by 10^6
      value *= 1000000d;
      tokenizer.consume();
    } else if ("G" == quantifier) {
      // giga: multiply by 10^9
      value *= 1000000000d;
      tokenizer.consume();
    } else {
      // Any other ID is consumed and reported as an error (the call is cut off in this excerpt).
      Token token = tokenizer.consume();
      errors.add(ParseError.error(token,

/**
 * Determines if the underlying input is looking at a bracket, additionally treating
 * {@code '%'} like one.
 * <p>
 * Handling {@code %} as a single-character symbol makes {@code 10%;} tokenize to
 * "10", "%", ";" instead of "10", "%;". The method name is therefore slightly misleading.
 *
 * @param inSymbol passed through unchanged to the superclass check
 * @return {@code true} if the superclass detects a bracket or the current character is
 *         {@code '%'}, {@code false} otherwise
 */
@Override
protected boolean isAtBracket(boolean inSymbol) {
  if (super.isAtBracket(inSymbol)) {
    return true;
  }
  return input.current().is('%');
}

/**
 * Checks if the underlying input is looking at the end of a block comment.
 * <p>
 * If an end of block comment is detected, the characters forming it are consumed by this method.
 *
 * @return {@code true} if the next character(s) of the input end a block comment,
 *         {@code false} otherwise
 */
protected boolean isAtEndOfBlockComment() {
  // Matches the input against blockCommentEnd; the second argument is true so a match is consumed.
  boolean endReached = canConsumeThisString(blockCommentEnd, true);
  return endReached;
}

/**
 * Determines if the given Char is a valid identifier part.
 * <p>
 * In addition to everything the superclass accepts, "-", "." and "#" are accepted (CSS selectors
 * can contain them) as long as the character following in the input is not whitespace, i.e. as
 * long as it is not the last character of the token.
 *
 * @param current the character to check
 * @return {@code true} if the character may be part of an identifier, {@code false} otherwise
 */
@Override
protected boolean isIdentifierChar(Char current) {
  // The lookahead via input.next() is only performed for one of the three extra characters.
  return super.isIdentifierChar(current)
      || ((current.is('-') || current.is('.') || current.is('#')) && !input.next().isWhitepace());
}

/**
 * Determines if the underlying input is looking at a valid start of an identifier.
 * <p>
 * Besides the starts accepted by the superclass, a "-" or "." directly followed by a letter is
 * accepted, which supports vendor specific names and class selectors such as
 * {@code -moz-border-radius} or {@code .test}.
 *
 * @return {@code true} if the input is at the start of an identifier, {@code false} otherwise
 */
@Override
protected boolean isAtStartOfIdentifier() {
  return super.isAtStartOfIdentifier()
      || ((input.current().is('-') || input.current().is('.')) && input.next().isLetter());
}

/**
 * Reads and returns a number, including a directly attached unit.
 * <p>
 * After the superclass has read the number, either a single immediately following {@code %} or
 * a run of immediately following letters (like "px") is appended to the content of the token.
 *
 * @return the number token, with the unit appended to its content if one was present
 */
@Override
protected Token fetchNumber() {
  Token number = super.fetchNumber();
  if (input.current().is('%')) {
    // A percent sign ends the token; nothing after it is appended.
    number.addToContent(input.consume());
  } else {
    // Otherwise take every letter that directly follows as the unit.
    while (input.current().isLetter()) {
      number.addToContent(input.consume());
    }
  }
  return number;
}

/**
 * Creates a new tokenizer for the given input.
 * <p>
 * Two string delimiters are registered by default: {@code "} with {@code \} as its escape
 * character, and {@code '} with {@code '\0'} passed as its escape character.
 *
 * @param input the input to parse. The reader will be buffered by the implementation so that it can be effectively
 *              read character by character.
 */
public Tokenizer(Reader input) {
  LookaheadReader reader = new LookaheadReader(input);
  reader.setProblemCollector(problemCollector);
  this.input = reader;

  // Setup default string handling
  addStringDelimiter('"', '\\');
  addStringDelimiter('\'', '\0');
}

// Excerpt of the token dispatch logic; closing braces and some lines are missing from this listing.
// Checks are made in this order: line comment, block comment, number, identifier, bracket,
// special id, symbol. Comments are skipped and fetch() is called again to obtain the next token.
if (isAtStartOfLineComment(true)) {
  skipToEndOfLine();
  return fetch();
if (isAtStartOfBlockComment(true)) {
  skipBlockComment();
  return fetch();
if (isAtStartOfNumber()) {
  return fetchNumber();
if (isAtStartOfIdentifier()) {
  return fetchId();
  // NOTE(review): the condition guarding the following return is not visible in this excerpt.
  return fetchString();
// A bracket is returned as a SYMBOL token made of that single consumed character.
if (isAtBracket(false)) {
  return Token.createAndFill(Token.TokenType.SYMBOL, input.consume());
if (isAtStartOfSpecialId()) {
  return fetchSpecialId();
if (isSymbolCharacter(input.current())) {
  return fetchSymbol();
  // NOTE(review): the next line is the tail of a call whose beginning is not visible in this excerpt.
                          input.current().getStringValue())));
// Fallback: consume the current character and try again with the next one.
input.consume();
return fetch();

/**
 * Determines if the given Char is a symbol character.
 * <p>
 * End of input, digits, letters, whitespace and ISO control characters are never symbol
 * characters. Anything else is a symbol character unless the tokenizer is at a bracket, at the
 * start of a block comment, line comment, number or identifier, or the character is a
 * registered string delimiter.
 * <p>
 * NOTE(review): the bracket / comment / number / identifier helpers take no character argument,
 * so they presumably inspect the current input position rather than {@code ch} - confirm that
 * callers always pass the current character.
 *
 * @param ch the character to check
 * @return {@code true} if the given character is a valid symbol character, {@code false} otherwise
 */
@SuppressWarnings("squid:S1067")
protected boolean isSymbolCharacter(Char ch) {
  // Characters belonging to another basic class are rejected right away.
  if (ch.isEndOfInput() || ch.isDigit() || ch.isLetter() || ch.isWhitepace()) {
    return false;
  }
  char value = ch.getValue();
  if (Character.isISOControl(value)) {
    return false;
  }
  // The remaining checks are evaluated one by one, stopping at the first that applies.
  if (isAtBracket(true)) {
    return false;
  }
  if (isAtStartOfBlockComment(false)) {
    return false;
  }
  if (isAtStartOfLineComment(false)) {
    return false;
  }
  if (isAtStartOfNumber()) {
    return false;
  }
  if (isAtStartOfIdentifier()) {
    return false;
  }
  return !stringDelimiters.containsKey(value);
}

  /**
   * Requires the current token to match the given type and trigger.
   * <p>
   * On a match the token is consumed. Otherwise an error describing the unexpected token is
   * added to the error list and no token is consumed.
   *
   * @param type    the type of the expected token
   * @param trigger the trigger of the expected token
   */
  protected void expect(Token.TokenType type, String trigger) {
    if (tokenizer.current().matches(type, trigger)) {
      tokenizer.consume();
      return;
    }
    // Mismatch: record the problem at the offending token, input stays where it is.
    errors.add(ParseError.error(
        tokenizer.current(),
        String.format("Unexpected token '%s'. Expected: '%s'", tokenizer.current().getSource(), trigger)));
  }
}

/**
 * Reads and returns a special id.
 * <p>
 * The current character becomes the trigger of the token. All following characters accepted by
 * {@code isIdentifierChar} are appended to its content, and the result is passed through
 * {@code handleKeywords} before being returned.
 *
 * @return the parsed special id as Token
 */
protected Token fetchSpecialId() {
  Token specialId = Token.create(Token.TokenType.SPECIAL_ID, input.current());
  // The first character is the trigger of the special id.
  specialId.addToTrigger(input.consume());
  // Everything that still counts as an identifier character forms the content.
  while (isIdentifierChar(input.current())) {
    specialId.addToContent(input.consume());
  }
  return handleKeywords(specialId);
}

// Excerpt: parses a numeric literal with an optional leading '+' and an optional scale suffix
// (n, u, m, k/K, M, G) given as a following ID token. The listing is truncated; closing braces
// and the end of the final statement are missing.
if (tokenizer.current().isSymbol("+") && tokenizer.next().isNumber()) {
  // Drop the leading '+' so the number itself becomes the current token.
  tokenizer.consume();
if (tokenizer.current().isNumber()) {
  double value = Double.parseDouble(tokenizer.consume().getContents());
  if (tokenizer.current().is(Token.TokenType.ID)) {
    // Interned so that the reference comparisons (==) against string literals below are valid.
    String quantifier = tokenizer.current().getContents().intern();
    if ("n" == quantifier) {
      // nano: divide by 10^9
      value /= 1000000000d;
      tokenizer.consume();
    } else if ("u" == quantifier) {
      // micro: divide by 10^6
      value /= 1000000d;
      tokenizer.consume();
    } else if ("m" == quantifier) {
      // milli: divide by 10^3
      value /= 1000d;
      tokenizer.consume();
    } else if ("K" == quantifier || "k" == quantifier) {
      // kilo (either case): multiply by 10^3
      value *= 1000d;
      tokenizer.consume();
    } else if ("M" == quantifier) {
      // mega: multiply by 10^6
      value *= 1000000d;
      tokenizer.consume();
    } else if ("G" == quantifier) {
      // giga: multiply by 10^9
      value *= 1000000000d;
      tokenizer.consume();
    } else {
      // Any other ID is consumed and reported as an error (the call is cut off in this excerpt).
      Token token = tokenizer.consume();
      errors.add(ParseError.error(token,

/**
 * Checks if the underlying input is looking at the end of a block comment.
 * <p>
 * If an end of block comment is detected, the characters forming it are consumed by this method.
 *
 * @return {@code true} if the next character(s) of the input end a block comment,
 *         {@code false} otherwise
 */
protected boolean isAtEndOfBlockComment() {
  // Matches the input against blockCommentEnd; the second argument is true so a match is consumed.
  boolean endReached = canConsumeThisString(blockCommentEnd, true);
  return endReached;
}

Javadoc

Turns a stream of characters (a Reader) into a stream of Tokens, supporting lookahead.

Reads from the given input and parses it into a stream of tokens. By default all token types defined by Token are supported. Most of the features can be further tweaked by changing the default settings.

By default the tokenizer operates as follows:

Consume and ignore any whitespace characters (see Char#isWhitepace()).
If the current character starts a line comment, read until the end of the line and ignore all characters consumed.
If the current character starts a block comment, read until an end of block comment is detected.
If the current character is a digit, parse an INTEGER; if a decimal separator is found, switch over to a DECIMAL (see Char#isDigit()). Also, if the current character is a '-' and the next is a digit, we try to read a number.
If the current character is a letter, parse an ID (see Char#isLetter()). Once this is complete, check if the ID matches one of the supplied keywords, and convert if necessary.
If the current character is an opening or closing bracket, a SYMBOL for that single character is returned
If the current character is one of the special id starters, all valid ID characters (see #isIdentifierChar(Char)) are consumed and returned as SPECIAL_ID.
All other characters, especially all operators, will be read and returned as one SYMBOL. Therefore #++* will be returned as a single symbol.

Most used methods

fetchNumber
Reads and returns a number.
isAtBracket
Determines if the underlying input is looking at a bracket. By default all supplied brackets are che
isAtStartOfIdentifier
Determines if the underlying input is looking at a valid character to start an identifier By default
isIdentifierChar
Determines if the given Char is a valid identifier part. By default, letters, digits and '_' are val
isSymbolCharacter
Determines if the given Char is a symbol character. By default these are all non-control characters,
<init>
Creates a new tokenizer for the given input
addError
Adds a parse error to the internal problem collector. It is preferred to collect as much errors as p
addStringDelimiter
Adds a new string delimiter character along with the character used to escape string within it.
canConsumeThisString
Checks if the next characters, starting from the current, match the given string.
consume
current
fetch

Popular in Java

Updating database using SQL prepared statement
putExtra (Intent)
getApplicationContext (Context)
setContentView (Activity)
PrintStream (java.io)
Fake signature of an existing Java class.
Connection (java.sql)
A connection represents a link from a Java application to a database. All SQL statements and results
Collectors (java.util.stream)
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning.> This module, both source code and documentation, is in t
Table (com.google.common.collect)
A collection that associates an ordered pair of keys, called a row key and a column key, with a sing
Rectangle (java.awt)
A Rectangle specifies an area in a coordinate space that is enclosed by the Rectangle object's top-
Best IntelliJ plugins

How to use Tokenizer in parsii.tokenizer

Best Java code snippets using parsii.tokenizer.Tokenizer (Showing top 20 results out of 315)

How to use
Tokenizer
in
parsii.tokenizer