/**
 * Skips over the character at the current position.
 * Does nothing when the queue is already empty.
 */
public void advance() {
    if (isEmpty()) {
        return;
    }
    pos++;
}
/**
 * Tests whether the character at the current position equals any of the supplied characters.
 *
 * @param seq candidate characters to compare against
 * @return true if the queue is not empty and its current character is one of {@code seq}
 */
public boolean matchesAny(char... seq) {
    if (isEmpty()) {
        return false;
    }
    for (int i = 0; i < seq.length; i++) {
        if (seq[i] == queue.charAt(pos)) {
            return true;
        }
    }
    return false;
}
/**
 * Looks at the character at the current position without advancing past it.
 *
 * @return the current character, or 0 when the queue is empty
 */
public char peek() {
    if (isEmpty()) {
        return 0;
    }
    return queue.charAt(pos);
}
/**
 * Tests whether the character at the current position is a word character,
 * i.e. a letter or digit according to {@link Character#isLetterOrDigit(char)}.
 *
 * @return true if the queue is not empty and its current character is a letter or digit
 */
public boolean matchesWord() {
    if (isEmpty()) {
        return false;
    }
    return Character.isLetterOrDigit(queue.charAt(pos));
}
/**
 * Tests whether the character at the current position is whitespace,
 * as decided by {@code StringUtil.isWhitespace}.
 *
 * @return true if the queue is not empty and its current character is whitespace
 */
public boolean matchesWhitespace() {
    if (isEmpty()) {
        return false;
    }
    return StringUtil.isWhitespace(queue.charAt(pos));
}
/** Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue. @param seq any number of terminators to consume to. <b>Case insensitive.</b> @return consumed string */ // todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this // is is a case sensitive time... public String consumeToAny(String... seq) { int start = pos; while (!isEmpty() && !matchesAny(seq)) { pos++; } return queue.substring(start, pos); }
/**
 * Consumes a CSS identifier (ID or class name) from the queue: a run of word characters,
 * '-' and '_'.
 * See http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
 *
 * @return the identifier (empty if the current character is not part of one)
 */
public String consumeCssIdentifier() {
    final int begin = pos;
    while (!isEmpty()) {
        boolean identifierChar = matchesWord() || matchesAny('-', '_');
        if (!identifierChar) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
public String consumeToIgnoreCase(String seq) { int start = pos; String first = seq.substring(0, 1); boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of while (!isEmpty()) { if (matches(seq)) break; if (canScan) { int skip = queue.indexOf(first, pos) - pos; if (skip == 0) // this char is the skip char, but not match, so force advance of pos pos++; else if (skip < 0) // no chance of finding, grab to end pos = queue.length(); else pos += skip; } else pos++; } return queue.substring(start, pos); }
/**
 * Consumes an attribute key from the queue: a run of word characters, '-', '_' and ':'.
 *
 * @return the attribute key (empty if the current character is not part of one)
 */
public String consumeAttributeKey() {
    final int begin = pos;
    for (;;) {
        if (isEmpty()) {
            break;
        }
        if (!matchesWord() && !matchesAny('-', '_', ':')) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
 * Consumes a CSS element selector from the queue. This is a tag name in which '|' rather
 * than ':' separates the namespace (or "*|" for a wildcard namespace), so that it does not
 * clash with :pseudo selectors.
 * <p>
 * The position advances one character at a time while the queue is at a word character
 * or {@code matchesAny("*|", "|", "_", "-")} holds.
 *
 * @return the tag name
 */
public String consumeElementSelector() {
    final int begin = pos;
    while (!isEmpty()) {
        boolean selectorChar = matchesWord() || matchesAny("*|", "|", "_", "-");
        if (!selectorChar) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
 * Consumes a tag name from the queue: a run of word characters, ':', '_' and '-'.
 *
 * @return the tag name (empty if the current character is not part of one)
 */
public String consumeTagName() {
    final int begin = pos;
    for (;;) {
        if (isEmpty()) {
            break;
        }
        if (!matchesWord() && !matchesAny(':', '_', '-')) {
            break;
        }
        pos++;
    }
    return queue.substring(begin, pos);
}
/**
 * Collects text from {@code tq} up to, but not including, the next combinator, or to the
 * end of the queue. Parenthesised and bracketed groups are taken whole with
 * {@code chompBalanced} and re-wrapped in their delimiters, so a combinator inside such a
 * group does not end the sub-query.
 *
 * @return the accumulated sub-query text
 */
private String consumeSubQuery() {
    StringBuilder subQuery = new StringBuilder();
    while (!tq.isEmpty()) {
        if (tq.matches("(")) {
            subQuery.append("(").append(tq.chompBalanced('(', ')')).append(")");
            continue;
        }
        if (tq.matches("[")) {
            subQuery.append("[").append(tq.chompBalanced('[', ']')).append("]");
            continue;
        }
        if (tq.matchesAny(combinators)) {
            break;
        }
        subQuery.append(tq.consume());
    }
    return subQuery.toString();
}
if (isEmpty()) break; Character c = consume(); if (last == 0 || last != ESC) {
private void byAttribute() { TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue String key = cq.consumeToAny(AttributeEvals); // eq, not, start, end, contain, match, (no val) Validate.notEmpty(key); cq.consumeWhitespace(); if (cq.isEmpty()) { if (key.startsWith("^")) evals.add(new Evaluator.AttributeStarting(key.substring(1))); else evals.add(new Evaluator.Attribute(key)); } else { if (cq.matchChomp("=")) evals.add(new Evaluator.AttributeWithValue(key, cq.remainder())); else if (cq.matchChomp("!=")) evals.add(new Evaluator.AttributeWithValueNot(key, cq.remainder())); else if (cq.matchChomp("^=")) evals.add(new Evaluator.AttributeWithValueStarting(key, cq.remainder())); else if (cq.matchChomp("$=")) evals.add(new Evaluator.AttributeWithValueEnding(key, cq.remainder())); else if (cq.matchChomp("*=")) evals.add(new Evaluator.AttributeWithValueContaining(key, cq.remainder())); else if (cq.matchChomp("~=")) evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))); else throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder()); } }
/** * Parse the query * @return Evaluator */ Evaluator parse() { tq.consumeWhitespace(); if (tq.matchesAny(combinators)) { // if starts with a combinator, use root as elements evals.add(new StructuralEvaluator.Root()); combinator(tq.consume()); } else { findElements(); } while (!tq.isEmpty()) { // hierarchy and extras boolean seenWhite = tq.consumeWhitespace(); if (tq.matchesAny(combinators)) { combinator(tq.consume()); } else if (seenWhite) { combinator(' '); } else { // E.class, E#id, E[attr] etc. AND findElements(); // take next el, #. etc off queue } } if (evals.size() == 1) return evals.get(0); return new CombiningEvaluator.And(evals); }
private List<Node> parse() { while (!tq.isEmpty()) { if (tq.matches("<!--")) { parseComment(); } else if (tq.matches("<![CDATA[")) { parseCdata(); } else if (tq.matches("<?") || tq.matches("<!")) { parseXmlDecl(); } else if (tq.matches("</")) { parseEndTag(); } else if (tq.matches("<") && !isRawDataTag(stack.peek())) { parseStartTag(); } else { parseTextNode(); } } // Pop off body as it is already inside html. Iterator<Node> iterator = stack.iterator(); while (iterator.hasNext()) { if (iterator.next().nodeName().equals(bodyTag.getName())) { iterator.remove(); } } return stack; }
private List<Node> parse() { while (!tq.isEmpty()) { if (tq.matches("<!--")) { parseComment(); } else if (tq.matches("<![CDATA[")) { parseCdata(); } else if (tq.matches("<?") || tq.matches("<!")) { parseXmlDecl(); } else if (tq.matches("</")) { parseEndTag(); } else if (tq.matches("<") && !isRawDataTag(stack.peek())) { parseStartTag(); } else { parseTextNode(); } } // Pop off body as it is already inside html. Iterator<Node> iterator = stack.iterator(); while (iterator.hasNext()) { if (iterator.next().nodeName().equals(bodyTag.getName())) { iterator.remove(); } } return stack; }
private List<Node> parse() { while (!tq.isEmpty()) { if (tq.matches("<!--")) { parseComment(); } else if (tq.matches("<![CDATA[")) { parseCdata(); } else if (tq.matches("<?") || tq.matches("<!")) { parseXmlDecl(); } else if (tq.matches("</")) { parseEndTag(); } else if (tq.matches("<")) { parseStartTag(); } else { parseTextNode(); } } // Pop off body as it is already inside html. Iterator<Node> iterator = stack.iterator(); while (iterator.hasNext()) { if (iterator.next().nodeName().equals(bodyTag.getName())) { iterator.remove(); } } return stack; }
private Attribute parseAttribute() { whitespace(); String key = tq.consumeAttributeKey(); String value = ""; whitespace(); if (tq.matchChomp("=")) { whitespace(); if (tq.matchChomp(SQ)) { value = tq.chompTo(SQ); } else if (tq.matchChomp(DQ)) { value = tq.chompTo(DQ); } else { StringBuilder valueAccum = new StringBuilder(); // no ' or " to look for, so scan to end tag or space (or end of stream) while (!tq.matchesAny("<", "/>", ">") && !tq.matchesWhitespace() && !tq.isEmpty()) { valueAccum.append(tq.consume()); } value = valueAccum.toString(); } whitespace(); } if (!Strings.empty(key)) return Attribute.createFromEncoded(key, value); else { tq.consume(); // unknown char, keep popping so not get stuck return null; } }
private Attribute parseAttribute() { whitespace(); String key = tq.consumeAttributeKey(); String value = ""; whitespace(); if (tq.matchChomp("=")) { whitespace(); if (tq.matchChomp(SQ)) { value = tq.chompTo(SQ); } else if (tq.matchChomp(DQ)) { value = tq.chompTo(DQ); } else { StringBuilder valueAccum = new StringBuilder(); // no ' or " to look for, so scan to end tag or space (or end of stream) while (!tq.matchesAny("<", "/>", ">") && !tq.matchesWhitespace() && !tq.isEmpty()) { valueAccum.append(tq.consume()); } value = valueAccum.toString(); } whitespace(); } if (!Strings.empty(key)) return Attribute.createFromEncoded(key, value); else { tq.consume(); // unknown char, keep popping so not get stuck return null; } }