/**
 * Feeds the parser a line whose {@code HBASE_TS_KEY} column (index 1) holds the literal text
 * {@code "timestamp"} and expects a {@link BadTsvLineException} to escape the test body.
 */
@Test(expected = BadTsvLineException.class)
public void testTsvParserInvalidTimestamp() throws BadTsvLineException {
  TsvParser tsvParser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");
  assertEquals(1, tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("rowkey\ttimestamp\tval_a");
  ParsedLine result = tsvParser.parse(rawLine, rawLine.length);
  assertEquals(-1, result.getTimestamp(-1));

  Iterable<String> expectedFields = Splitter.on("\t").split(Bytes.toString(rawLine));
  checkParsing(result, expectedFields);
}
Splitter.on(',').trimResults().split(columnsSpecification));
/**
 * Registers the filters listed in the configuration under {@code THRIFT_FILTERS}.
 *
 * <p>Each configured entry is split on {@code ':'}. Entries that yield exactly two parts are
 * passed, in order, to {@code ParseFilter.registerFilter}; any other entry is logged at WARN
 * level and skipped. Nothing happens when the configuration holds no value for the key.
 *
 * @param conf the configuration to read the filter specifications from
 */
public static void registerFilters(Configuration conf) {
  String[] filters = conf.getStrings(THRIFT_FILTERS);
  if (filters == null) {
    return;
  }

  Splitter colonSplitter = Splitter.on(':');
  for (String spec : filters) {
    List<String> parts = colonSplitter.splitToList(spec);
    if (parts.size() == 2) {
      ParseFilter.registerFilter(parts.get(0), parts.get(1));
      continue;
    }
    LOG.warn("Invalid filter specification " + spec + " - skipping");
  }
}
Iterable<String> components = Splitter.on('/').omitEmptyStrings().split(pathname); List<String> path = new ArrayList<String>();
/** * Returns an iterator over the lines in the string. If the string ends in a newline, a final * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). */ private Iterator<String> linesIterator() { return new AbstractIterator<String>() { Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); @Override protected String computeNext() { if (lines.hasNext()) { String next = lines.next(); // skip last line if it's empty if (lines.hasNext() || !next.isEmpty()) { return next; } } return endOfData(); } }; }
/**
 * Creates a splitter that treats one specific character as the separator. For example,
 * {@code Splitter.on(',').split("foo,,bar")} returns an iterable containing
 * {@code ["foo", "", "bar"]}.
 *
 * @param separator the character to recognize as a separator
 * @return a splitter, with default settings, that recognizes that separator
 */
public static Splitter on(char separator) {
  CharMatcher separatorMatcher = CharMatcher.is(separator);
  return on(separatorMatcher);
}
/**
 * Builds a {@code MapSplitter} that uses this splitter to separate entries and the given string
 * to separate each entry's key from its value.
 *
 * @since 10.0
 */
@Beta
public MapSplitter withKeyValueSeparator(String separator) {
  Splitter keyValueSplitter = on(separator);
  return withKeyValueSeparator(keyValueSplitter);
}
/**
 * Derives a splitter identical to {@code this} one except that leading and trailing
 * {@linkplain CharMatcher#whitespace whitespace} is stripped from every returned substring;
 * shorthand for {@code trimResults(CharMatcher.whitespace())}. For example,
 * {@code Splitter.on(',').trimResults().split(" a, b ,c ")} returns an iterable containing
 * {@code ["a", "b", "c"]}.
 *
 * @return a splitter with the desired configuration
 */
public Splitter trimResults() {
  CharMatcher whitespace = CharMatcher.whitespace();
  return trimResults(whitespace);
}
/**
 * Derives a splitter identical to {@code this} one except that empty strings are left out of
 * the results. For example,
 * {@code Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an iterable
 * containing only {@code ["a", "b", "c"]}.
 *
 * <p>When a {@code trimResults} option is also configured, trimming happens before the
 * emptiness check. So, for example,
 * {@code Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns an empty
 * iterable.
 *
 * <p>{@link #split(CharSequence)} ordinarily cannot return an empty iterable, but with this
 * option it can (if the input sequence consists of nothing but separators).
 *
 * @return a splitter with the desired configuration
 */
public Splitter omitEmptyStrings() {
  final boolean omitEmpties = true;
  return new Splitter(strategy, omitEmpties, trimmer, limit);
}
/**
 * Converts the textual form of an IPv4 address into its raw bytes.
 *
 * <p>The input is split with {@code IPV4_SPLITTER} and each piece is converted with
 * {@code parseOctet}. The conversion succeeds only when exactly {@code IPV4_PART_COUNT} pieces
 * are produced and every one of them parses.
 *
 * @param ipString the text to convert
 * @return an array of {@code IPV4_PART_COUNT} bytes, or {@code null} if {@code ipString} has the
 *     wrong number of pieces or a piece is rejected by {@code parseOctet}
 */
@Nullable
private static byte[] textToNumericFormatV4(String ipString) {
  byte[] bytes = new byte[IPV4_PART_COUNT];
  int i = 0;
  try {
    for (String octet : IPV4_SPLITTER.split(ipString)) {
      if (i == IPV4_PART_COUNT) {
        // More pieces than an IPv4 address can hold. Reject explicitly instead of relying on
        // IPV4_SPLITTER (declared elsewhere) to cap the count; without this guard an
        // over-long input would surface as ArrayIndexOutOfBoundsException.
        return null;
      }
      bytes[i++] = parseOctet(octet);
    }
  } catch (NumberFormatException ex) {
    // A malformed piece means the whole string is not a valid address.
    return null;
  }
  // Too few pieces is as invalid as too many.
  return i == IPV4_PART_COUNT ? bytes : null;
}
/** * Returns a splitter that considers any subsequence matching {@code pattern} to be a separator. * For example, {@code Splitter.on(Pattern.compile("\r?\n")).split(entireFile)} splits a string * into lines whether it uses DOS-style or UNIX-style line terminators. * * @param separatorPattern the pattern that determines whether a subsequence is a separator. This * pattern may not match the empty string. * @return a splitter, with default settings, that uses this pattern * @throws IllegalArgumentException if {@code separatorPattern} matches the empty string */ @GwtIncompatible // java.util.regex public static Splitter on(Pattern separatorPattern) { return on(new JdkPattern(separatorPattern)); }
/**
 * Registers the filters listed in the configuration under {@code hbase.thrift.filters}.
 *
 * <p>Each configured entry is split on {@code ':'}. Entries that yield exactly two parts are
 * passed, in order, to {@code ParseFilter.registerFilter}; any other entry is logged at WARN
 * level and skipped. Nothing happens when the configuration holds no value for the key.
 *
 * @param conf the configuration to read the filter specifications from
 */
public static void registerFilters(Configuration conf) {
  String[] filters = conf.getStrings("hbase.thrift.filters");
  if (filters == null) {
    return;
  }

  Splitter colonSplitter = Splitter.on(':');
  for (String spec : filters) {
    List<String> parts = colonSplitter.splitToList(spec);
    if (parts.size() == 2) {
      ParseFilter.registerFilter(parts.get(0), parts.get(1));
      continue;
    }
    LOG.warn("Invalid filter specification " + spec + " - skipping");
  }
}
/**
 * Builds a {@code MapSplitter} that uses this splitter to separate entries and the given
 * character to separate each entry's key from its value.
 *
 * @since 14.0
 */
@Beta
public MapSplitter withKeyValueSeparator(char separator) {
  Splitter keyValueSplitter = on(separator);
  return withKeyValueSeparator(keyValueSplitter);
}
/** * Returns a splitter that behaves equivalently to {@code this} splitter, but removes all leading * or trailing characters matching the given {@code * CharMatcher} from each returned substring. For example, {@code * Splitter.on(',').trimResults(CharMatcher.is('_')).split("_a ,_b_ ,c__")} returns an iterable * containing {@code ["a ", "b_ ", "c"]}. * * @param trimmer a {@link CharMatcher} that determines whether a character should be removed from * the beginning/end of a subsequence * @return a splitter with the desired configuration */ // TODO(kevinb): throw if a trimmer was already specified! public Splitter trimResults(CharMatcher trimmer) { checkNotNull(trimmer); return new Splitter(strategy, omitEmptyStrings, trimmer, limit); }
/**
 * Checks the column metadata a {@code TsvParser} derives from a four-column specification with
 * the row key in third position, then parses one matching tab-separated line and verifies the
 * result with {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  TsvParser tsvParser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");

  // Column 0 names a family only, so its qualifier is expected to be empty.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(0));

  // Column 1 is written as family:qualifier.
  assertBytesEquals(Bytes.toBytes("col_b"), tsvParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), tsvParser.getQualifier(1));

  // Column 2 is the row key and therefore has neither family nor qualifier.
  assertNull(tsvParser.getFamily(2));
  assertNull(tsvParser.getQualifier(2));
  assertEquals(2, tsvParser.getRowKeyColumnIndex());

  // No HBASE_TS_KEY column was specified, so the default index is expected.
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX, tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine result = tsvParser.parse(rawLine, rawLine.length);
  Iterable<String> expectedFields = Splitter.on("\t").split(Bytes.toString(rawLine));
  checkParsing(result, expectedFields);
}
Splitter.on(',').trimResults().split(columnsSpecification));
/**
 * Breaks {@code sequence} into entries with the outer splitter, breaks each entry into a key and
 * a value with the entry splitter, and collects the pairs into an unmodifiable map. For example,
 * {@code Splitter.on(';').trimResults().withKeyValueSeparator("=>").split("a=>b ; c=>b")} will
 * return a mapping from {@code "a"} to {@code "b"} and {@code "c"} to {@code "b"}.
 *
 * <p>The returned map iterates in the order the entries appear in {@code sequence}.
 *
 * @throws IllegalArgumentException if the specified sequence does not split into valid map
 *     entries, or if there are duplicate keys
 */
public Map<String, String> split(CharSequence sequence) {
  Map<String, String> result = new LinkedHashMap<String, String>();
  for (String entry : outerSplitter.split(sequence)) {
    Iterator<String> fields = entrySplitter.splittingIterator(entry);

    // A key must be present and must not repeat an earlier one.
    checkArgument(fields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
    String key = fields.next();
    checkArgument(!result.containsKey(key), "Duplicate key [%s] found.", key);

    // A value must follow the key.
    checkArgument(fields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
    String value = fields.next();
    result.put(key, value);

    // Anything after the value makes the entry malformed.
    checkArgument(!fields.hasNext(), INVALID_ENTRY_MESSAGE, entry);
  }
  return Collections.unmodifiableMap(result);
}
}
/** * Returns a splitter that considers any subsequence matching a given pattern (regular expression) * to be a separator. For example, {@code Splitter.onPattern("\r?\n").split(entireFile)} splits a * string into lines whether it uses DOS-style or UNIX-style line terminators. This is equivalent * to {@code Splitter.on(Pattern.compile(pattern))}. * * @param separatorPattern the pattern that determines whether a subsequence is a separator. This * pattern may not match the empty string. * @return a splitter, with default settings, that uses this pattern * @throws IllegalArgumentException if {@code separatorPattern} matches the empty string or is a * malformed expression */ @GwtIncompatible // java.util.regex public static Splitter onPattern(String separatorPattern) { return on(Platform.compilePattern(separatorPattern)); }
/**
 * Derives a splitter identical to {@code this} one except that it stops splitting once the
 * limit is reached. The limit is the maximum number of items returned by the iterator, or the
 * maximum size of the list returned by {@link #splitToList}.
 *
 * <p>For example, {@code Splitter.on(',').limit(3).split("a,b,c,d")} returns an iterable
 * containing {@code ["a", "b", "c,d"]}. When omitting empty strings, the omitted strings do not
 * count. Hence, {@code Splitter.on(',').limit(3).omitEmptyStrings().split("a,,,b,,,c,d")}
 * returns an iterable containing {@code ["a", "b", "c,d"]}. When trim is requested, all entries
 * are trimmed, including the last. Hence
 * {@code Splitter.on(',').limit(3).trimResults().split(" a , b , c , d ")} results in
 * {@code ["a", "b", "c , d"]}.
 *
 * @param limit the maximum number of items returned
 * @return a splitter with the desired configuration
 * @throws IllegalArgumentException if {@code limit} is not greater than zero
 * @since 9.0
 */
public Splitter limit(int limit) {
  checkArgument(limit > 0, "must be greater than zero: %s", limit);
  final int maxItems = limit;
  return new Splitter(strategy, omitEmptyStrings, trimmer, maxItems);
}