input = ParserUtils.stripQuotes(input); int lastIndex = input.lastIndexOf(' '); DateTimeZone timeZone = DateTimeZone.UTC; if (lastIndex > 0) { DateTimeZone timeZoneFromString = ParserUtils.getDateTimeZone(input.substring(lastIndex + 1)); if (timeZoneFromString != null) { timeZone = timeZoneFromString; return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return DateTimes.of(ParserUtils.stripQuotes(input)); }; } else if ("posix".equalsIgnoreCase(format) return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input))); }; } else if ("ruby".equalsIgnoreCase(format)) { return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input))); }; } else { return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return formatter.parse(ParserUtils.stripQuotes(input));
/**
 * Replaces the field names held by this parser.
 *
 * <p>The supplied names are handed to {@code ParserUtils.validateFields} first; only when that
 * call returns normally are they copied into a new list and stored.
 *
 * @param fieldNames names to validate and then copy
 */
@Override
public void setFieldNames(Iterable<String> fieldNames)
{
  // Validation comes first, so an exception here leaves the current names untouched.
  ParserUtils.validateFields(fieldNames);

  // Store a private copy rather than the caller's Iterable.
  this.fieldNames = Lists.newArrayList(fieldNames);
}
/**
 * Rejects {@code fieldNames} when {@code findDuplicates} reports any entry for it.
 *
 * @param fieldNames names to check
 * @throws ParseException if the set returned by {@code findDuplicates} is not empty; the
 *                        message includes that set
 */
public static void validateFields(Iterable<String> fieldNames)
{
  final Set<String> repeated = findDuplicates(fieldNames);
  if (repeated.isEmpty()) {
    // Nothing reported: the names are acceptable.
    return;
  }
  throw new ParseException("Duplicate column entries found : %s", repeated.toString());
}
/**
 * Sets the field names used by this parser, substituting a default name for every null or
 * empty entry.
 *
 * <p>A {@code null} argument is ignored and leaves the current names unchanged. Otherwise the
 * entry at position {@code i} is kept unless it is null or empty, in which case
 * {@code ParserUtils.getDefaultColumnName(i)} supplies the name for that position. The
 * resulting list is passed to {@code ParserUtils.validateFields} before it is stored, so a
 * failed validation leaves the previously stored names in place.
 *
 * @param fieldNames field names in positional order; may be {@code null}
 */
@Override
public void setFieldNames(final Iterable<String> fieldNames)
{
  if (fieldNames == null) {
    return;
  }

  final List<String> fieldsList = Lists.newArrayList(fieldNames);
  final ArrayList<String> resolved = new ArrayList<>(fieldsList.size());
  for (int i = 0; i < fieldsList.size(); i++) {
    final String name = fieldsList.get(i);
    resolved.add(Strings.isNullOrEmpty(name) ? ParserUtils.getDefaultColumnName(i) : name);
  }

  // Validate the local list before publishing it: assigning the field first would leave this
  // parser holding the rejected names whenever validateFields throws.
  ParserUtils.validateFields(resolved);
  this.fieldNames = resolved;
}
/**
 * Exercises {@code ParserUtils.getDateTimeZone} with identifiers that are expected to resolve to
 * a zone and with inputs that are expected to yield {@code null}.
 */
@Test
public void testExtractTimeZone()
{
  // Identifiers expected to resolve.
  Assert.assertEquals(DateTimeZone.UTC, ParserUtils.getDateTimeZone("UTC"));

  final DateTimeZone expectedPst = DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"));
  Assert.assertEquals(expectedPst, ParserUtils.getDateTimeZone("PST"));

  // Inputs expected to be rejected with null.
  Assert.assertNull(ParserUtils.getDateTimeZone("Hello"));
  Assert.assertNull(ParserUtils.getDateTimeZone("AEST"));

  // A region-style identifier is expected to resolve as well.
  final DateTimeZone expectedHobart = DateTimeZone.forTimeZone(TimeZone.getTimeZone("Australia/Hobart"));
  Assert.assertEquals(expectedHobart, ParserUtils.getDateTimeZone("Australia/Hobart"));

  // Empty and null inputs are expected to yield null rather than throw.
  Assert.assertNull(ParserUtils.getDateTimeZone(""));
  Assert.assertNull(ParserUtils.getDateTimeZone(null));
}
/**
 * Checks that {@code ParserUtils.stripQuotes} returns the bare text for a quoted string, both
 * with and without whitespace around and inside the quotes.
 */
@Test
public void testStripQuotes()
{
  final String expected = "hello world";

  // Quotes directly around the text.
  Assert.assertEquals(expected, ParserUtils.stripQuotes("\"hello world\""));

  // Whitespace outside the quotes and between the quotes and the text.
  Assert.assertEquals(expected, ParserUtils.stripQuotes(" \" hello world \" "));
}
/**
 * Matches {@code input} against the compiled pattern and maps its capture groups to field names.
 *
 * <p>Capture groups 1..groupCount become the values, in order. If no field names have been set
 * yet, names are generated from the number of values. The values are passed through
 * {@code valueFunction} and combined with the field names by {@code Utils.zipMapPartial}.
 *
 * @param input the row to parse
 * @return the result of {@code Utils.zipMapPartial} for the field names and transformed values
 * @throws ParseException if the pattern does not match the whole input or any other exception
 *                        occurs; the original exception is attached as the cause
 */
@Override
public Map<String, Object> parseToMap(String input)
{
  try {
    final Matcher matcher = compiled.matcher(input);
    if (!matcher.matches()) {
      // Thrown inside the try on purpose: the catch below wraps it with the offending row.
      throw new ParseException("Incorrect Regex: %s . No match found.", pattern);
    }

    final int groupCount = matcher.groupCount();
    List<String> values = new ArrayList<>();
    int group = 1;
    while (group <= groupCount) {
      values.add(matcher.group(group));
      group++;
    }

    if (fieldNames == null) {
      setFieldNames(ParserUtils.generateFieldNames(values.size()));
    }

    return Utils.zipMapPartial(fieldNames, Iterables.transform(values, valueFunction));
  }
  catch (Exception e) {
    throw new ParseException(e, "Unable to parse row [%s]", input);
  }
}
/**
 * Creates a parser with the given list delimiter and header handling options.
 *
 * @param listDelimiter     delimiter used to build the list splitter and the multi-value
 *                          function; {@code Parsers.DEFAULT_LIST_DELIMITER} is used when null
 * @param hasHeaderRow      stored as-is in {@code this.hasHeaderRow}
 * @param maxSkipHeaderRows stored as-is in {@code this.maxSkipHeaderRows}
 */
public AbstractFlatTextFormatParser(
    @Nullable final String listDelimiter,
    final boolean hasHeaderRow,
    final int maxSkipHeaderRows
)
{
  // Resolve the delimiter first; the splitter and value function below both depend on it.
  if (listDelimiter == null) {
    this.listDelimiter = Parsers.DEFAULT_LIST_DELIMITER;
  } else {
    this.listDelimiter = listDelimiter;
  }

  this.listSplitter = Splitter.on(this.listDelimiter);
  this.valueFunction = ParserUtils.getMultiValueFunction(this.listDelimiter, this.listSplitter);

  this.hasHeaderRow = hasHeaderRow;
  this.maxSkipHeaderRows = maxSkipHeaderRows;
}
/**
 * Builds a list of {@code length} default column names.
 *
 * <p>Element {@code i} of the result is {@code getDefaultColumnName(i)}.
 *
 * @param length number of names to generate; also used as the initial list capacity
 * @return a new list holding the generated names in positional order
 */
public static ArrayList<String> generateFieldNames(int length)
{
  final ArrayList<String> generated = new ArrayList<>(length);
  int position = 0;
  while (position < length) {
    generated.add(getDefaultColumnName(position));
    position++;
  }
  return generated;
}
/**
 * Stores the given field names, filling in a default for each null or empty entry.
 *
 * <p>Nothing happens when {@code fieldNames} is {@code null}. For a non-null argument, every
 * null or empty entry is replaced by {@code ParserUtils.getDefaultColumnName(i)}, where
 * {@code i} is the entry's position. The completed list must pass
 * {@code ParserUtils.validateFields} before it replaces the stored names; if validation throws,
 * the stored names are left as they were.
 *
 * @param fieldNames field names in positional order, or {@code null} to keep the current names
 */
@Override
public void setFieldNames(final Iterable<String> fieldNames)
{
  if (fieldNames != null) {
    final List<String> fieldsList = Lists.newArrayList(fieldNames);
    final ArrayList<String> candidateNames = new ArrayList<>(fieldsList.size());
    for (int i = 0; i < fieldsList.size(); i++) {
      if (Strings.isNullOrEmpty(fieldsList.get(i))) {
        candidateNames.add(ParserUtils.getDefaultColumnName(i));
      } else {
        candidateNames.add(fieldsList.get(i));
      }
    }
    // Only publish the names once they have been accepted. Writing the field before
    // validating would keep a rejected list around after the exception propagates.
    ParserUtils.validateFields(candidateNames);
    this.fieldNames = candidateNames;
  }
}
/**
 * Consumes one line of input and, for data lines, returns its fields as a map.
 *
 * <p>Lines are handled in this order: the first {@code maxSkipHeaderRows} lines are discarded
 * without being parsed; if {@code hasHeaderRow} is set, the next line is parsed as the header
 * and supplies the field names unless names are already set; every later line is parsed as
 * data. If no field names are known when a data line arrives, names are generated from the
 * number of values in that line.
 *
 * @param input one raw line
 * @return the result of {@code Utils.zipMapPartial} for a data line, or {@code null} for a
 *         skipped line or the header line
 * @throws UnsupportedOperationException if header options are configured but
 *                                       {@code supportSkipHeaderRows} is false
 * @throws ParseException                if any exception occurs while handling the line; the
 *                                       original exception is attached as the cause
 */
@Override
public Map<String, Object> parseToMap(final String input)
{
  if (!supportSkipHeaderRows && (hasHeaderRow || maxSkipHeaderRows > 0)) {
    throw new UnsupportedOperationException("hasHeaderRow or maxSkipHeaderRows is not supported. "
                                            + "Please check the indexTask supports these options.");
  }

  try {
    // Decide whether to skip BEFORE parsing: a line that is going to be thrown away does not
    // have to be well-formed, and a parse failure here would prevent it from being counted.
    if (skippedHeaderRows < maxSkipHeaderRows) {
      skippedHeaderRows++;
      return null;
    }

    List<String> values = parseLine(input);

    if (hasHeaderRow && !hasParsedHeader) {
      // Explicitly configured field names take precedence over the header line.
      if (fieldNames == null) {
        setFieldNames(values);
      }
      hasParsedHeader = true;
      return null;
    }

    if (fieldNames == null) {
      setFieldNames(ParserUtils.generateFieldNames(values.size()));
    }

    return Utils.zipMapPartial(fieldNames, Iterables.transform(values, valueFunction));
  }
  catch (Exception e) {
    throw new ParseException(e, "Unable to parse row [%s]", input);
  }
}
/**
 * Initializes the header options and the list-splitting helpers of this parser.
 *
 * @param listDelimiter     list delimiter, or {@code null} to fall back to
 *                          {@code Parsers.DEFAULT_LIST_DELIMITER}
 * @param hasHeaderRow      value assigned to {@code this.hasHeaderRow}
 * @param maxSkipHeaderRows value assigned to {@code this.maxSkipHeaderRows}
 */
public AbstractFlatTextFormatParser(
    @Nullable final String listDelimiter,
    final boolean hasHeaderRow,
    final int maxSkipHeaderRows
)
{
  // Plain option copies; independent of the delimiter setup below.
  this.hasHeaderRow = hasHeaderRow;
  this.maxSkipHeaderRows = maxSkipHeaderRows;

  // The splitter and the value function are both derived from the resolved delimiter.
  this.listDelimiter = listDelimiter != null ? listDelimiter : Parsers.DEFAULT_LIST_DELIMITER;
  this.listSplitter = Splitter.on(this.listDelimiter);
  this.valueFunction = ParserUtils.getMultiValueFunction(this.listDelimiter, this.listSplitter);
}
/**
 * Generates default column names for positions {@code 0} through {@code length - 1}.
 *
 * @param length how many names to produce; passed to the list constructor as its capacity
 * @return a new list whose element at index {@code i} is {@code getDefaultColumnName(i)}
 */
public static ArrayList<String> generateFieldNames(int length)
{
  final ArrayList<String> columnNames = new ArrayList<>(length);
  // The current size doubles as the index of the next name to generate.
  while (columnNames.size() < length) {
    columnNames.add(getDefaultColumnName(columnNames.size()));
  }
  return columnNames;
}
input = ParserUtils.stripQuotes(input); int lastIndex = input.lastIndexOf(' '); DateTimeZone timeZone = DateTimeZone.UTC; if (lastIndex > 0) { DateTimeZone timeZoneFromString = ParserUtils.getDateTimeZone(input.substring(lastIndex + 1)); if (timeZoneFromString != null) { timeZone = timeZoneFromString; return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return DateTimes.of(ParserUtils.stripQuotes(input)); }; } else if ("posix".equalsIgnoreCase(format) return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input))); }; } else if ("ruby".equalsIgnoreCase(format)) { return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input))); }; } else { return input -> { Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); return formatter.parse(ParserUtils.stripQuotes(input));
/**
 * Validates the given field names and, if they are accepted, keeps a copy of them.
 *
 * @param fieldNames names checked by {@code ParserUtils.validateFields} and then copied into
 *                   a new list
 */
@Override
public void setFieldNames(Iterable<String> fieldNames)
{
  // Must run before the assignment: if this throws, the stored names are not replaced.
  ParserUtils.validateFields(fieldNames);

  this.fieldNames = Lists.newArrayList(fieldNames);
}
/**
 * Parses a row by matching it against the compiled pattern and pairing each capture group
 * with a field name.
 *
 * <p>Field names are generated from the number of capture groups the first time this is
 * called while no names are set.
 *
 * @param input the row to parse
 * @return the result of {@code Utils.zipMapPartial} for the field names and the values after
 *         {@code valueFunction} has been applied
 * @throws ParseException if the input does not match the pattern or any other exception is
 *                        raised; the original exception is attached as the cause
 */
@Override
public Map<String, Object> parseToMap(String input)
{
  try {
    final Matcher matcher = compiled.matcher(input);
    final boolean matched = matcher.matches();
    if (!matched) {
      // Raised inside the try so the catch below adds the row text to the final error.
      throw new ParseException("Incorrect Regex: %s . No match found.", pattern);
    }

    List<String> values = Lists.newArrayList();
    final int groups = matcher.groupCount();
    for (int offset = 0; offset < groups; offset++) {
      // Group 0 is the whole match, so capture groups start at 1.
      values.add(matcher.group(offset + 1));
    }

    if (fieldNames == null) {
      setFieldNames(ParserUtils.generateFieldNames(values.size()));
    }

    return Utils.zipMapPartial(fieldNames, Iterables.transform(values, valueFunction));
  }
  catch (Exception e) {
    throw new ParseException(e, "Unable to parse row [%s]", input);
  }
}
/**
 * Throws if {@code findDuplicates} returns a non-empty set for the given names.
 *
 * @param fieldNames names to inspect
 * @throws ParseException when at least one duplicate is reported; the message contains the
 *                        string form of the reported set
 */
public static void validateFields(Iterable<String> fieldNames)
{
  final Set<String> duplicates = findDuplicates(fieldNames);
  final boolean hasDuplicates = !duplicates.isEmpty();
  if (hasDuplicates) {
    final String listing = duplicates.toString();
    throw new ParseException("Duplicate column entries found : %s", listing);
  }
}
private void verify(List<SpatialDimensionSchema> spatialDimensions) { List<String> dimNames = getDimensionNames(); Preconditions.checkArgument( Sets.intersection(this.dimensionExclusions, Sets.newHashSet(dimNames)).isEmpty(), "dimensions and dimensions exclusions cannot overlap" ); ParserUtils.validateFields(dimNames); ParserUtils.validateFields(dimensionExclusions); List<String> spatialDimNames = Lists.transform( spatialDimensions, new Function<SpatialDimensionSchema, String>() { @Override public String apply(SpatialDimensionSchema input) { return input.getDimName(); } } ); // Don't allow duplicates between main list and deprecated spatial list ParserUtils.validateFields(Iterables.concat(dimNames, spatialDimNames)); }
/**
 * Parses one line of flat text into a map of field name to value.
 *
 * <p>The first {@code maxSkipHeaderRows} lines are dropped unparsed. After that, when
 * {@code hasHeaderRow} is set, one line is parsed as the header; it provides the field names
 * only if none are set yet. All remaining lines are data lines. If field names are still
 * unset at the first data line, they are generated from that line's value count.
 *
 * @param input one raw line
 * @return the result of {@code Utils.zipMapPartial} for a data line; {@code null} when the
 *         line was skipped or consumed as the header
 * @throws UnsupportedOperationException if {@code hasHeaderRow} or a positive
 *                                       {@code maxSkipHeaderRows} is configured while
 *                                       {@code supportSkipHeaderRows} is false
 * @throws ParseException                if parsing or field-name setup throws; the original
 *                                       exception is attached as the cause
 */
@Override
public Map<String, Object> parseToMap(final String input)
{
  final boolean headerOptionsRequested = hasHeaderRow || maxSkipHeaderRows > 0;
  if (!supportSkipHeaderRows && headerOptionsRequested) {
    throw new UnsupportedOperationException("hasHeaderRow or maxSkipHeaderRows is not supported. "
                                            + "Please check the indexTask supports these options.");
  }

  // Skipped lines are discarded without being parsed. Parsing them first would let a
  // malformed preamble line fail the call and leave the skip counter unchanged.
  if (skippedHeaderRows < maxSkipHeaderRows) {
    skippedHeaderRows++;
    return null;
  }

  try {
    final List<String> values = parseLine(input);

    final boolean isHeaderLine = hasHeaderRow && !hasParsedHeader;
    if (isHeaderLine) {
      // Field names that were set explicitly win over the header contents.
      if (fieldNames == null) {
        setFieldNames(values);
      }
      hasParsedHeader = true;
      return null;
    }

    if (fieldNames == null) {
      setFieldNames(ParserUtils.generateFieldNames(values.size()));
    }

    return Utils.zipMapPartial(fieldNames, Iterables.transform(values, valueFunction));
  }
  catch (Exception e) {
    throw new ParseException(e, "Unable to parse row [%s]", input);
  }
}
/**
 * Sets the field names of this parser to a copy of {@code fieldNames}.
 *
 * <p>{@code ParserUtils.validateFields} is called on the argument before anything is stored.
 *
 * @param fieldNames names to validate and copy
 */
@Override
public void setFieldNames(Iterable<String> fieldNames)
{
  ParserUtils.validateFields(fieldNames);

  // Reached only when validation did not throw.
  this.fieldNames = Lists.newArrayList(fieldNames);
}