/**
 * Switches on every automatic format detection feature with one call. Invoking this
 * method is the same as calling, in this order and with {@code true} as the flag:
 * <ul>
 *  <li>{@link #setDelimiterDetectionEnabled(boolean, char[])} (passing the given delimiters)</li>
 *  <li>{@link #setQuoteDetectionEnabled(boolean)}</li>
 *  <li>{@link #setLineSeparatorDetectionEnabled(boolean)}</li>
 * </ul>
 *
 * @param delimitersForDetection candidate delimiters for detection, in order of priority.
 */
public final void detectFormatAutomatically(char... delimitersForDetection) {
	setDelimiterDetectionEnabled(true, delimitersForDetection);
	setQuoteDetectionEnabled(true);
	setLineSeparatorDetectionEnabled(true);
}
// Configure a parser for '|'-delimited input with line separator detection switched on.
CsvParserSettings settings = new CsvParserSettings();
settings.getFormat().setDelimiter('|');
settings.setLineSeparatorDetectionEnabled(true);

// YOUR_INPUT_HERE is a placeholder for the actual input to parse; every parsed
// row is returned as a String[] in the resulting list.
CsvParser parser = new CsvParser(settings);
List<String[]> allLines = parser.parseAll(YOUR_INPUT_HERE);
/**
 * Parses CSV text held in memory and exposes its records for iteration.
 *
 * <p>The parser is configured with header extraction, line separator detection and
 * delimiter detection enabled, and is asked to read its input on a separate thread.
 *
 * @param source        the CSV content itself (not a path or URL); it is wrapped in a
 *                      {@link StringReader}
 * @param logicalSource not used by this method
 * @return the records produced by {@code CsvParser.iterateRecords} for {@code source}
 */
private Iterable<Record> getItererableCsv(String source, LogicalSource logicalSource) {
    CsvParserSettings parserConfig = new CsvParserSettings();
    parserConfig.setHeaderExtractionEnabled(true);
    parserConfig.setLineSeparatorDetectionEnabled(true);
    parserConfig.setDelimiterDetectionEnabled(true);
    parserConfig.setReadInputOnSeparateThread(true);

    return new CsvParser(parserConfig).iterateRecords(new StringReader(source));
}
public static CsvParserSettings extractCsvParserSettings(Configuration job) { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0)); parserSettings.getFormat().setComment(job.get(COMMENT, COMMENT_DEFAULT).charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setNullValue(""); parserSettings.setEmptyValue(""); parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines( Boolean.valueOf(job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT))); // todo: will verify whether there is a performance degrade using -1 here // parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT); parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_INFINITY); String maxColumns = job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING); parserSettings.setMaxColumns(Integer.parseInt(maxColumns)); parserSettings.getFormat().setQuote(job.get(QUOTE, QUOTE_DEFAULT).charAt(0)); parserSettings.getFormat().setQuoteEscape(job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0)); // setting the content length to to limit the length of displayed contents being parsed/written // in the exception message when an error occurs. parserSettings.setErrorContentLength(CarbonCommonConstants.CARBON_ERROR_CONTENT_LENGTH); return parserSettings; }
/**
 * Creates the CSV parser settings: line separator detection and header extraction
 * are enabled, and only the {@code label}, {@code elapsed}, {@code success} and
 * {@code timeStamp} fields are selected.
 *
 * <p>The installed processor is a {@code RowListProcessor} wrapped in a
 * {@code ConcurrentRowProcessor}.
 *
 * @return a newly created settings instance
 */
public CsvParserSettings csvParserSettings() {
    CsvParserSettings settings = new CsvParserSettings();
    settings.setLineSeparatorDetectionEnabled(true);
    settings.setHeaderExtractionEnabled(true);
    settings.selectFields("label", "elapsed", "success", "timeStamp");
    settings.setProcessor(new ConcurrentRowProcessor(new RowListProcessor()));
    return settings;
}
}
/**
 * Creates the CSV parser settings: line separator detection is enabled, header
 * extraction is disabled, and columns are selected by index using the positions
 * reported by {@code TIMESTAMP}, {@code VALUE} and {@code HOST_AND_METRIC}.
 *
 * <p>The installed processor is a {@code RowListProcessor} wrapped in a
 * {@code ConcurrentRowProcessor}.
 *
 * @return a newly created settings instance
 */
public CsvParserSettings csvParserSettings() {
    CsvParserSettings settings = new CsvParserSettings();
    settings.setLineSeparatorDetectionEnabled(true);
    settings.setHeaderExtractionEnabled(false);
    settings.selectIndexes(TIMESTAMP.getColumn(), VALUE.getColumn(), HOST_AND_METRIC.getColumn());
    settings.setProcessor(new ConcurrentRowProcessor(new RowListProcessor()));
    return settings;
}
}