public static CsvParserSettings extractCsvParserSettings(Configuration job) { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0)); parserSettings.getFormat().setComment(job.get(COMMENT, COMMENT_DEFAULT).charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setNullValue(""); parserSettings.setEmptyValue(""); parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines( Boolean.valueOf(job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT))); // todo: will verify whether there is a performance degrade using -1 here // parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT); parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_INFINITY); String maxColumns = job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING); parserSettings.setMaxColumns(Integer.parseInt(maxColumns)); parserSettings.getFormat().setQuote(job.get(QUOTE, QUOTE_DEFAULT).charAt(0)); parserSettings.getFormat().setQuoteEscape(job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0)); // setting the content length to to limit the length of displayed contents being parsed/written // in the exception message when an error occurs. parserSettings.setErrorContentLength(CarbonCommonConstants.CARBON_ERROR_CONTENT_LENGTH); return parserSettings; }
/**
 * Creates writer settings suggested for producing CSV files that Microsoft Excel can open.
 *
 * <p>The returned settings use {@code "\r\n"} as the line separator and {@code '\0'} as the
 * comment character, set the empty value to {@code null}, do not skip empty lines, and do not
 * trim values.
 *
 * @return a pre-configured {@link CsvWriterSettings} object with suggested settings for
 *         generating CSV files that can be read by Microsoft Excel.
 */
public static CsvWriterSettings writeExcel() {
  CsvWriterSettings excelSettings = new CsvWriterSettings();

  // General writer behaviour.
  excelSettings.setEmptyValue(null);
  excelSettings.setSkipEmptyLines(false);
  excelSettings.trimValues(false);

  // Format characters.
  excelSettings.getFormat().setComment('\0');
  excelSettings.getFormat().setLineSeparator("\r\n");

  return excelSettings;
}
/**
 * Creates parser settings suggested for reading CSV files produced by Microsoft Excel.
 *
 * <p>The returned settings use {@code "\r\n"} as the line separator and {@code '\0'} as the
 * comment character, disable parsing of unescaped quotes, do not skip empty lines, and do not
 * trim values.
 *
 * @return a pre-configured {@link CsvParserSettings} object with suggested settings
 *         for parsing CSV files produced by Microsoft Excel.
 */
public static CsvParserSettings parseExcel() {
  CsvParserSettings excelSettings = new CsvParserSettings();

  // General parser behaviour.
  excelSettings.setParseUnescapedQuotes(false);
  excelSettings.setSkipEmptyLines(false);
  excelSettings.trimValues(false);

  // Format characters.
  excelSettings.getFormat().setComment('\0');
  excelSettings.getFormat().setLineSeparator("\r\n");

  return excelSettings;
}