public static CsvParserSettings extractCsvParserSettings(Configuration job) { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0)); parserSettings.getFormat().setComment(job.get(COMMENT, COMMENT_DEFAULT).charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setNullValue(""); parserSettings.setEmptyValue(""); parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines( Boolean.valueOf(job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT))); // todo: will verify whether there is a performance degrade using -1 here // parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT); parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_INFINITY); String maxColumns = job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING); parserSettings.setMaxColumns(Integer.parseInt(maxColumns)); parserSettings.getFormat().setQuote(job.get(QUOTE, QUOTE_DEFAULT).charAt(0)); parserSettings.getFormat().setQuoteEscape(job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0)); // setting the content length to to limit the length of displayed contents being parsed/written // in the exception message when an error occurs. parserSettings.setErrorContentLength(CarbonCommonConstants.CARBON_ERROR_CONTENT_LENGTH); return parserSettings; }
private boolean parseFileByLine(String file, String encode, String split) { CsvParserSettings settings = new CsvParserSettings(); settings.setMaxColumns(65535); settings.setMaxCharsPerColumn(65535); settings.getFormat().setLineSeparator(loadData.getLineTerminatedBy()); settings.getFormat().setDelimiter(loadData.getFieldTerminatedBy().charAt(0));
settings.setMaxCharsPerColumn(65535); settings.getFormat().setLineSeparator(loadData.getLineTerminatedBy()); settings.getFormat().setDelimiter(loadData.getFieldTerminatedBy().charAt(0));