/**
 * Creates a {@link CsvParser} configured from the supplied read options.
 *
 * <p>The parser format is obtained from {@code csvFormat(options)}. The column limit is
 * only overridden when {@code options.maxNumberOfColumns()} is non-null; otherwise the
 * freshly constructed settings keep whatever limit they start with.
 *
 * @param options read options supplying the format and the optional column limit
 * @return a new parser built from the assembled settings
 */
private CsvParser csvParser(CsvReadOptions options) {
  final CsvParserSettings parserSettings = new CsvParserSettings();
  parserSettings.setFormat(csvFormat(options));

  // Only touch the column limit when the caller configured one explicitly.
  final boolean hasColumnLimit = options.maxNumberOfColumns() != null;
  if (hasColumnLimit) {
    parserSettings.setMaxColumns(options.maxNumberOfColumns());
  }

  return new CsvParser(parserSettings);
}
settings.setMaxColumns(2000000);
public static CsvParserSettings extractCsvParserSettings(Configuration job) { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0)); parserSettings.getFormat().setComment(job.get(COMMENT, COMMENT_DEFAULT).charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setNullValue(""); parserSettings.setEmptyValue(""); parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines( Boolean.valueOf(job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT))); // todo: will verify whether there is a performance degrade using -1 here // parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT); parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_INFINITY); String maxColumns = job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING); parserSettings.setMaxColumns(Integer.parseInt(maxColumns)); parserSettings.getFormat().setQuote(job.get(QUOTE, QUOTE_DEFAULT).charAt(0)); parserSettings.getFormat().setQuoteEscape(job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0)); // setting the content length to to limit the length of displayed contents being parsed/written // in the exception message when an error occurs. parserSettings.setErrorContentLength(CarbonCommonConstants.CARBON_ERROR_CONTENT_LENGTH); return parserSettings; }
/**
 * Builds a {@link CsvParser} for the given read options.
 *
 * <p>The format returned by {@code csvFormat(options)} is always applied. A maximum
 * column count is applied only if {@code options.maxNumberOfColumns()} returns a
 * non-null value.
 *
 * @param options read options that provide the format and, optionally, a column limit
 * @return a parser created from the resulting settings
 */
private CsvParser csvParser(CsvReadOptions options) {
  CsvParserSettings parserConfig = new CsvParserSettings();
  parserConfig.setFormat(csvFormat(options));

  // No explicit column limit requested: use the settings as they are.
  if (options.maxNumberOfColumns() == null) {
    return new CsvParser(parserConfig);
  }

  parserConfig.setMaxColumns(options.maxNumberOfColumns());
  return new CsvParser(parserConfig);
}
private boolean parseFileByLine(String file, String encode, String split) { CsvParserSettings settings = new CsvParserSettings(); settings.setMaxColumns(65535); settings.setMaxCharsPerColumn(65535); settings.getFormat().setLineSeparator(loadData.getLineTerminatedBy());
/**
 * Creates a CSV parser for the given loader configuration.
 *
 * <p>The parser uses {@code config.separator} as its delimiter, ignores trailing
 * whitespace, substitutes the empty string for empty values, passes {@code null} as the
 * null-value substitute, reads input on the calling thread, and caps the number of
 * columns at {@code columnCount}.
 *
 * @param config loader configuration supplying the field separator
 * @param columnCount value passed to the settings as the maximum number of columns
 * @return a new parser built from these settings
 */
private static CsvParser getParser(CsvFileLoader.Config config, int columnCount) {
  // Describe the file format first, then attach it to the parser settings.
  final CsvFormat csvFormat = new CsvFormat();
  csvFormat.setDelimiter(config.separator);

  final CsvParserSettings parserSettings = new CsvParserSettings();
  parserSettings.setFormat(csvFormat);
  parserSettings.setIgnoreTrailingWhitespaces(true);
  parserSettings.setEmptyValue("");
  parserSettings.setNullValue(null);
  parserSettings.setReadInputOnSeparateThread(false);
  parserSettings.setMaxColumns(columnCount);

  final CsvParser parser = new CsvParser(parserSettings);
  return parser;
}
settings.setMaxColumns(65535); settings.setMaxCharsPerColumn(65535); settings.getFormat().setLineSeparator(loadData.getLineTerminatedBy());
settings.setReadInputOnSeparateThread(false); if (this.actualSchema != null) settings.setMaxColumns(this.actualSchema.getColumnCount()); else settings.setMaxColumns(50000); CsvParser reader = new CsvParser(settings); reader.beginParsing(file);