/** * Private constructor to prevent instantiation */ public CsvWriter(Table table, CsvWriteOptions options) { this.table = table; this.header = options.header(); this.writer = options.writer(); this.settings = new CsvWriterSettings(); // Sets the character sequence to write for the values that are null. settings.setNullValue(nullValue); settings.getFormat().setDelimiter(options.separator()); settings.getFormat().setQuote(options.quoteChar()); settings.getFormat().setQuoteEscape(options.escapeChar()); settings.getFormat().setLineSeparator(options.lineEnd()); // writes empty lines as well. settings.setSkipEmptyLines(false); }
/**
 * Builds a {@link CsvFormat} from the given read options.
 *
 * <p>The delimiter and the line separator are overridden only when the corresponding option is
 * non-null; otherwise whatever a newly constructed {@code CsvFormat} carries is left in place.
 *
 * @param options the read options to consult
 * @return the configured format
 */
private CsvFormat csvFormat(CsvReadOptions options) {
    final CsvFormat result = new CsvFormat();

    if (null != options.separator()) {
        result.setDelimiter(options.separator());
    }

    if (null != options.lineEnding()) {
        result.setLineSeparator(options.lineEnding());
    }

    return result;
}
/**
 * Returns the delimiter character of the format held by this object's settings.
 *
 * @return the configured delimiter
 */
public char getSeparator() {
    final char separator = settings.getFormat().getDelimiter();
    return separator;
}
/**
 * {@inheritDoc}
 *
 * <p>After the superclass has configured the format, the quote, quote-escape and delimiter
 * held by this object are applied to it, each one only if it is non-null.
 */
@Override
protected void configureFormat(CsvFormat format) {
    super.configureFormat(format);

    // Each override is optional: a null field leaves the format's current value untouched.
    if (null != quote) {
        format.setQuote(quote);
    }
    if (null != quoteEscape) {
        format.setQuoteEscape(quoteEscape);
    }
    if (null != delimiter) {
        format.setDelimiter(delimiter);
    }
}
public static CsvParserSettings extractCsvParserSettings(Configuration job) { CsvParserSettings parserSettings = new CsvParserSettings(); parserSettings.getFormat().setDelimiter(job.get(DELIMITER, DELIMITER_DEFAULT).charAt(0)); parserSettings.getFormat().setComment(job.get(COMMENT, COMMENT_DEFAULT).charAt(0)); parserSettings.setLineSeparatorDetectionEnabled(true); parserSettings.setNullValue(""); parserSettings.setEmptyValue(""); parserSettings.setIgnoreLeadingWhitespaces(false); parserSettings.setIgnoreTrailingWhitespaces(false); parserSettings.setSkipEmptyLines( Boolean.valueOf(job.get(SKIP_EMPTY_LINE, CarbonCommonConstants.CARBON_SKIP_EMPTY_LINE_DEFAULT))); // todo: will verify whether there is a performance degrade using -1 here // parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT); parserSettings.setMaxCharsPerColumn(CarbonCommonConstants.MAX_CHARS_PER_COLUMN_INFINITY); String maxColumns = job.get(MAX_COLUMNS, "" + DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING); parserSettings.setMaxColumns(Integer.parseInt(maxColumns)); parserSettings.getFormat().setQuote(job.get(QUOTE, QUOTE_DEFAULT).charAt(0)); parserSettings.getFormat().setQuoteEscape(job.get(ESCAPE, ESCAPE_DEFAULT).charAt(0)); // setting the content length to to limit the length of displayed contents being parsed/written // in the exception message when an error occurs. parserSettings.setErrorContentLength(CarbonCommonConstants.CARBON_ERROR_CONTENT_LENGTH); return parserSettings; }
public CSVDataFrameWriter(String fieldDelimiter, String lineDelimiter) { this(); this.format.setDelimiter(fieldDelimiter.charAt(0)); this.format.setLineSeparator(lineDelimiter); }
/**
 * Creates a {@link CsvParser} for the given loader configuration.
 *
 * <p>The parser uses the configured separator as delimiter, ignores trailing whitespace, reads
 * empty values as the empty string and null values as {@code null}, does not read input on a
 * separate thread, and is limited to {@code columnCount} columns.
 *
 * @param config      the loader configuration supplying the separator
 * @param columnCount the maximum number of columns the parser accepts
 * @return a new parser with these settings
 */
private static CsvParser getParser(CsvFileLoader.Config config, int columnCount) {
    final CsvFormat csvFormat = new CsvFormat();
    csvFormat.setDelimiter(config.separator);

    final CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.setFormat(csvFormat);
    parserSettings.setIgnoreTrailingWhitespaces(true);
    parserSettings.setEmptyValue("");
    parserSettings.setNullValue(null);
    parserSettings.setReadInputOnSeparateThread(false);
    parserSettings.setMaxColumns(columnCount);

    return new CsvParser(parserSettings);
}
/**
 * Switches this object to a new input format on the fly.
 *
 * <p>The normalized newline, delimiter(s), quote, quote escape and the character used to escape
 * the quote escape are all re-read from {@code format}. A delimiter string of exactly one
 * character is stored in {@code delimiter} and {@code multiDelimiter} is cleared; otherwise
 * {@code multiDelimiter} keeps the full character sequence.
 *
 * @param format the new format to use.
 */
public final void updateFormat(CsvFormat format) {
    newLine = format.getNormalizedNewline();
    multiDelimiter = format.getDelimiterString().toCharArray();

    final boolean singleCharDelimiter = multiDelimiter.length == 1;
    if (!singleCharDelimiter) {
        // Multi-character delimiter: only its first character goes into the delimiters array.
        delimiters = new char[]{multiDelimiter[0], newLine};
    } else {
        multiDelimiter = null;
        delimiter = format.getDelimiter();
        delimiters = new char[]{delimiter, newLine};
    }

    quote = format.getQuote();
    quoteEscape = format.getQuoteEscape();
    escapeEscape = format.getCharToEscapeQuoteEscaping();
}
/**
 * Provides a basic CSV configuration for writing CSV files that can be read by Microsoft Excel.
 *
 * <p>The returned settings use {@code "\r\n"} as line separator and {@code '\0'} as comment
 * character, write {@code null} for empty values, do not skip empty lines and do not trim values.
 *
 * @return a pre-configured {@link CsvWriterSettings} object with suggested settings for generating
 *         CSV files that can be read by Microsoft Excel.
 */
public static CsvWriterSettings writeExcel() {
    final CsvWriterSettings excelSettings = new CsvWriterSettings();

    // Format-level options.
    excelSettings.getFormat().setLineSeparator("\r\n");
    excelSettings.getFormat().setComment('\0');

    // Value and line handling.
    excelSettings.setEmptyValue(null);
    excelSettings.setSkipEmptyLines(false);
    excelSettings.trimValues(false);

    return excelSettings;
}
/**
 * Returns the default CsvFormat configured to produce CSV outputs compliant to the
 * <a href="http://tools.ietf.org/html/rfc4180">RFC4180</a> standard.
 *
 * @return an instance of CsvFormat configured to produce CSV outputs compliant to the
 *         <a href="http://tools.ietf.org/html/rfc4180">RFC4180</a> standard.
 */
@Override
protected CsvFormat createDefaultFormat() {
    // A freshly constructed CsvFormat is used as-is; no further customization is applied here.
    final CsvFormat defaults = new CsvFormat();
    return defaults;
}
/** * Builds a new {@code CsvFormatDetector} * * @param maxRowSamples the number of row samples to collect before analyzing the statistics * @param settings the configuration provided by the user with potential defaults in case the detection is unable to discover the proper column delimiter or quote character. * @param whitespaceRangeStart starting range of characters considered to be whitespace. */ CsvFormatDetector(int maxRowSamples, CsvParserSettings settings, int whitespaceRangeStart) { this.MAX_ROW_SAMPLES = maxRowSamples; this.whitespaceRangeStart = whitespaceRangeStart; allowedDelimiters = settings.getDelimitersForDetection(); if (allowedDelimiters != null && allowedDelimiters.length > 0) { suggestedDelimiter = allowedDelimiters[0]; } else { String delimiter = settings.getFormat().getDelimiterString(); suggestedDelimiter = delimiter.length() > 1 ? ',' : settings.getFormat().getDelimiter(); allowedDelimiters = new char[0]; } normalizedNewLine = settings.getFormat().getNormalizedNewline(); comment = settings.getFormat().getComment(); }
/**
 * Starts parsing the named file and stores the parser in {@code this.parser}.
 *
 * <p>The parser is configured with {@code "\n"} as its line separator; the input comes from
 * {@code getReader(filename)}.
 *
 * @param filename passed to {@code getReader} to obtain the input
 */
private void init(String filename) {
    final CsvParserSettings parserSettings = new CsvParserSettings();
    parserSettings.getFormat().setLineSeparator("\n");

    final CsvParser startedParser = new CsvParser(parserSettings);
    startedParser.beginParsing(getReader(filename));

    // Only publish the parser once parsing has begun successfully.
    this.parser = startedParser;
}
/**
 * Returns the quote character of the format held by this object's settings.
 *
 * @return the configured quote character
 */
public char getQuoteCharacter() {
    final char quoteCharacter = settings.getFormat().getQuote();
    return quoteCharacter;
}
/**
 * Parses the target file and maps every parsed row through {@code rowMapper}.
 *
 * <p>Rows for which the mapper returns {@code null} are left out of the result. Row numbers
 * passed to the mapper start at 1 and count every parsed row, including those the mapper rejects.
 *
 * @param lineSep   the line separator sequence of the input
 * @param delimiter string whose first character is used as the field delimiter
 * @param rowMapper converts a row number and its raw values into a {@code T}, or {@code null} to skip
 * @param <T>       the mapped row type
 * @return the non-null mapped rows, in input order
 */
public <T> List<T> getRows(String lineSep, String delimiter, CsvRowMapper<T> rowMapper) {
    CsvParserSettings settings = new CsvParserSettings();
    settings.getFormat().setLineSeparator(lineSep);
    settings.getFormat().setDelimiter(delimiter.charAt(0));

    // NOTE(review): this processor is registered but its collected rows are never read in this
    // method; kept to preserve existing parser behavior - confirm whether it can be dropped.
    RowListProcessor rowProcessor = new RowListProcessor();
    settings.setProcessor(rowProcessor);

    CsvParser parser = new CsvParser(settings);
    parser.beginParsing(targetFile);

    List<T> rows = new ArrayList<>();
    try {
        String[] row;
        int rowNumber = 1;
        while ((row = parser.parseNext()) != null) {
            // Map exactly once per row: the value that passed the null check is the one stored.
            T mappedRow = rowMapper.mapRow(rowNumber, row);
            if (mappedRow != null) {
                rows.add(mappedRow);
            }
            rowNumber++;
        }
    } finally {
        // Stop the parser even when the mapper throws part-way through.
        parser.stopParsing();
    }
    return rows;
}
// Build a fresh CsvFormat whose delimiter is this object's separator, install it on the settings,
// and set the settings' empty-value text to the two-character string "" (a pair of double quotes).
CsvFormat format = new CsvFormat(); format.setDelimiter(this.separator); settings.setFormat(format); settings.setEmptyValue("\"\"");
// Caches the format's delimiter, quote, quote-escape, escape-of-escape and normalized newline in fields.
// NOTE(review): this excerpt looks truncated - the `if` block opened below is not closed within view,
// so which assignments belong to the single-character-delimiter branch cannot be confirmed from here.
this.multiDelimiter = format.getDelimiterString().toCharArray(); if (multiDelimiter.length == 1) { delimiter = multiDelimiter[0]; multiDelimiter = null; this.quoteChar = format.getQuote(); this.escapeChar = format.getQuoteEscape(); this.escapeEscape = settings.getFormat().getCharToEscapeQuoteEscaping(); this.newLine = format.getNormalizedNewline();
/**
 * Provides a basic CSV configuration for parsing CSV files produced by Microsoft Excel.
 *
 * <p>The returned settings use {@code "\r\n"} as line separator and {@code '\0'} as comment
 * character, do not parse unescaped quotes, do not skip empty lines and do not trim values.
 *
 * @return a pre-configured {@link CsvParserSettings} object with suggested settings
 *         for parsing CSV files produced by Microsoft Excel.
 */
public static CsvParserSettings parseExcel() {
    final CsvParserSettings excelSettings = new CsvParserSettings();

    // Format-level options.
    excelSettings.getFormat().setLineSeparator("\r\n");
    excelSettings.getFormat().setComment('\0');

    // Quote, line and value handling.
    excelSettings.setParseUnescapedQuotes(false);
    excelSettings.setSkipEmptyLines(false);
    excelSettings.trimValues(false);

    return excelSettings;
}
/**
 * Returns the default CsvFormat configured to handle CSV inputs compliant to the
 * <a href="http://tools.ietf.org/html/rfc4180">RFC4180</a> standard.
 *
 * @return an instance of CsvFormat configured to handle CSV inputs compliant to the
 *         <a href="http://tools.ietf.org/html/rfc4180">RFC4180</a> standard.
 */
@Override
protected CsvFormat createDefaultFormat() {
    // A freshly constructed CsvFormat is used as-is; no further customization is applied here.
    final CsvFormat defaults = new CsvFormat();
    return defaults;
}
/**
 * Returns the quote character of the format held by this object's settings.
 *
 * @return the configured quote character
 */
public char getQuoteCharacter() {
    final char quoteCharacter = settings.getFormat().getQuote();
    return quoteCharacter;
}
/**
 * Returns the CSV format detected when one of the following settings is enabled:
 * <ul>
 * <li>{@link CommonParserSettings#isLineSeparatorDetectionEnabled()}</li>
 * <li>{@link CsvParserSettings#isDelimiterDetectionEnabled()}</li>
 * <li>{@link CsvParserSettings#isQuoteDetectionEnabled()}</li>
 * </ul>
 *
 * The detected format will be available once the parsing process is initialized
 * (i.e. when {@link AbstractParser#beginParsing(Reader)} runs).
 *
 * <p>The result is a clone of the configured format in which only the values covered by an
 * enabled detection are overwritten.
 *
 * @return the detected CSV format, or {@code null} if no detection has been enabled or if the
 *         parsing process has not been started yet.
 */
public final CsvFormat getDetectedFormat() {
    CsvFormat detected = null;

    if (settings.isDelimiterDetectionEnabled()) {
        detected = settings.getFormat().clone();
        detected.setDelimiter(this.delimiter);
    }

    if (settings.isQuoteDetectionEnabled()) {
        // Clone lazily so the configured format is copied at most once.
        if (detected == null) {
            detected = settings.getFormat().clone();
        }
        detected.setQuote(quote);
        detected.setQuoteEscape(quoteEscape);
    }

    if (settings.isLineSeparatorDetectionEnabled()) {
        if (detected == null) {
            detected = settings.getFormat().clone();
        }
        detected.setLineSeparator(input.getLineSeparator());
    }

    return detected;
}