private List<RecordField> getRecordFields() { if (this.recordFields != null) { return this.recordFields; } // Use a SortedMap keyed by index of the field so that we can get a List of field names in the correct order final SortedMap<Integer, String> sortedMap = new TreeMap<>(); for (final Map.Entry<String, Integer> entry : csvParser.getHeaderMap().entrySet()) { sortedMap.put(entry.getValue(), entry.getKey()); } final List<RecordField> fields = new ArrayList<>(); final List<String> rawFieldNames = new ArrayList<>(sortedMap.values()); for (final String rawFieldName : rawFieldNames) { final Optional<RecordField> option = schema.getField(rawFieldName); if (option.isPresent()) { fields.add(option.get()); } else { fields.add(new RecordField(rawFieldName, RecordFieldType.STRING.getDataType())); } } this.recordFields = fields; return fields; }
/**
 * Derives a RecordSchema from the first (header) line of the CSV content: every column
 * becomes a nullable STRING field. The supplied {@code readSchema} is not consulted;
 * the header line is the sole source of field names.
 *
 * @throws SchemaNotFoundException if no context is configured or the header cannot be read
 */
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream, final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException("Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        final CSVFormat headerFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader();
        // BOMInputStream strips a leading byte-order mark so it cannot corrupt the first header name
        try (final Reader reader = new InputStreamReader(new BOMInputStream(contentStream));
             final CSVParser parser = new CSVParser(reader, headerFormat)) {

            final List<RecordField> headerFields = new ArrayList<>();
            for (final String column : parser.getHeaderMap().keySet()) {
                headerFields.add(new RecordField(column, RecordFieldType.STRING.getDataType(), true));
            }
            return new SimpleRecordSchema(headerFields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}
private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException { List<String> columns = this.columns; switch (headerSource) { case FROM_TABLE: System.out.println(String.format("csv columns from database.")); break; case IN_LINE: columns = new ArrayList<String>(); for (String colName : parser.getHeaderMap().keySet()) { columns.add(colName); // iterates in column order } System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(), buildStringFromList(columns))); break; case SUPPLIED_BY_USER: System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(), buildStringFromList(columns))); break; default: throw new IllegalStateException("parser has unknown column source."); } return SchemaUtil.generateColumnInfo(conn, tableName, columns, isStrict); }
// NOTE(review): fragment of a larger method — the try block's close and the rest of the
// body lie outside this excerpt.
int currentLine = 0;
// withHeader() makes the parser consume the first record as the header row
try (CSVParser parser = new CSVParser(new StringReader(content), getCSVFormat().withHeader())) {
    // Header names in column order; presumably matched against objectType's fields below — TODO confirm
    Set<String> columns = parser.getHeaderMap().keySet();
    Map<String, Field> fieldMap = getFieldMap(objectType);
// Emit the parsed column-name -> index mapping at debug level for troubleshooting
ourLog.debug("Header map: {}", parsed.getHeaderMap());
@Override public Map<String, Integer> getHeaderMap() { Map<String, Integer> map = parser.getHeaderMap(); if (map == null) { return Collections.emptyMap(); } else { if (map.containsKey(null)) {//Ignore columns without header map.remove(null); } return map; } }
/**
 * Parses CSV content into a list of rows, each row a map from column name to cell value.
 * When {@code header} is empty, column names are taken from the file's own header line.
 *
 * @param header  explicit column names, or an empty list to use the CSV header row
 * @param content raw CSV text
 * @return one map per data record, keyed by column name
 */
public static List<Map<String, String>> parse(List<String> header, String content) {
    List<Map<String, String>> tableData = new ArrayList<>();
    // try-with-resources: CSVParser is Closeable and the original leaked it
    try (CSVParser csvRecords = readCsvRecords(header, content)) {
        Collection<String> headerToUse = header.isEmpty() ? csvRecords.getHeaderMap().keySet() : header;
        for (CSVRecord record : csvRecords) {
            tableData.add(createRow(headerToUse, record));
        }
    } catch (IOException e) {
        // close() can throw; surface it rather than swallowing it silently
        throw new IllegalStateException("Failed to close CSV parser", e);
    }
    return tableData;
}
/**
 * Reads a delimited dataset into tuples. The first line is consumed as the header; the
 * label column defaults to the last column when {@code labelCol} is -1.
 *
 * @param is input stream of RFC4180-style delimited text
 * @return one Tuple per data record (empty list for a header-only/empty stream)
 * @throws IOException if the stream cannot be read
 */
public List<Tuple> read(InputStream is) throws IOException {
    CSVFormat format = CSVFormat.RFC4180.withHeader().withDelimiter(delim.charAt(0));
    // try-with-resources: the original never closed the parser (resource leak)
    // NOTE(review): InputStreamReader uses the platform default charset — confirm UTF-8 is not required
    try (CSVParser csvParser = new CSVParser(new InputStreamReader(is), format)) {
        List<CSVRecord> records = csvParser.getRecords();
        // Header names ordered by their column index
        header = csvParser.getHeaderMap().entrySet().stream()
                .sorted(Map.Entry.comparingByValue())
                .map(Map.Entry::getKey)
                .toArray(String[]::new);
        if (records.isEmpty()) {
            // Guard: the original indexed records.get(0) and threw on an empty file
            return new ArrayList<>();
        }
        labelCol = labelCol == -1 ? records.get(0).size() - 1 : labelCol;
        return records.stream().parallel().map(this::extractValuedFeat).collect(Collectors.toList());
    }
}
/**
 * Parses CSV content into a list of rows, each row a map from column name to cell value.
 * When {@code header} is empty, column names are taken from the file's own header line.
 *
 * @param header  explicit column names, or an empty list to use the CSV header row
 * @param content raw CSV text
 * @return one map per data record, keyed by column name
 */
public static List<Map<String, String>> parse(List<String> header, String content) {
    List<Map<String, String>> tableData = new ArrayList<>();
    // try-with-resources: CSVParser is Closeable and the original leaked it
    try (CSVParser csvRecords = readCsvRecords(header, content)) {
        Collection<String> headerToUse = header.isEmpty() ? csvRecords.getHeaderMap().keySet() : header;
        for (CSVRecord record : csvRecords) {
            tableData.add(createRow(headerToUse, record));
        }
    } catch (IOException e) {
        // close() can throw; surface it rather than swallowing it silently
        throw new IllegalStateException("Failed to close CSV parser", e);
    }
    return tableData;
}
/**
 * Output a header row to the content handler.
 *
 * @param parser
 *            the parser
 * @param handler
 *            the handler
 * @throws SAXException
 *             the SAX exception
 */
private void handleHeaderRow(final CSVParser parser, final ContentHandler handler) throws SAXException {
    // Fetch the header map once: the original called getHeaderMap() three times,
    // and the commons-csv API builds a fresh copy on each call.
    final Map<String, Integer> headerMap = parser.getHeaderMap();
    if (headerMap == null || headerMap.isEmpty()) {
        return;
    }
    final ArrayList<Entry<String, Integer>> headers = new ArrayList<>(headerMap.entrySet());
    // Emit columns in their declared order rather than map-entry order
    Collections.sort(headers, new HeaderOrderComparator());
    handler.startElement(URI, TR, TR, EMPTY_ATTRIBUTES);
    for (final Entry<String, Integer> column : headers) {
        handleHeaderColumn(column, handler);
    }
    handler.endElement(URI, TR, TR);
}
// NOTE(review): excerpt with elisions ("...") — not compilable as-is.
CSVParser parser = CSVFormat.EXCEL.withHeader().parse(in);
// Column name -> zero-based column index for the header row
Map<String, Integer> headerMap = parser.getHeaderMap();
int header1Index = -1;
int header2Index = -1;
// Resolve the indices of the two columns of interest, tolerating surrounding whitespace
for (Map.Entry<String, Integer> entry : headerMap.entrySet()) {
    String name = entry.getKey();
    int index = entry.getValue();
    switch (name.trim()) {
        case "Header1":
            header1Index = index;
            break;
        case "Header2":
            header2Index = index;
            break;
    }
}
// Indices stay -1 when a header is missing; record.get(-1) would then throw — TODO confirm intended
for (CSVRecord record : parser) {
    ...
    mo.setHeader1(record.get(header1Index));
    ...
}
/**
 * Loads every value of the {@code fieldname} column from the given CSV resource so a
 * line can later be selected by modulo indexing.
 *
 * @param filename  CSV resource to read (first record must be a header row)
 * @param fieldname header name of the column whose values are loaded
 * @throws IllegalArgumentException if {@code fieldname} is not present in the header
 */
@Example({"ModuloCSVLineToString('data/myfile.csv','lat')","load values for 'lat' from the CSV file myfile.csv."})
public ModuloCSVLineToString(String filename, String fieldname) {
    this.filename = filename;
    CSVParser csvp = ResourceFinder.readFileCSV(filename);
    Integer columnIndex = csvp.getHeaderMap().get(fieldname);
    if (columnIndex == null) {
        // Fail with a clear message instead of the bare NullPointerException the
        // auto-unboxing in the original produced when the column was absent.
        throw new IllegalArgumentException(
                "Column '" + fieldname + "' not found in CSV file '" + filename + "'");
    }
    final int column = columnIndex;
    for (CSVRecord strings : csvp) {
        lines.add(strings.get(column));
    }
}
/**
 * Loads every value of the {@code fieldname} column from the given CSV resource so a
 * line can later be selected by modulo indexing.
 *
 * @param filename  CSV resource to read (first record must be a header row)
 * @param fieldname header name of the column whose values are loaded
 * @throws IllegalArgumentException if {@code fieldname} is not present in the header
 */
@Example({"ModuloCSVLineToString('data/myfile.csv','lat')","load values for 'lat' from the CSV file myfile.csv."})
public ModuloCSVLineToString(String filename, String fieldname) {
    this.filename = filename;
    CSVParser csvp = ResourceFinder.readFileCSV(filename);
    Integer columnIndex = csvp.getHeaderMap().get(fieldname);
    if (columnIndex == null) {
        // Fail with a clear message instead of the bare NullPointerException the
        // auto-unboxing in the original produced when the column was absent.
        throw new IllegalArgumentException(
                "Column '" + fieldname + "' not found in CSV file '" + filename + "'");
    }
    final int column = columnIndex;
    for (CSVRecord strings : csvp) {
        lines.add(strings.get(column));
    }
}
/**
 * Parses an AWS credential report (CSV) into CSVReportEntry rows after validating that
 * all required columns are present.
 *
 * @param report the credential report, expected in text/csv format
 * @return entries for every parseable CSV record (unparseable records are filtered out)
 * @throws RuntimeException if the report cannot be read
 */
@Override
public List<CSVReportEntry> apply(final GetCredentialReportResult report) {
    Assert.state(Textcsv.toString().equals(report.getReportFormat()), "unknown credential report format: " + report.getReportFormat());
    // Parser joined into try-with-resources: the original only closed the Reader
    try (final Reader r = new BufferedReader(new InputStreamReader(new ByteBufferBackedInputStream(report.getContent())));
         final CSVParser parser = new CSVParser(r, CSV_FORMAT)) {
        final Map<String, Integer> headers = parser.getHeaderMap();
        // Validate all mandatory columns up front; one loop replaces six copy-pasted
        // asserts while keeping the failure messages byte-identical.
        for (final String required : new String[] {
                "user", "arn", "password_enabled", "mfa_active",
                "access_key_1_active", "access_key_2_active"}) {
            Assert.state(headers.containsKey(required), "Header '" + required + "' not found in CSV");
        }
        return stream(parser.spliterator(), false).map(this::toCSVReportEntry).filter(Objects::nonNull).collect(toList());
    } catch (final IOException e) {
        throw new RuntimeException("Could not read csv report", e);
    }
}
private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException { List<String> columns = this.columns; switch (headerSource) { case FROM_TABLE: System.out.println(String.format("csv columns from database.")); break; case IN_LINE: columns = new ArrayList<String>(); for (String colName : parser.getHeaderMap().keySet()) { columns.add(colName); // iterates in column order } System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(), buildStringFromList(columns))); break; case SUPPLIED_BY_USER: System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(), buildStringFromList(columns))); break; default: throw new IllegalStateException("parser has unknown column source."); } return SchemaUtil.generateColumnInfo(conn, tableName, columns, isStrict); }
private List<ColumnInfo> buildColumnInfoList(CSVParser parser) throws SQLException { List<String> columns = this.columns; switch (headerSource) { case FROM_TABLE: System.out.println(String.format("csv columns from database.")); break; case IN_LINE: columns = new ArrayList<String>(); for (String colName : parser.getHeaderMap().keySet()) { columns.add(colName); // iterates in column order } System.out.println(String.format("csv columns from header line. length=%s, %s", columns.size(), buildStringFromList(columns))); break; case SUPPLIED_BY_USER: System.out.println(String.format("csv columns from user. length=%s, %s", columns.size(), buildStringFromList(columns))); break; default: throw new IllegalStateException("parser has unknown column source."); } return SchemaUtil.generateColumnInfo(conn, tableName, columns, isStrict); }
@Override protected Collection<SolrInputDocument> load(@Nonnull final URL url) { this.dataMap = new HashMap<>(); try (BufferedReader in = new BufferedReader( new InputStreamReader(getInputStream(url), StandardCharsets.UTF_8))) { final CSVFormat parser = setupCSVParser(); // Process each csv record row. final CSVParser parsed = parser.parse(in); this.header = parsed.getHeaderMap().entrySet() .stream() .collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey)); for (final CSVRecord row : parsed) { processDataRow(row); } this.dataMap.put(VERSION_FIELD_NAME, getVersionDoc(url)); } catch (final IOException e) { this.logger.error("Failed to load vocabulary source: {}", e.getMessage()); } return this.dataMap.values(); }
/**
 * Builds the list of timed-value attributes by downloading just the header row of each
 * subject's CSV export and turning every non-blacklisted column into an Attribute.
 *
 * @param datasourceIdString datasource identifier used to build the download URL
 * @return attributes for every usable CSV header across all subject recipes
 * @throws Exception if a header file cannot be downloaded or parsed
 */
@Override
public List<Attribute> getTimedValueAttributes(String datasourceIdString) throws Exception {
    List<Attribute> attributes = new ArrayList<>();
    for (SubjectRecipe subjectRecipe : subjectRecipes) {
        // recordlimit=0 fetches only the header line, not the data
        String headerRowUrl = getDataUrl(datasourceIdString, subjectRecipe.getSubjectType()) + "&recordlimit=0";
        File headerRowStream = downloadUtils.fetchFile(new URL(headerRowUrl), getProvider().getLabel(), ".csv");
        // try-with-resources: the original leaked the parser and its FileReader on every iteration
        // NOTE(review): FileReader uses the platform default charset — confirm UTF-8 is not required
        try (CSVParser csvParser = new CSVParser(new FileReader(headerRowStream), CSVFormat.RFC4180.withFirstRecordAsHeader())) {
            for (String header : csvParser.getHeaderMap().keySet()) {
                if (!BLACK_LIST_HEADERS.contains(header)) {
                    String attributeLabel = attributeLabelFromHeader(header);
                    attributes.add(new Attribute(getProvider(), attributeLabel, header));
                    System.out.print(attributeLabel);
                }
            }
        }
    }
    return attributes;
}
/**
 * Convert the CSV string to an immutable table keyed by (row index, column name).
 * Cells that cannot be parsed as numbers become {@code Double.NaN}.
 */
protected static ImmutableTable<Long, String, Double> fromCsv(final String csv) throws IOException {
    final ImmutableTable.Builder<Long, String, Double> builder = ImmutableTable.builder();
    try (StringReader reader = new StringReader(csv);
         CSVParser parser = new CSVParser(reader, CSVFormat.RFC4180.withHeader())) {
        // Column-name -> index map; only the names are needed for cell lookups
        final Map<String, Integer> headerMap = parser.getHeaderMap();
        long row = 0;
        for (final CSVRecord record : parser) {
            for (final String column : headerMap.keySet()) {
                double cell;
                try {
                    cell = Double.parseDouble(record.get(column));
                } catch (final NumberFormatException e) {
                    // Non-numeric cells are represented as NaN rather than failing the load
                    cell = Double.NaN;
                }
                builder.put(row, column, cell);
            }
            row++;
        }
    }
    return builder.build();
}
@Override public void loadData(List<Tuple<String, File>> files, Importer importer) throws InvalidFileException, IOException { for (Tuple<String, File> file : files) { CSVParser parser = format.parse(new FileReader(file.getRight())); String filename = file.getLeft(); //remove well-known extensions if (filename.endsWith(".csv") || filename.endsWith(".tsv") || filename.endsWith(".txt")) { filename = filename.substring(0, filename.length() - 4); } importer.startCollection(filename); parser.getHeaderMap().forEach((name, column) -> importer.registerPropertyName(column, name)); parser.forEach(row -> { importer.startEntity(); for (int i = 0; i < row.size(); i++) { importer.setValue(i, row.get(i)); } importer.finishEntity(); }); importer.finishCollection(); } } }