private static void assertSchemaRelativeToParentDirWithCharset(Function2<Schema, File, DataFrame> parseLogic) throws IOException { StringReader schemaReader = new StringReader(SCHEMA_WITH_CHARSET_NAME); Schema schema = Schema.parseJson(schemaReader); // some trickery to find physical location of resources folder... File resourceFolder = new File(ParserTest.class.getResource("/iso-8859-1.txt").getPath()).getParentFile(); DataFrame dataFrame = parseLogic.apply(schema, resourceFolder); StringColumnId columnId = dataFrame.getColumnId(0, ColumnType.STRING); assertEquals("°", dataFrame.getValueAt(0, columnId)); }
// For every column of df, three checks are made: the id looked up by index and type equals the
// expected id constant, the column fetched via that constant equals the column reference
// obtained earlier (declared outside this excerpt), and the column content is as expected.

// Column 0: strings
assertEquals(NAME, df.getColumnId(0, ColumnType.STRING));
assertEquals(nameColumn, df.getColumn(NAME));
assertEquals(Array.of("Ada", "Homer", "Hillary"), nameColumn.getValues());

// Column 1: ints
assertEquals(AGE, df.getColumnId(1, ColumnType.INT));
assertEquals(ageColumn, df.getColumn(AGE));
assertArrayEquals(new int[]{42, 99, 67}, ageColumn.valueStream().toArray());

// Column 2: doubles, compared with a tolerance of 0.01
assertEquals(HEIGHT, df.getColumnId(2, ColumnType.DOUBLE));
assertEquals(heightColumn, df.getColumn(HEIGHT));
assertArrayEquals(new double[]{1.74, 1.20, 1.70}, heightColumn.valueStream().toArray(), 0.01);

// Column 3: booleans
// NOTE(review): this compares an Array with the result of valueStream().toArray() -- confirm
// that toArray() yields a type that can be equal to Array.of(...).
assertEquals(VEGETARIAN, df.getColumnId(3, ColumnType.BOOLEAN));
assertEquals(vegetarianColumn, df.getColumn(VEGETARIAN));
assertEquals(Array.of(true, false, false), vegetarianColumn.valueStream().toArray());

// Column 4: timestamps, compared against constants defined outside this excerpt
assertEquals(DATE_OF_BIRTH, df.getColumnId(4, ColumnType.TIMESTAMP));
assertEquals(dateOfBirthColumn, df.getColumn(DATE_OF_BIRTH));
assertEquals(Array.of(AUG_26_1975, JAN_08_2006, OCT_26_1947), dateOfBirthColumn.getValues());

// Column 5: categories; only the set of distinct categories is checked
assertEquals(GENDER, df.getColumnId(5, ColumnType.CATEGORY));
assertEquals(genderColumn, df.getColumn(GENDER));
assertEquals(HashSet.of("Female", "Male"), genderColumn.getCategories());
/**
 * Runs {@code parseLogic} against the schema parsed from {@code SCHEMA_WITH_CHARSET_NAME} and
 * asserts that row 0 of the first (string) column of the resulting data frame holds the
 * degree sign.
 *
 * @param parseLogic parsing strategy under test; maps the schema to a data frame
 * @throws IOException declared for reading the schema
 */
private static void assertSchemaWithCharset(Function<Schema, DataFrame> parseLogic) throws IOException {
    Schema parsedSchema = Schema.parseJson(new StringReader(SCHEMA_WITH_CHARSET_NAME));

    DataFrame parsedFrame = parseLogic.apply(parsedSchema);

    StringColumnId firstColumnId = parsedFrame.getColumnId(0, ColumnType.STRING);
    assertEquals("°", parsedFrame.getValueAt(0, firstColumnId));
}
@Test public void demo() { final String EXAMPLE = "Name\tColor\tServing Size (g)\n" + "String\tCategory\tDouble\n" + "Banana\tYellow\t118\n" + "Blueberry\tBlue\t148\n" + "Lemon\tYellow\t83\n" + "Apple\tGreen\t182"; DataFrame dataFrame = Parser.tsv(new StringReader(EXAMPLE)); // Lookup typed identifiers by column index final StringColumnId NAME = dataFrame.getColumnId(0, ColumnType.STRING); final CategoryColumnId COLOR = dataFrame.getColumnId(1, ColumnType.CATEGORY); final DoubleColumnId SERVING_SIZE = dataFrame.getColumnId(2, ColumnType.DOUBLE); // Use identifier to access columns & values StringColumn nameColumn = dataFrame.getColumn(NAME); IndexedSeq<String> nameValues = nameColumn.getValues(); // ... or access individual values via row index / column id String yellow = dataFrame.getValueAt(2, COLOR); }
// Column names must come back in this exact order.
assertEquals(Array.of("Name", "Age", "Height", "Vegetarian", "Date Of Birth", "Gender"), df.getColumnNames());

// For each column: look up the typed id by index and type, fetch the column, check its content.

// Column 0: strings
StringColumnId nameColumnId = df.getColumnId(0, ColumnType.STRING);
StringColumn nameColumn = df.getColumn(nameColumnId);
assertEquals(Array.of("Ada", "Homer", "Hillary"), nameColumn.getValues());

// Column 1: ints
IntColumnId ageColumnId = df.getColumnId(1, ColumnType.INT);
IntColumn ageColumn = df.getColumn(ageColumnId);
assertArrayEquals(new int[]{42, 99, 67}, ageColumn.valueStream().toArray());

// Column 2: doubles, compared with a tolerance of 0.01
DoubleColumnId heightColumnId = df.getColumnId(2, ColumnType.DOUBLE);
DoubleColumn heightColumn = df.getColumn(heightColumnId);
assertArrayEquals(new double[]{1.74, 1.20, 1.70}, heightColumn.valueStream().toArray(), 0.01);

// Column 3: booleans
// NOTE(review): this compares an Array with the result of valueStream().toArray() -- confirm
// that toArray() yields a type that can be equal to Array.of(...).
BooleanColumnId vegetarianColumnId = df.getColumnId(3, ColumnType.BOOLEAN);
BooleanColumn vegetarianColumn = df.getColumn(vegetarianColumnId);
assertEquals(Array.of(true, false, false), vegetarianColumn.valueStream().toArray());

// Column 4: timestamps
TimestampColumnId dateOfBirthColumnId = df.getColumnId(4, ColumnType.TIMESTAMP);
TimestampColumn dateOfBirthColumn = df.getColumn(dateOfBirthColumnId);
// Maps an instant to its month in UTC.
// NOTE(review): toMonth is not used by any statement visible in this excerpt -- check whether
// it is used further down or is a leftover.
Function<? super Instant, Month> toMonth = instant -> instant.atZone(ZoneId.from(ZoneOffset.UTC)).getMonth();
// Only the millisecond part of the third row (index 2) is checked here.
assertEquals(333, dateOfBirthColumn.getValueAt(2).getLong(ChronoField.MILLI_OF_SECOND));

// Column 5: categories; only the set of distinct categories is checked
CategoryColumnId genderColumnId = df.getColumnId(5, ColumnType.CATEGORY);
CategoryColumn genderColumn = df.getColumn(genderColumnId);
assertEquals(HashSet.of("Female", "Male"), genderColumn.getCategories());
/**
 * Asserts the meta data attached to a parsed data frame: the frame itself must carry exactly
 * one entry, {@code author -> netzwerg}, and the double column at index 2 must carry exactly
 * one entry, {@code unit -> m}.
 *
 * @param df the parsed data frame to inspect
 */
private static void assertMetaDataParsedCorrectly(DataFrame df) {
    // Meta data attached to the data frame as a whole
    Map<String, String> frameMeta = df.getMetaData();
    assertEquals(1, frameMeta.size());
    assertEquals(Option.of("netzwerg"), frameMeta.get("author"));

    // Meta data attached to the double column at index 2
    Map<String, String> doubleColumnMeta = df
            .getColumn(df.getColumnId(2, ColumnType.DOUBLE))
            .getMetaData();
    assertEquals(1, doubleColumnMeta.size());
    assertEquals(Option.of("m"), doubleColumnMeta.get("unit"));
}