private static void assertSchemaRelativeToParentDirWithCharset(Function2<Schema, File, DataFrame> parseLogic) throws IOException { StringReader schemaReader = new StringReader(SCHEMA_WITH_CHARSET_NAME); Schema schema = Schema.parseJson(schemaReader); // some trickery to find physical location of resources folder... File resourceFolder = new File(ParserTest.class.getResource("/iso-8859-1.txt").getPath()).getParentFile(); DataFrame dataFrame = parseLogic.apply(schema, resourceFolder); StringColumnId columnId = dataFrame.getColumnId(0, ColumnType.STRING); assertEquals("°", dataFrame.getValueAt(0, columnId)); }
/**
 * Parses the schema in {@code SCHEMA_WITH_CHARSET_NAME}, feeds it to {@code parseLogic} and
 * asserts that the first value of the first (string) column equals the degree sign {@code "°"}.
 *
 * @param parseLogic parsing strategy under test; maps the parsed schema to a data frame
 * @throws IOException declared for the schema parsing / parse logic under test
 */
private static void assertSchemaWithCharset(Function<Schema, DataFrame> parseLogic) throws IOException {
    Schema parsedSchema = Schema.parseJson(new StringReader(SCHEMA_WITH_CHARSET_NAME));
    DataFrame parsedFrame = parseLogic.apply(parsedSchema);

    StringColumnId firstColumnId = parsedFrame.getColumnId(0, ColumnType.STRING);
    String firstValue = parsedFrame.getValueAt(0, firstColumnId);

    assertEquals("°", firstValue);
}
@Test public void demo() { final String EXAMPLE = "Name\tColor\tServing Size (g)\n" + "String\tCategory\tDouble\n" + "Banana\tYellow\t118\n" + "Blueberry\tBlue\t148\n" + "Lemon\tYellow\t83\n" + "Apple\tGreen\t182"; DataFrame dataFrame = Parser.tsv(new StringReader(EXAMPLE)); // Lookup typed identifiers by column index final StringColumnId NAME = dataFrame.getColumnId(0, ColumnType.STRING); final CategoryColumnId COLOR = dataFrame.getColumnId(1, ColumnType.CATEGORY); final DoubleColumnId SERVING_SIZE = dataFrame.getColumnId(2, ColumnType.DOUBLE); // Use identifier to access columns & values StringColumn nameColumn = dataFrame.getColumn(NAME); IndexedSeq<String> nameValues = nameColumn.getValues(); // ... or access individual values via row index / column id String yellow = dataFrame.getValueAt(2, COLOR); }
/**
 * Builds a data frame from a single generic column of {@link File} values with a custom
 * column type, and checks row/column counts, column lookup and value lookup by id.
 */
@Test
public void customColumnTypes() {
    GenericColumnId fileId = new GenericColumnId("File", new ColumnType<>("File", GenericColumnId.class));

    File first = new File("/path/to/a.txt");
    File second = new File("/path/to/b.txt");
    GenericColumn<File, GenericColumnId> files = GenericColumn.ofAll(fileId, first, second);

    DataFrame frame = DataFrame.of(files);

    // Two values in one column.
    assertEquals(2, frame.getRowCount());
    assertEquals(1, frame.getColumnCount());

    // The column retrieved by id equals the one the frame was built from.
    GenericColumn<File, GenericColumnId> lookedUp = frame.getColumn(fileId);
    assertEquals(files, lookedUp);

    // Row index 1 holds the second file.
    File valueAtSecondRow = frame.getValueAt(1, fileId);
    assertEquals(second, valueAtSecondRow);
}
@Test public void demo() { // Type-safe column identifiers final StringColumnId NAME = StringColumnId.of("Name"); final CategoryColumnId COLOR = CategoryColumnId.of("Color"); final DoubleColumnId SERVING_SIZE = DoubleColumnId.of("Serving Size (g)"); // Convenient column creation StringColumn nameColumn = StringColumn.ofAll(NAME, "Banana", "Blueberry", "Lemon", "Apple"); CategoryColumn colorColumn = CategoryColumn.ofAll(COLOR, "Yellow", "Blue", "Yellow", "Green"); DoubleColumn servingSizeColumn = DoubleColumn.ofAll(SERVING_SIZE, 118, 148, 83, 182); // Grouping columns into a data frame DataFrame dataFrame = DataFrame.ofAll(nameColumn, colorColumn, servingSizeColumn); // Typed random access to individual values (based on rowIndex / columnId) String lemon = dataFrame.getValueAt(2, NAME); double appleServingSize = dataFrame.getValueAt(3, SERVING_SIZE); // Typed stream-based access to all values DoubleStream servingSizes = servingSizeColumn.valueStream(); double maxServingSize = servingSizes.summaryStatistics().getMax(); // Smart column implementations Set<String> colors = colorColumn.getCategories(); }
// Spot-checks typed value access on df: each getValueAt result is assigned to a local of the
// expected Java type (String, int, double, boolean, String) and then compared with the expected
// cell value. The double comparison uses a tolerance of 0.01.
String stringValue = df.getValueAt(0, nameColumnId); assertEquals("Ada", stringValue); int intValue = df.getValueAt(1, ageColumnId); assertEquals(99, intValue); double doubleValue = df.getValueAt(0, heightColumnId); assertEquals(1.74, doubleValue, 0.01); boolean booleanValue = df.getValueAt(2, vegetarianColumnId); assertFalse(booleanValue); String categoryValue = df.getValueAt(2, genderColumnId); assertEquals("Female", categoryValue);
// Spot-checks typed value access on df using the column id constants: each getValueAt result is
// assigned to a local of the expected Java type (String, int, double, boolean, Instant, String)
// and then compared with the expected cell value. The double comparison uses a tolerance of
// 0.01; the timestamp is compared with the JAN_08_2006 constant defined elsewhere.
String stringValue = df.getValueAt(0, NAME); assertEquals("Ada", stringValue); int intValue = df.getValueAt(1, AGE); assertEquals(99, intValue); double doubleValue = df.getValueAt(0, HEIGHT); assertEquals(1.74, doubleValue, 0.01); boolean booleanValue = df.getValueAt(2, VEGETARIAN); assertFalse(booleanValue); Instant timestampValue = df.getValueAt(1, DATE_OF_BIRTH); assertEquals(JAN_08_2006, timestampValue); String categoryValue = df.getValueAt(2, GENDER); assertEquals("Female", categoryValue);