/**
 * Builds a grid from the lines read from an input stream.
 *
 * <p>The lines are obtained with {@code IOUtils.readLines(from)} and handed,
 * together with {@code sep}, to the {@code StringGrid(String, List)} constructor.
 * This method does not itself call {@code close()} on the stream.
 *
 * @param from the stream to read lines from
 * @param sep  the separator passed through to the grid constructor
 *             (presumably the column delimiter -- confirm against the constructor)
 * @return a new grid built from the lines that were read
 * @throws IOException           if reading the stream fails
 * @throws IllegalStateException if no lines were read
 */
public static StringGrid fromInput(InputStream from, String sep) throws IOException {
    final List<String> lines = IOUtils.readLines(from);
    if (!lines.isEmpty()) {
        return new StringGrid(sep, lines);
    }
    throw new IllegalStateException("Nothing to read; file is empty");
}
/**
 * Builds a grid from the lines of a file.
 *
 * <p>The lines are obtained with {@code FileUtils.readLines(new File(file))} and
 * handed, together with {@code sep}, to the {@code StringGrid(String, List)}
 * constructor.
 *
 * @param file path of the file to read
 * @param sep  the separator passed through to the grid constructor
 *             (presumably the column delimiter -- confirm against the constructor)
 * @return a new grid built from the lines that were read
 * @throws IOException           if reading the file fails
 * @throws IllegalStateException if no lines were read
 */
public static StringGrid fromFile(String file, String sep) throws IOException {
    final File source = new File(file);
    final List<String> lines = FileUtils.readLines(source);
    if (!lines.isEmpty()) {
        return new StringGrid(sep, lines);
    }
    throw new IllegalStateException("Nothing to read; file is empty");
}
/**
 * Returns a copy of this grid with duplicate rows stripped.
 *
 * <p>The copy is created with the {@code StringGrid(StringGrid)} constructor and
 * {@code stripDuplicateRows()} is invoked on the copy, not on this grid.
 *
 * @return the de-duplicated copy
 */
public StringGrid getUniqueRows() {
    final StringGrid deduplicated = new StringGrid(this);
    deduplicated.stripDuplicateRows();
    return deduplicated;
}
/**
 * Collects every row whose cell in the given column equals the given value.
 *
 * <p>Cells are compared with {@code cell.equals(value)}. The column index is
 * not validated up front by this method.
 *
 * @param column index of the column to inspect in each row
 * @param value  the value a row's cell must equal for the row to be selected
 * @return a new grid (same separator and column count as this one) holding the
 *         matching rows in their original order
 */
public StringGrid select(int column, String value) {
    final StringGrid selected = new StringGrid(sep, numColumns);
    for (List<String> candidate : this) {
        final String cell = candidate.get(column);
        if (!cell.equals(value)) {
            continue;
        }
        selected.addRow(candidate);
    }
    return selected;
}
/**
 * Collects every row whose two designated cells are at least as similar as the
 * given threshold.
 *
 * <p>Similarity is whatever {@code MathUtils.stringSimilarity} returns for the
 * two cells; a row is kept when that score is {@code >= threshold}.
 *
 * @param threshold    minimum similarity score (inclusive) for a row to be kept
 * @param firstColumn  index of the first cell to compare; checked with
 *                     {@code checkInvalidColumn}
 * @param secondColumn index of the second cell to compare; checked with
 *                     {@code checkInvalidColumn}
 * @return a new grid (same separator and column count as this one) holding the
 *         rows that met the threshold, in their original order
 */
public StringGrid getAllWithSimilarity(double threshold, int firstColumn, int secondColumn) {
    // Validate in the same order as the arguments are declared.
    checkInvalidColumn(firstColumn);
    checkInvalidColumn(secondColumn);

    final StringGrid similarRows = new StringGrid(sep, numColumns);
    for (List<String> row : this) {
        final String left = row.get(firstColumn);
        final String right = row.get(secondColumn);
        final double score = MathUtils.stringSimilarity(left, right);
        // Keep the comparison in its positive form so a NaN score is rejected.
        if (score >= threshold) {
            similarRows.addRow(row);
        }
    }
    return similarRows;
}
/**
 * Collects every row whose value in the given column occurs in more than one row.
 *
 * <p>Occurrences of each column value are tallied in a {@code Counter}; entries
 * are then pruned with {@code dropElementsBelowThreshold(2.0f)} and the rows
 * whose cell is among the remaining keys are copied to the result.
 *
 * @param column index of the column to inspect; checked with
 *               {@code checkInvalidColumn}
 * @return a new grid (same separator and column count as this one) holding the
 *         rows with a repeated value in {@code column}, in their original order
 */
public StringGrid getRowsWithDuplicateValuesInColumn(int column) {
    checkInvalidColumn(column);

    StringGrid grid = new StringGrid(sep, numColumns);
    List<String> columns = getColumn(column);

    Counter<String> counter = new Counter<>();
    for (String val : columns) {
        counter.incrementCount(val, 1.0f);
    }
    counter.dropElementsBelowThreshold(2.0f);
    Set<String> repeatedValues = counter.keySet();

    for (List<String> row : this) {
        // A set membership test replaces the former linear scan over every key
        // for every row; each row is still added at most once.
        if (repeatedValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}
/**
 * Collects every row whose value in the given column occurs in no other row.
 *
 * <p>Occurrences of each column value are tallied in a {@code Counter}; values
 * whose count is greater than 1 are excluded, and the rows whose cell is among
 * the remaining values are copied to the result.
 *
 * @param column index of the column to inspect; checked with
 *               {@code checkInvalidColumn}
 * @return a new grid (same separator and column count as this one) holding the
 *         rows with a non-repeated value in {@code column}, in their original order
 */
public StringGrid getRowWithOnlyOneOccurrence(int column) {
    checkInvalidColumn(column);

    StringGrid grid = new StringGrid(sep, numColumns);
    List<String> columns = getColumn(column);

    Counter<String> counter = new Counter<>();
    for (String val : columns) {
        counter.incrementCount(val, 1.0f);
    }

    // Build the set of values that were NOT seen more than once. The previous
    // code pruned the counter but then filtered rows against an unpruned
    // snapshot of its keys, so every row was returned.
    Set<String> singleOccurrenceValues = new HashSet<>();
    for (String key : counter.keySet()) {
        if (counter.getCount(key) > 1) {
            continue;
        }
        singleOccurrenceValues.add(key);
    }

    for (List<String> row : this) {
        if (singleOccurrenceValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}