/**
 * Overwrites one column of every row with the same value.
 *
 * @param value  the string stored into each row at {@code column}
 * @param column index of the column to overwrite; passed to
 *               {@code checkInvalidColumn} before any row is modified
 */
public void fillDown(String value, int column) {
    checkInvalidColumn(column);
    for (List<String> row : this) {
        row.set(column, value);
    }
}
/**
 * Collects the values of one column, one entry per row, in row order.
 *
 * @param column index of the column to extract; passed to
 *               {@code checkInvalidColumn} first
 * @return a newly allocated list holding each row's value at {@code column}
 */
public List<String> getColumn(int column) {
    checkInvalidColumn(column);

    List<String> values = new ArrayList<>();
    for (List<String> row : this) {
        String cell = row.get(column);
        values.add(cell);
    }
    return values;
}
/**
 * Merges two columns of every row into one.
 *
 * <p>For each row the value at {@code column1} is concatenated with the value at
 * {@code column2} (in that order), double quotes are stripped, occurrences of the
 * separator {@code sep} are replaced by a space, and the result is stored at the
 * smaller of the two indices. The cell at the larger index is then removed and
 * {@code numColumns} is decremented once.
 *
 * <p>If both indices are equal there is nothing to merge: the grid and
 * {@code numColumns} are left untouched.
 *
 * @param column1 index of the column whose text comes first in the merged value
 * @param column2 index of the column whose text comes second in the merged value
 */
public void merge(int column1, int column2) {
    checkInvalidColumn(column1);
    checkInvalidColumn(column2);

    // Merging a column with itself removes nothing, so the column count must not change.
    if (column1 == column2) {
        return;
    }

    int target = Math.min(column1, column2);
    int removed = Math.max(column1, column2);

    for (List<String> list : this) {
        StringBuilder sb = new StringBuilder();
        sb.append(list.get(column1));
        sb.append(list.get(column2));
        list.set(target, sb.toString().replaceAll("\"", "").replace(sep, " "));
        // 'removed' is an int, so this resolves to List.remove(int index).
        list.remove(removed);
    }

    numColumns--;
}
/**
 * Removes every row whose two given columns are less similar than the threshold.
 *
 * <p>Similarity is whatever {@code MathUtils.stringSimilarity} returns for the two
 * cell values; rows scoring strictly below {@code threshold} are collected first
 * and then dropped from this grid in a single {@code removeAll} call.
 *
 * @param threshold    minimum similarity a row needs in order to be kept
 * @param firstColumn  index of the first column to compare
 * @param secondColumn index of the second column to compare
 */
public void filterBySimilarity(double threshold, int firstColumn, int secondColumn) {
    checkInvalidColumn(firstColumn);
    checkInvalidColumn(secondColumn);

    List<List<String>> belowThreshold = new ArrayList<>();
    for (List<String> row : this) {
        String left = row.get(firstColumn);
        String right = row.get(secondColumn);
        double similarity = MathUtils.stringSimilarity(left, right);
        if (similarity < threshold) {
            belowThreshold.add(row);
        }
    }

    removeAll(belowThreshold);
}
/**
 * Builds a new grid containing the rows whose two given columns are at least
 * as similar as the threshold.
 *
 * <p>Similarity is whatever {@code MathUtils.stringSimilarity} returns for the two
 * cell values. This grid is not modified; matching rows are added to the result
 * via {@code addRow}.
 *
 * @param threshold    minimum similarity (inclusive) for a row to be selected
 * @param firstColumn  index of the first column to compare
 * @param secondColumn index of the second column to compare
 * @return a new {@code StringGrid} created with this grid's {@code sep} and
 *         {@code numColumns}, holding the selected rows
 */
public StringGrid getAllWithSimilarity(double threshold, int firstColumn, int secondColumn) {
    checkInvalidColumn(firstColumn);
    checkInvalidColumn(secondColumn);

    StringGrid matches = new StringGrid(sep, numColumns);
    for (List<String> row : this) {
        String left = row.get(firstColumn);
        String right = row.get(secondColumn);
        double similarity = MathUtils.stringSimilarity(left, right);
        if (similarity >= threshold) {
            matches.addRow(row);
        }
    }
    return matches;
}
/**
 * Builds a new grid containing the rows whose value in the given column is
 * shared with at least one other row.
 *
 * <p>Each value of the column is counted once per row; values are then pruned with
 * {@code dropElementsBelowThreshold(2.0f)} and every row whose cell is among the
 * remaining keys is added to the result. This grid is not modified.
 *
 * @param column index of the column to inspect; passed to
 *               {@code checkInvalidColumn} first
 * @return a new {@code StringGrid} created with this grid's {@code sep} and
 *         {@code numColumns}, holding the rows with repeated values
 */
public StringGrid getRowsWithDuplicateValuesInColumn(int column) {
    checkInvalidColumn(column);
    StringGrid grid = new StringGrid(sep, numColumns);

    // Tally how many rows carry each distinct value of the column.
    Counter<String> counter = new Counter<>();
    for (String val : getColumn(column)) {
        counter.incrementCount(val, 1.0f);
    }
    counter.dropElementsBelowThreshold(2.0f);
    Set<String> duplicatedValues = counter.keySet();

    // A set lookup replaces scanning every key for every row; each row is
    // still added at most once because set elements are distinct.
    for (List<String> row : this) {
        if (duplicatedValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}
/**
 * Builds a new grid containing the rows whose value in the given column does
 * not appear in any other row.
 *
 * <p>Each value of the column is counted once per row; only values whose count is
 * not greater than 1 are kept, and every row whose cell is one of those values is
 * added to the result. This grid is not modified.
 *
 * @param column index of the column to inspect; passed to
 *               {@code checkInvalidColumn} first
 * @return a new {@code StringGrid} created with this grid's {@code sep} and
 *         {@code numColumns}, holding the rows with non-repeated values
 */
public StringGrid getRowWithOnlyOneOccurrence(int column) {
    checkInvalidColumn(column);
    StringGrid grid = new StringGrid(sep, numColumns);

    // Tally how many rows carry each distinct value of the column.
    Counter<String> counter = new Counter<>();
    for (String val : getColumn(column)) {
        counter.incrementCount(val, 1.0f);
    }

    // Collect the values seen only once. The selection must be made on the set
    // that is later used for filtering: pruning the counter while filtering
    // against a separate copy of its keys would let every row through.
    Set<String> singleOccurrenceValues = new HashSet<>();
    for (String key : counter.keySet()) {
        if (counter.getCount(key) <= 1) {
            singleOccurrenceValues.add(key);
        }
    }

    for (List<String> row : this) {
        if (singleOccurrenceValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}