static ArrayList<byte[]> loadColumnValues(ReadableTable inpTable, int colIndex) throws IOException { TableReader reader = inpTable.getReader(); try { ArrayList<byte[]> result = Lists.newArrayList(); HashSet<String> dedup = new HashSet<String>(); while (reader.next()) { String[] split = reader.getRow(); String colValue; // special single column file, e.g. common_indicator.txt if (split.length == 1) { colValue = split[0]; } // normal case else { if (split.length <= colIndex) { throw new ArrayIndexOutOfBoundsException("Column no. " + colIndex + " not found, line split is " + Arrays.asList(split)); } colValue = split[colIndex]; } if (dedup.contains(colValue) == false) { dedup.add(colValue); result.add(Bytes.toBytes(colValue)); } } return result; } finally { reader.close(); } }
/**
 * Copies the table's signature, column delimiter and all of its rows into this
 * snapshot.
 *
 * The signature and delimiter are assigned first; {@code rows} is assigned only
 * after every row has been read and validated and the reader has been closed,
 * so a failure part-way through leaves {@code rows} untouched.
 *
 * @param table     source table to read from
 * @param tableDesc descriptor that supplies the highest column index each row must contain
 * @throws IOException           declared for failures while reading or closing the reader
 * @throws IllegalStateException if a row has fewer than {@code maxIndex + 1} columns
 */
public void takeSnapshot(ReadableTable table, TableDesc tableDesc) throws IOException {
    this.signature = table.getSignature();
    this.columnDelimeter = table.getColumnDelimeter();

    int maxIndex = tableDesc.getMaxColumnIndex();

    TableReader reader = table.getReader();
    ArrayList<String[]> allRows = new ArrayList<String[]>();
    try {
        while (reader.next()) {
            String[] row = reader.getRow();
            // every row must be wide enough to hold the highest column index of the descriptor
            if (row.length <= maxIndex) {
                throw new IllegalStateException("Bad hive table row, " + tableDesc + " expect " + (maxIndex + 1) + " columns, but got " + Arrays.toString(row));
            }
            allRows.add(row);
        }
    } finally {
        // always release the reader, including when a bad row aborts the scan
        reader.close();
    }

    this.rows = allRows;
}
@Test public void basicTest() throws Exception { String tableName = "EDW.TEST_SITES"; HiveTable hiveTable = new HiveTable(MetadataManager.getInstance(getTestConfig()), tableName); TableDesc tableDesc = MetadataManager.getInstance(getTestConfig()).getTableDesc(tableName); String snapshotPath = snapshotMgr.buildSnapshot(hiveTable, tableDesc).getResourcePath(); snapshotMgr.wipeoutCache(); SnapshotTable snapshot = snapshotMgr.getSnapshotTable(snapshotPath); // compare hive & snapshot TableReader hiveReader = hiveTable.getReader(); TableReader snapshotReader = snapshot.getReader(); while (true) { boolean hiveNext = hiveReader.next(); boolean snapshotNext = snapshotReader.next(); assertEquals(hiveNext, snapshotNext); if (hiveNext == false) break; String[] hiveRow = hiveReader.getRow(); String[] snapshotRow = snapshotReader.getRow(); assertArrayEquals(hiveRow, snapshotRow); } } }
/**
 * Resolves the zero-based column position of each key column through the table
 * descriptor, then passes every row of the table, together with those
 * positions, to {@code initRow}.
 *
 * @throws IOException declared for failures of the underlying reader
 */
protected void init() throws IOException {
    // translate key column names into their zero-based positions
    final int keyCount = keyColumns.length;
    int[] keyPositions = new int[keyCount];
    for (int k = 0; k < keyCount; k++) {
        keyPositions[k] = tableDesc.findColumnByName(keyColumns[k]).getZeroBasedIndex();
    }

    TableReader rowReader = table.getReader();
    try {
        while (rowReader.next()) {
            String[] currentRow = rowReader.getRow();
            initRow(currentRow, keyPositions);
        }
    } finally {
        // release the reader whether the scan finished or failed
        rowReader.close();
    }
}