/**
 * Creates a file set scoped to the given {@link HiveDataset}, caching the dataset and
 * its backing Hive table for later use.
 */
public HiveFileSet(String name, HiveDataset dataset) {
  super(name, dataset);
  this.hiveDataset = dataset;
  this.table = dataset.getTable();
}
}
/**
 * Builds a {@link HiveTargetPathHelper} backed by a mocked {@link HiveDataset} whose table is
 * {@code dbName.tableName}, rooted at {@code TABLE_ROOT}, configured with the given properties.
 */
private HiveTargetPathHelper createTestTargetPathHelper(Properties properties) {
  // Minimal metastore-backed table identity for the mock dataset.
  Table mockTable = new Table(new org.apache.hadoop.hive.metastore.api.Table());
  mockTable.setDbName("dbName");
  mockTable.setTableName("tableName");

  HiveDataset mockDataset = Mockito.mock(HiveDataset.class);
  Mockito.when(mockDataset.getTable()).thenReturn(mockTable);
  Mockito.when(mockDataset.getTableRootPath()).thenReturn(Optional.of(TABLE_ROOT));
  Mockito.when(mockDataset.getProperties()).thenReturn(properties);

  return new HiveTargetPathHelper(mockDataset);
}
/** The dataset root of a Hive dataset is the data location of its backing table. */
@Override public Path datasetRoot() { return super.getTable().getDataLocation(); } }
/**
 * Creates one copy file set per source partition, then a single deregister file set for any
 * target partitions that no longer exist at the source.
 */
private List<FileSet<CopyEntity>> generateAllFileSets(Map<List<String>, Partition> partitionMap) {
  List<FileSet<CopyEntity>> result = Lists.newArrayList();
  for (Map.Entry<List<String>, Partition> entry : partitionMap.entrySet()) {
    result.add(fileSetForPartition(entry.getValue()));
    // Matched at the source, so this target partition does not need deregistration.
    HiveCopyEntityHelper.this.targetPartitions.remove(entry.getKey());
  }
  // Whatever remains exists only at the target and must be deregistered.
  if (!HiveCopyEntityHelper.this.targetPartitions.isEmpty()) {
    result.add(new HivePartitionsDeregisterFileSet(
        HiveCopyEntityHelper.this.dataset.getTable().getCompleteName() + DEREGISTER_FILE_SET,
        HiveCopyEntityHelper.this.dataset,
        HiveCopyEntityHelper.this.targetPartitions.values(),
        HiveCopyEntityHelper.this));
  }
  return result;
}
/**
 * Resolves the tier of a requestor's dataset: the first tier whose whitelist/blacklist accepts
 * the dataset's db/table wins; datasets matching no tier get the lowest priority
 * ({@link Integer#MAX_VALUE}).
 *
 * @throws ClassCastException if the requestor or its dataset is not Hive-based
 */
private int findTier(Requestor<FileSet<CopyEntity>> requestor) {
  if (!(requestor instanceof CopyableDatasetRequestor)) {
    throw new ClassCastException(String.format("%s can only be used for %s.",
        SimpleHiveDatasetTieringPrioritizer.class.getName(),
        CopyableDatasetRequestor.class.getName()));
  }
  Dataset underlying = ((CopyableDatasetRequestor) requestor).getDataset();
  if (!(underlying instanceof HiveDataset)) {
    throw new ClassCastException(String.format("%s can only be used for %s.",
        SimpleHiveDatasetTieringPrioritizer.class.getName(), HiveDataset.class.getName()));
  }
  HiveDataset hive = (HiveDataset) underlying;
  // Hoist the table identity out of the tier-scanning loop.
  String db = hive.getTable().getDbName();
  String name = hive.getTable().getTableName();
  for (Map.Entry<Integer, WhitelistBlacklist> entry : tiersMap.entrySet()) {
    if (entry.getValue().acceptTable(db, name)) {
      return entry.getKey();
    }
  }
  return Integer.MAX_VALUE;
}
}
/**
 * Drops the given dataset's Hive table via a proxy query executor running as the table owner.
 *
 * @throws IOException wrapping any {@link SQLException} from query execution
 */
private static void executeDropTableQuery(HiveDataset hiveDataset, Properties properties)
    throws IOException {
  String db = hiveDataset.getTable().getDbName();
  String name = hiveDataset.getTable().getTableName();
  Optional<String> owner = Optional.fromNullable(hiveDataset.getTable().getOwner());
  String dropQuery = HivePurgerQueryTemplate.getDropTableQuery(db, name);
  try (HiveProxyQueryExecutor executor =
      ProxyUtils.getQueryExecutor(new State(properties), owner)) {
    executor.executeQuery(dropQuery, owner);
  } catch (SQLException e) {
    throw new IOException(e);
  }
}
/**
 * Creates a table-level work unit for the dataset; Avro tables additionally get their table
 * schema URL attached.
 */
protected HiveWorkUnit workUnitForTable(HiveDataset hiveDataset) throws IOException {
  HiveWorkUnit workUnit = new HiveWorkUnit(hiveDataset);
  if (!isAvro(hiveDataset.getTable())) {
    return workUnit;
  }
  workUnit.setTableSchemaUrl(this.avroSchemaManager.getSchemaUrl(hiveDataset.getTable()));
  return workUnit;
}
/**
 * Describes the source as a Hive dataset named {@code db.table}, annotated with the
 * filesystem URI it lives on.
 */
DatasetDescriptor getSourceDataset() {
  String dbName = dataset.getTable().getDbName();
  String tableName = dataset.getTable().getTableName();
  DatasetDescriptor descriptor =
      new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, dbName + "." + tableName);
  descriptor.addMetadata(DatasetConstants.FS_URI, dataset.getFs().getUri().toString());
  return descriptor;
}
public HiveTargetPathHelper(HiveDataset dataset) { this.dataset = dataset; this.relocateDataFiles = Boolean .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES)); this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT) ? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT), this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName())) : Optional.<Path> absent(); this.targetTablePrefixTobeReplaced = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED) ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED))) : Optional.<Path> absent(); this.targetTablePrefixReplacement = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT) ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT))) : Optional.<Path> absent(); }
/**
 * Serializes the given {@link HiveDataset} into this work unit and automatically sets the
 * dataset urn (to the table's complete name) by calling {@link #setDatasetUrn(String)}.
 */
public void setHiveDataset(HiveDataset hiveDataset) {
  String serialized = HiveSource.GENERICS_AWARE_GSON.toJson(hiveDataset, HiveDataset.class);
  this.setProp(HIVE_DATASET_SERIALIZED_KEY, serialized);
  setDatasetUrn(hiveDataset.getTable().getCompleteName());
}
/** * Automatically serializes the {@link HiveDataset} by calling {@link #setHiveDataset(HiveDataset)} * @param hiveDataset for which the workunit is being created */ @SuppressWarnings("deprecation") public HiveWorkUnit(HiveDataset hiveDataset) { super(); setHiveDataset(hiveDataset); if (hiveDataset.getTable().getTableType() != TableType.VIRTUAL_VIEW) { setTableLocation(hiveDataset.getTable().getSd().getLocation()); } }
/**
 * Creates a partition-level work unit for the dataset; Avro tables additionally get both the
 * table and partition schema URLs attached.
 */
protected HiveWorkUnit workUnitForPartition(HiveDataset hiveDataset, Partition partition)
    throws IOException {
  HiveWorkUnit workUnit = new HiveWorkUnit(hiveDataset, partition);
  if (!isAvro(hiveDataset.getTable())) {
    return workUnit;
  }
  workUnit.setTableSchemaUrl(this.avroSchemaManager.getSchemaUrl(hiveDataset.getTable()));
  workUnit.setPartitionSchemaUrl(this.avroSchemaManager.getSchemaUrl(partition));
  return workUnit;
}
/**
 * This method returns a sorted list of partitions, fetched from the metastore via a pooled
 * client that is returned to the pool when done.
 */
public List<Partition> getPartitionsFromDataset() throws IOException {
  try (AutoReturnableObject<IMetaStoreClient> client = getClientPool().getClient()) {
    List<Partition> unsorted =
        HiveUtils.getPartitions(client.get(), getTable(), Optional.<String>absent());
    return sortPartitions(unsorted);
  }
}
/** Blacklisting db2 must leave only the two db1 tables discoverable. */
@Test
public void testBlacklist() throws Exception {
  List<HiveDatasetFinder.DbAndTable> tables = Lists.newArrayList();
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
  tables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(tables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.BLACKLIST, "db2");

  HiveDatasetFinder finder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());

  Assert.assertEquals(found.size(), 2);
  Assert.assertEquals(found.get(0).getTable().getDbName(), "db1");
  Assert.assertEquals(found.get(1).getTable().getDbName(), "db1");
  // Discovery order within the db is unspecified, so compare as a set.
  Assert.assertEquals(
      Sets.newHashSet(found.get(0).getTable().getTableName(), found.get(1).getTable().getTableName()),
      Sets.newHashSet("table1", "table2"));
}
/** Whitelisting db1 must surface exactly its two tables and nothing from db2. */
@Test
public void testWhitelist() throws Exception {
  List<HiveDatasetFinder.DbAndTable> tables = Lists.newArrayList();
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
  tables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(tables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "db1");

  HiveDatasetFinder finder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());

  Assert.assertEquals(found.size(), 2);
  Assert.assertEquals(found.get(0).getTable().getDbName(), "db1");
  Assert.assertEquals(found.get(1).getTable().getDbName(), "db1");
  // Discovery order within the db is unspecified, so compare as a set.
  Assert.assertEquals(
      Sets.newHashSet(found.get(0).getTable().getTableName(), found.get(1).getTable().getTableName()),
      Sets.newHashSet("table1", "table2"));
}
/** An explicit db + table pattern must match only table1 and table2 of db1. */
@Test
public void testTableList() throws Exception {
  List<HiveDatasetFinder.DbAndTable> tables = Lists.newArrayList();
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table2"));
  tables.add(new HiveDatasetFinder.DbAndTable("db1", "table3"));
  tables.add(new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(tables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.DB_KEY, "db1");
  props.put(HiveDatasetFinder.TABLE_PATTERN_KEY, "table1|table2");

  HiveDatasetFinder finder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());

  Assert.assertEquals(found.size(), 2);
  Assert.assertEquals(found.get(0).getTable().getDbName(), "db1");
  Assert.assertEquals(found.get(1).getTable().getDbName(), "db1");
  // Discovery order within the db is unspecified, so compare as a set.
  Assert.assertEquals(
      Sets.newHashSet(found.get(0).getTable().getTableName(), found.get(1).getTable().getTableName()),
      Sets.newHashSet("table1", "table2"));
}
/**
 * Builds a mock {@link CopyableDatasetRequestor} whose dataset is a mocked
 * {@link HiveDataset} backed by a table named {@code dbName.tableName}.
 */
private CopyableDatasetRequestor getRequestor(String dbName, String tableName) {
  Table table = new Table(new org.apache.hadoop.hive.metastore.api.Table());
  table.setDbName(dbName);
  table.setTableName(tableName);

  HiveDataset mockDataset = Mockito.mock(HiveDataset.class);
  Mockito.when(mockDataset.getTable()).thenReturn(table);

  CopyableDatasetRequestor mockRequestor = Mockito.mock(CopyableDatasetRequestor.class);
  Mockito.when(mockRequestor.getDataset()).thenReturn(mockDataset);
  return mockRequestor;
}
}
/** Copying one partition of the source table must materialize its four rows at the target. */
@Test
public void testCopyTable() throws Exception {
  String destinationTable = "copyTable";
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata metadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, destinationTable, stagingDir.getAbsolutePath());
  WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(this.dataset, metadata,
      String.format("%s=part1", this.partitionColumn));

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(workUnit));
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s", this.dbName, destinationTable), 3);
  Assert.assertEquals(rows.size(), 4);
  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn, Lists.newArrayList("101", "102", "103", "104"));
}
/** Materializing the whole view as Avro must produce all eight source rows. */
@Test
public void testMaterializeTable() throws Exception {
  String destinationTable = "materializeTable";
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata metadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, destinationTable, stagingDir.getAbsolutePath());
  WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(this.dataset,
      HiveConverterUtils.StorageFormat.AVRO, metadata, null);

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(workUnit));
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s", this.dbName, destinationTable), 3);
  Assert.assertEquals(rows.size(), 8);
  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn,
      Lists.newArrayList("101", "102", "103", "104", "201", "202", "203", "204"));
}
/** Materializing a filtered query as Avro must produce exactly the matching four rows. */
@Test
public void testMaterializeQuery() throws Exception {
  String destinationTable = "materializeQuery";
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata metadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, destinationTable, stagingDir.getAbsolutePath());
  WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(
      String.format("SELECT * FROM %s.%s WHERE name = 'foo'", this.dbName, this.sourceTableName),
      HiveConverterUtils.StorageFormat.AVRO, metadata);

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(workUnit));
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector,
      String.format("SELECT * FROM %s.%s", this.dbName, destinationTable), 3);
  Assert.assertEquals(rows.size(), 4);
  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn, Lists.newArrayList("101", "103", "201", "203"));
}